NVPTXAsmPrinter.cpp revision 93cf0939f95b3d580d9c05375a7c84164e1ba72e
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a printer that converts from our internal representation 11// of machine-dependent LLVM code to NVPTX assembly language. 12// 13//===----------------------------------------------------------------------===// 14 15#include "NVPTXAsmPrinter.h" 16#include "MCTargetDesc/NVPTXMCAsmInfo.h" 17#include "NVPTX.h" 18#include "NVPTXInstrInfo.h" 19#include "NVPTXMCExpr.h" 20#include "NVPTXRegisterInfo.h" 21#include "NVPTXTargetMachine.h" 22#include "NVPTXUtilities.h" 23#include "InstPrinter/NVPTXInstPrinter.h" 24#include "cl_common_defines.h" 25#include "llvm/ADT/StringExtras.h" 26#include "llvm/Analysis/ConstantFolding.h" 27#include "llvm/Assembly/Writer.h" 28#include "llvm/CodeGen/Analysis.h" 29#include "llvm/CodeGen/MachineFrameInfo.h" 30#include "llvm/CodeGen/MachineModuleInfo.h" 31#include "llvm/CodeGen/MachineRegisterInfo.h" 32#include "llvm/DebugInfo.h" 33#include "llvm/IR/DerivedTypes.h" 34#include "llvm/IR/Function.h" 35#include "llvm/IR/GlobalVariable.h" 36#include "llvm/IR/Module.h" 37#include "llvm/IR/Operator.h" 38#include "llvm/MC/MCStreamer.h" 39#include "llvm/MC/MCSymbol.h" 40#include "llvm/Support/CommandLine.h" 41#include "llvm/Support/ErrorHandling.h" 42#include "llvm/Support/FormattedStream.h" 43#include "llvm/Support/Path.h" 44#include "llvm/Support/TargetRegistry.h" 45#include "llvm/Support/TimeValue.h" 46#include "llvm/Target/Mangler.h" 47#include "llvm/Target/TargetLoweringObjectFile.h" 48#include <sstream> 49using namespace llvm; 50 51#define DEPOTNAME "__local_depot" 52 53static cl::opt<bool> 54EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden, 55 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 56 cl::init(true)); 57 58static cl::opt<bool> 59InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden, 60 cl::desc("NVPTX Specific: Emit source line in ptx file"), 61 cl::init(false)); 62 63namespace { 64/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 65/// depends. 66void DiscoverDependentGlobals(const Value *V, 67 DenseSet<const GlobalVariable *> &Globals) { 68 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 69 Globals.insert(GV); 70 else { 71 if (const User *U = dyn_cast<User>(V)) { 72 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 73 DiscoverDependentGlobals(U->getOperand(i), Globals); 74 } 75 } 76 } 77} 78 79/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 80/// instances to be emitted, but only after any dependents have been added 81/// first. 82void VisitGlobalVariableForEmission( 83 const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, 84 DenseSet<const GlobalVariable *> &Visited, 85 DenseSet<const GlobalVariable *> &Visiting) { 86 // Have we already visited this one? 87 if (Visited.count(GV)) 88 return; 89 90 // Do we have a circular dependency? 91 if (Visiting.count(GV)) 92 report_fatal_error("Circular dependency found in global variable set"); 93 94 // Start visiting this global 95 Visiting.insert(GV); 96 97 // Make sure we visit all dependents first 98 DenseSet<const GlobalVariable *> Others; 99 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 100 DiscoverDependentGlobals(GV->getOperand(i), Others); 101 102 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 103 E = Others.end(); 104 I != E; ++I) 105 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 106 107 // Now we can visit ourself 108 Order.push_back(GV); 109 Visited.insert(GV); 110 Visiting.erase(GV); 111} 112} 113 114// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we 115// cannot just link to the existing version. 116/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 117/// 118using namespace nvptx; 119const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { 120 MCContext &Ctx = AP.OutContext; 121 122 if (CV->isNullValue() || isa<UndefValue>(CV)) 123 return MCConstantExpr::Create(0, Ctx); 124 125 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 126 return MCConstantExpr::Create(CI->getZExtValue(), Ctx); 127 128 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) 129 return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx); 130 131 if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) 132 return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); 133 134 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 135 if (CE == 0) 136 llvm_unreachable("Unknown constant value to lower!"); 137 138 switch (CE->getOpcode()) { 139 default: 140 // If the code isn't optimized, there may be outstanding folding 141 // opportunities. Attempt to fold the expression using DataLayout as a 142 // last resort before giving up. 143 if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) 144 if (C != CE) 145 return LowerConstant(C, AP); 146 147 // Otherwise report the problem to the user. 148 { 149 std::string S; 150 raw_string_ostream OS(S); 151 OS << "Unsupported expression in static initializer: "; 152 WriteAsOperand(OS, CE, /*PrintType=*/ false, 153 !AP.MF ? 0 : AP.MF->getFunction()->getParent()); 154 report_fatal_error(OS.str()); 155 } 156 case Instruction::GetElementPtr: { 157 const DataLayout &TD = *AP.TM.getDataLayout(); 158 // Generate a symbolic expression for the byte address 159 APInt OffsetAI(TD.getPointerSizeInBits(), 0); 160 cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI); 161 162 const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); 163 if (!OffsetAI) 164 return Base; 165 166 int64_t Offset = OffsetAI.getSExtValue(); 167 return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), 168 Ctx); 169 } 170 171 case Instruction::Trunc: 172 // We emit the value and depend on the assembler to truncate the generated 173 // expression properly. This is important for differences between 174 // blockaddress labels. Since the two labels are in the same function, it 175 // is reasonable to treat their delta as a 32-bit value. 176 // FALL THROUGH. 177 case Instruction::BitCast: 178 return LowerConstant(CE->getOperand(0), AP); 179 180 case Instruction::IntToPtr: { 181 const DataLayout &TD = *AP.TM.getDataLayout(); 182 // Handle casts to pointers by changing them into casts to the appropriate 183 // integer type. This promotes constant folding and simplifies this code. 184 Constant *Op = CE->getOperand(0); 185 Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), 186 false /*ZExt*/); 187 return LowerConstant(Op, AP); 188 } 189 190 case Instruction::PtrToInt: { 191 const DataLayout &TD = *AP.TM.getDataLayout(); 192 // Support only foldable casts to/from pointers that can be eliminated by 193 // changing the pointer to the appropriately sized integer type. 194 Constant *Op = CE->getOperand(0); 195 Type *Ty = CE->getType(); 196 197 const MCExpr *OpExpr = LowerConstant(Op, AP); 198 199 // We can emit the pointer value into this slot if the slot is an 200 // integer slot equal to the size of the pointer. 201 if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) 202 return OpExpr; 203 204 // Otherwise the pointer is smaller than the resultant integer, mask off 205 // the high bits so we are sure to get a proper truncation if the input is 206 // a constant expr. 207 unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); 208 const MCExpr *MaskExpr = 209 MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); 210 return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); 211 } 212 213 // The MC library also has a right-shift operator, but it isn't consistently 214 // signed or unsigned between different targets. 215 case Instruction::Add: 216 case Instruction::Sub: 217 case Instruction::Mul: 218 case Instruction::SDiv: 219 case Instruction::SRem: 220 case Instruction::Shl: 221 case Instruction::And: 222 case Instruction::Or: 223 case Instruction::Xor: { 224 const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); 225 const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); 226 switch (CE->getOpcode()) { 227 default: 228 llvm_unreachable("Unknown binary operator constant cast expr"); 229 case Instruction::Add: 230 return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); 231 case Instruction::Sub: 232 return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); 233 case Instruction::Mul: 234 return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); 235 case Instruction::SDiv: 236 return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); 237 case Instruction::SRem: 238 return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); 239 case Instruction::Shl: 240 return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); 241 case Instruction::And: 242 return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); 243 case Instruction::Or: 244 return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); 245 case Instruction::Xor: 246 return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); 247 } 248 } 249 } 250} 251 252void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 253 if (!EmitLineNumbers) 254 return; 255 if (ignoreLoc(MI)) 256 return; 257 258 DebugLoc curLoc = MI.getDebugLoc(); 259 260 if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) 261 return; 262 263 if (prevDebugLoc == curLoc) 264 return; 265 266 prevDebugLoc = curLoc; 267 268 if (curLoc.isUnknown()) 269 return; 270 271 const MachineFunction *MF = MI.getParent()->getParent(); 272 //const TargetMachine &TM = MF->getTarget(); 273 274 const LLVMContext &ctx = MF->getFunction()->getContext(); 275 DIScope Scope(curLoc.getScope(ctx)); 276 277 assert((!Scope || Scope.isScope()) && 278 "Scope of a DebugLoc should be null or a DIScope."); 279 if (!Scope) 280 return; 281 282 StringRef fileName(Scope.getFilename()); 283 StringRef dirName(Scope.getDirectory()); 284 SmallString<128> FullPathName = dirName; 285 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 286 sys::path::append(FullPathName, fileName); 287 fileName = FullPathName.str(); 288 } 289 290 if (filenameMap.find(fileName.str()) == filenameMap.end()) 291 return; 292 293 // Emit the line from the source file. 294 if (InterleaveSrc) 295 this->emitSrcInText(fileName.str(), curLoc.getLine()); 296 297 std::stringstream temp; 298 temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() 299 << " " << curLoc.getCol(); 300 OutStreamer.EmitRawText(Twine(temp.str().c_str())); 301} 302 303void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 304 SmallString<128> Str; 305 raw_svector_ostream OS(Str); 306 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 307 emitLineNumberAsDotLoc(*MI); 308 309 MCInst Inst; 310 lowerToMCInst(MI, Inst); 311 OutStreamer.EmitInstruction(Inst); 312} 313 314void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 315 OutMI.setOpcode(MI->getOpcode()); 316 317 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 318 const MachineOperand &MO = MI->getOperand(i); 319 320 MCOperand MCOp; 321 if (lowerOperand(MO, MCOp)) 322 OutMI.addOperand(MCOp); 323 } 324} 325 326bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 327 MCOperand &MCOp) { 328 switch (MO.getType()) { 329 default: llvm_unreachable("unknown operand type"); 330 case MachineOperand::MO_Register: 331 MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg())); 332 break; 333 case MachineOperand::MO_Immediate: 334 MCOp = MCOperand::CreateImm(MO.getImm()); 335 break; 336 case MachineOperand::MO_MachineBasicBlock: 337 MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( 338 MO.getMBB()->getSymbol(), OutContext)); 339 break; 340 case MachineOperand::MO_ExternalSymbol: 341 MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); 342 break; 343 case MachineOperand::MO_GlobalAddress: 344 MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal())); 345 break; 346 case MachineOperand::MO_FPImmediate: { 347 const ConstantFP *Cnt = MO.getFPImm(); 348 APFloat Val = Cnt->getValueAPF(); 349 350 switch (Cnt->getType()->getTypeID()) { 351 default: report_fatal_error("Unsupported FP type"); break; 352 case Type::FloatTyID: 353 MCOp = MCOperand::CreateExpr( 354 NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext)); 355 break; 356 case Type::DoubleTyID: 357 MCOp = MCOperand::CreateExpr( 358 NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext)); 359 break; 360 } 361 break; 362 } 363 } 364 return true; 365} 366 367unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 368 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 369 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 370 371 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 372 unsigned RegNum = RegMap[Reg]; 373 374 // Encode the register class in the upper 4 bits 375 // Must be kept in sync with NVPTXInstPrinter::printRegName 376 unsigned Ret = 0; 377 if (RC == &NVPTX::Int1RegsRegClass) { 378 Ret = (1 << 28); 379 } else if (RC == &NVPTX::Int16RegsRegClass) { 380 Ret = (2 << 28); 381 } else if (RC == &NVPTX::Int32RegsRegClass) { 382 Ret = (3 << 28); 383 } else if (RC == &NVPTX::Int64RegsRegClass) { 384 Ret = (4 << 28); 385 } else if (RC == &NVPTX::Float32RegsRegClass) { 386 Ret = (5 << 28); 387 } else if (RC == &NVPTX::Float64RegsRegClass) { 388 Ret = (6 << 28); 389 } else { 390 report_fatal_error("Bad register class"); 391 } 392 393 // Insert the vreg number 394 Ret |= (RegNum & 0x0FFFFFFF); 395 return Ret; 396 } else { 397 // Some special-use registers are actually physical registers. 398 // Encode this as the register class ID of 0 and the real register ID. 399 return Reg & 0x0FFFFFFF; 400 } 401} 402 403MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, 404 const MCSymbol *Symbol) { 405 const MCExpr *Expr; 406 Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, 407 OutContext); 408 return MCOperand::CreateExpr(Expr); 409} 410 411void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 412 const DataLayout *TD = TM.getDataLayout(); 413 const TargetLowering *TLI = TM.getTargetLowering(); 414 415 Type *Ty = F->getReturnType(); 416 417 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 418 419 if (Ty->getTypeID() == Type::VoidTyID) 420 return; 421 422 O << " ("; 423 424 if (isABI) { 425 if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { 426 unsigned size = 0; 427 if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { 428 size = ITy->getBitWidth(); 429 if (size < 32) 430 size = 32; 431 } else { 432 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 433 size = Ty->getPrimitiveSizeInBits(); 434 } 435 436 O << ".param .b" << size << " func_retval0"; 437 } else if (isa<PointerType>(Ty)) { 438 O << ".param .b" << TLI->getPointerTy().getSizeInBits() 439 << " func_retval0"; 440 } else { 441 if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { 442 SmallVector<EVT, 16> vtparts; 443 ComputeValueVTs(*TLI, Ty, vtparts); 444 unsigned totalsz = 0; 445 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 446 unsigned elems = 1; 447 EVT elemtype = vtparts[i]; 448 if (vtparts[i].isVector()) { 449 elems = vtparts[i].getVectorNumElements(); 450 elemtype = vtparts[i].getVectorElementType(); 451 } 452 for (unsigned j = 0, je = elems; j != je; ++j) { 453 unsigned sz = elemtype.getSizeInBits(); 454 if (elemtype.isInteger() && (sz < 8)) 455 sz = 8; 456 totalsz += sz / 8; 457 } 458 } 459 unsigned retAlignment = 0; 460 if (!llvm::getAlign(*F, 0, retAlignment)) 461 retAlignment = TD->getABITypeAlignment(Ty); 462 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 463 << "]"; 464 } else 465 assert(false && "Unknown return type"); 466 } 467 } else { 468 SmallVector<EVT, 16> vtparts; 469 ComputeValueVTs(*TLI, Ty, vtparts); 470 unsigned idx = 0; 471 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 472 unsigned elems = 1; 473 EVT elemtype = vtparts[i]; 474 if (vtparts[i].isVector()) { 475 elems = vtparts[i].getVectorNumElements(); 476 elemtype = vtparts[i].getVectorElementType(); 477 } 478 479 for (unsigned j = 0, je = elems; j != je; ++j) { 480 unsigned sz = elemtype.getSizeInBits(); 481 if (elemtype.isInteger() && (sz < 32)) 482 sz = 32; 483 O << ".reg .b" << sz << " func_retval" << idx; 484 if (j < je - 1) 485 O << ", "; 486 ++idx; 487 } 488 if (i < e - 1) 489 O << ", "; 490 } 491 } 492 O << ") "; 493 return; 494} 495 496void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 497 raw_ostream &O) { 498 const Function *F = MF.getFunction(); 499 printReturnValStr(F, O); 500} 501 502void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 503 SmallString<128> Str; 504 raw_svector_ostream O(Str); 505 506 if (!GlobalsEmitted) { 507 emitGlobals(*MF->getFunction()->getParent()); 508 GlobalsEmitted = true; 509 } 510 511 // Set up 512 MRI = &MF->getRegInfo(); 513 F = MF->getFunction(); 514 emitLinkageDirective(F, O); 515 if (llvm::isKernelFunction(*F)) 516 O << ".entry "; 517 else { 518 O << ".func "; 519 printReturnValStr(*MF, O); 520 } 521 522 O << *CurrentFnSym; 523 524 emitFunctionParamList(*MF, O); 525 526 if (llvm::isKernelFunction(*F)) 527 emitKernelFunctionDirectives(*F, O); 528 529 OutStreamer.EmitRawText(O.str()); 530 531 prevDebugLoc = DebugLoc(); 532} 533 534void NVPTXAsmPrinter::EmitFunctionBodyStart() { 535 VRegMapping.clear(); 536 OutStreamer.EmitRawText(StringRef("{\n")); 537 setAndEmitFunctionVirtualRegisters(*MF); 538 539 SmallString<128> Str; 540 raw_svector_ostream O(Str); 541 emitDemotedVars(MF->getFunction(), O); 542 OutStreamer.EmitRawText(O.str()); 543} 544 545void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 546 OutStreamer.EmitRawText(StringRef("}\n")); 547 VRegMapping.clear(); 548} 549 550void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 551 unsigned RegNo = MI->getOperand(0).getReg(); 552 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 553 if (TRI->isVirtualRegister(RegNo)) { 554 OutStreamer.AddComment(Twine("implicit-def: ") + 555 getVirtualRegisterName(RegNo)); 556 } else { 557 OutStreamer.AddComment(Twine("implicit-def: ") + 558 TM.getRegisterInfo()->getName(RegNo)); 559 } 560 OutStreamer.AddBlankLine(); 561} 562 563void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 564 raw_ostream &O) const { 565 // If the NVVM IR has some of reqntid* specified, then output 566 // the reqntid directive, and set the unspecified ones to 1. 567 // If none of reqntid* is specified, don't output reqntid directive. 568 unsigned reqntidx, reqntidy, reqntidz; 569 bool specified = false; 570 if (llvm::getReqNTIDx(F, reqntidx) == false) 571 reqntidx = 1; 572 else 573 specified = true; 574 if (llvm::getReqNTIDy(F, reqntidy) == false) 575 reqntidy = 1; 576 else 577 specified = true; 578 if (llvm::getReqNTIDz(F, reqntidz) == false) 579 reqntidz = 1; 580 else 581 specified = true; 582 583 if (specified) 584 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 585 << "\n"; 586 587 // If the NVVM IR has some of maxntid* specified, then output 588 // the maxntid directive, and set the unspecified ones to 1. 589 // If none of maxntid* is specified, don't output maxntid directive. 590 unsigned maxntidx, maxntidy, maxntidz; 591 specified = false; 592 if (llvm::getMaxNTIDx(F, maxntidx) == false) 593 maxntidx = 1; 594 else 595 specified = true; 596 if (llvm::getMaxNTIDy(F, maxntidy) == false) 597 maxntidy = 1; 598 else 599 specified = true; 600 if (llvm::getMaxNTIDz(F, maxntidz) == false) 601 maxntidz = 1; 602 else 603 specified = true; 604 605 if (specified) 606 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 607 << "\n"; 608 609 unsigned mincta; 610 if (llvm::getMinCTASm(F, mincta)) 611 O << ".minnctapersm " << mincta << "\n"; 612} 613 614std::string 615NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 616 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 617 618 std::string Name; 619 raw_string_ostream NameStr(Name); 620 621 VRegRCMap::const_iterator I = VRegMapping.find(RC); 622 assert(I != VRegMapping.end() && "Bad register class"); 623 const DenseMap<unsigned, unsigned> &RegMap = I->second; 624 625 VRegMap::const_iterator VI = RegMap.find(Reg); 626 assert(VI != RegMap.end() && "Bad virtual register"); 627 unsigned MappedVR = VI->second; 628 629 NameStr << getNVPTXRegClassStr(RC) << MappedVR; 630 631 NameStr.flush(); 632 return Name; 633} 634 635void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 636 raw_ostream &O) { 637 O << getVirtualRegisterName(vr); 638} 639 640void NVPTXAsmPrinter::printVecModifiedImmediate( 641 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 642 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 643 int Imm = (int) MO.getImm(); 644 if (0 == strcmp(Modifier, "vecelem")) 645 O << "_" << vecelem[Imm]; 646 else if (0 == strcmp(Modifier, "vecv4comm1")) { 647 if ((Imm < 0) || (Imm > 3)) 648 O << "//"; 649 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 650 if ((Imm < 4) || (Imm > 7)) 651 O << "//"; 652 } else if (0 == strcmp(Modifier, "vecv4pos")) { 653 if (Imm < 0) 654 Imm = 0; 655 O << "_" << vecelem[Imm % 4]; 656 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 657 if ((Imm < 0) || (Imm > 1)) 658 O << "//"; 659 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 660 if ((Imm < 2) || (Imm > 3)) 661 O << "//"; 662 } else if (0 == strcmp(Modifier, "vecv2pos")) { 663 if (Imm < 0) 664 Imm = 0; 665 O << "_" << vecelem[Imm % 2]; 666 } else 667 llvm_unreachable("Unknown Modifier on immediate operand"); 668} 669 670 671 672void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 673 674 emitLinkageDirective(F, O); 675 if (llvm::isKernelFunction(*F)) 676 O << ".entry "; 677 else 678 O << ".func "; 679 printReturnValStr(F, O); 680 O << *getSymbol(F) << "\n"; 681 emitFunctionParamList(F, O); 682 O << ";\n"; 683} 684 685static bool usedInGlobalVarDef(const Constant *C) { 686 if (!C) 687 return false; 688 689 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 690 if (GV->getName().str() == "llvm.used") 691 return false; 692 return true; 693 } 694 695 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 696 ui != ue; ++ui) { 697 const Constant *C = dyn_cast<Constant>(*ui); 698 if (usedInGlobalVarDef(C)) 699 return true; 700 } 701 return false; 702} 703 704static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 705 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 706 if (othergv->getName().str() == "llvm.used") 707 return true; 708 } 709 710 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 711 if (instr->getParent() && instr->getParent()->getParent()) { 712 const Function *curFunc = instr->getParent()->getParent(); 713 if (oneFunc && (curFunc != oneFunc)) 714 return false; 715 oneFunc = curFunc; 716 return true; 717 } else 718 return false; 719 } 720 721 if (const MDNode *md = dyn_cast<MDNode>(U)) 722 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 723 (md->getName().str() == "llvm.dbg.sp"))) 724 return true; 725 726 for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); 727 ui != ue; ++ui) { 728 if (usedInOneFunc(*ui, oneFunc) == false) 729 return false; 730 } 731 return true; 732} 733 734/* Find out if a global variable can be demoted to local scope. 735 * Currently, this is valid for CUDA shared variables, which have local 736 * scope and global lifetime. So the conditions to check are : 737 * 1. Is the global variable in shared address space? 738 * 2. Does it have internal linkage? 739 * 3. Is the global variable referenced only in one function? 740 */ 741static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 742 if (gv->hasInternalLinkage() == false) 743 return false; 744 const PointerType *Pty = gv->getType(); 745 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 746 return false; 747 748 const Function *oneFunc = 0; 749 750 bool flag = usedInOneFunc(gv, oneFunc); 751 if (flag == false) 752 return false; 753 if (!oneFunc) 754 return false; 755 f = oneFunc; 756 return true; 757} 758 759static bool useFuncSeen(const Constant *C, 760 llvm::DenseMap<const Function *, bool> &seenMap) { 761 for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); 762 ui != ue; ++ui) { 763 if (const Constant *cu = dyn_cast<Constant>(*ui)) { 764 if (useFuncSeen(cu, seenMap)) 765 return true; 766 } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { 767 const BasicBlock *bb = I->getParent(); 768 if (!bb) 769 continue; 770 const Function *caller = bb->getParent(); 771 if (!caller) 772 continue; 773 if (seenMap.find(caller) != seenMap.end()) 774 return true; 775 } 776 } 777 return false; 778} 779 780void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 781 llvm::DenseMap<const Function *, bool> seenMap; 782 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 783 const Function *F = FI; 784 785 if (F->isDeclaration()) { 786 if (F->use_empty()) 787 continue; 788 if (F->getIntrinsicID()) 789 continue; 790 emitDeclaration(F, O); 791 continue; 792 } 793 for (Value::const_use_iterator iter = F->use_begin(), 794 iterEnd = F->use_end(); 795 iter != iterEnd; ++iter) { 796 if (const Constant *C = dyn_cast<Constant>(*iter)) { 797 if (usedInGlobalVarDef(C)) { 798 // The use is in the initialization of a global variable 799 // that is a function pointer, so print a declaration 800 // for the original function 801 emitDeclaration(F, O); 802 break; 803 } 804 // Emit a declaration of this function if the function that 805 // uses this constant expr has already been seen. 806 if (useFuncSeen(C, seenMap)) { 807 emitDeclaration(F, O); 808 break; 809 } 810 } 811 812 if (!isa<Instruction>(*iter)) 813 continue; 814 const Instruction *instr = cast<Instruction>(*iter); 815 const BasicBlock *bb = instr->getParent(); 816 if (!bb) 817 continue; 818 const Function *caller = bb->getParent(); 819 if (!caller) 820 continue; 821 822 // If a caller has already been seen, then the caller is 823 // appearing in the module before the callee. so print out 824 // a declaration for the callee. 825 if (seenMap.find(caller) != seenMap.end()) { 826 emitDeclaration(F, O); 827 break; 828 } 829 } 830 seenMap[F] = true; 831 } 832} 833 834void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 835 DebugInfoFinder DbgFinder; 836 DbgFinder.processModule(M); 837 838 unsigned i = 1; 839 for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), 840 E = DbgFinder.compile_unit_end(); 841 I != E; ++I) { 842 DICompileUnit DIUnit(*I); 843 StringRef Filename(DIUnit.getFilename()); 844 StringRef Dirname(DIUnit.getDirectory()); 845 SmallString<128> FullPathName = Dirname; 846 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 847 sys::path::append(FullPathName, Filename); 848 Filename = FullPathName.str(); 849 } 850 if (filenameMap.find(Filename.str()) != filenameMap.end()) 851 continue; 852 filenameMap[Filename.str()] = i; 853 OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); 854 ++i; 855 } 856 857 for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), 858 E = DbgFinder.subprogram_end(); 859 I != E; ++I) { 860 DISubprogram SP(*I); 861 StringRef Filename(SP.getFilename()); 862 StringRef Dirname(SP.getDirectory()); 863 SmallString<128> FullPathName = Dirname; 864 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 865 sys::path::append(FullPathName, Filename); 866 Filename = FullPathName.str(); 867 } 868 if (filenameMap.find(Filename.str()) != filenameMap.end()) 869 continue; 870 filenameMap[Filename.str()] = i; 871 ++i; 872 } 873} 874 875bool NVPTXAsmPrinter::doInitialization(Module &M) { 876 877 SmallString<128> Str1; 878 raw_svector_ostream OS1(Str1); 879 880 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 881 MMI->AnalyzeModule(M); 882 883 // We need to call the parent's one explicitly. 884 //bool Result = AsmPrinter::doInitialization(M); 885 886 // Initialize TargetLoweringObjectFile. 887 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 888 .Initialize(OutContext, TM); 889 890 Mang = new Mangler(&TM); 891 892 // Emit header before any dwarf directives are emitted below. 893 emitHeader(M, OS1); 894 OutStreamer.EmitRawText(OS1.str()); 895 896 // Already commented out 897 //bool Result = AsmPrinter::doInitialization(M); 898 899 // Emit module-level inline asm if it exists. 900 if (!M.getModuleInlineAsm().empty()) { 901 OutStreamer.AddComment("Start of file scope inline assembly"); 902 OutStreamer.AddBlankLine(); 903 OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm())); 904 OutStreamer.AddBlankLine(); 905 OutStreamer.AddComment("End of file scope inline assembly"); 906 OutStreamer.AddBlankLine(); 907 } 908 909 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 910 recordAndEmitFilenames(M); 911 912 GlobalsEmitted = false; 913 914 return false; // success 915} 916 917void NVPTXAsmPrinter::emitGlobals(const Module &M) { 918 SmallString<128> Str2; 919 raw_svector_ostream OS2(Str2); 920 921 emitDeclarations(M, OS2); 922 923 // As ptxas does not support forward references of globals, we need to first 924 // sort the list of module-level globals in def-use order. We visit each 925 // global variable in order, and ensure that we emit it *after* its dependent 926 // globals. We use a little extra memory maintaining both a set and a list to 927 // have fast searches while maintaining a strict ordering. 928 SmallVector<const GlobalVariable *, 8> Globals; 929 DenseSet<const GlobalVariable *> GVVisited; 930 DenseSet<const GlobalVariable *> GVVisiting; 931 932 // Visit each global variable, in order 933 for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 934 I != E; ++I) 935 VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); 936 937 assert(GVVisited.size() == M.getGlobalList().size() && 938 "Missed a global variable"); 939 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 940 941 // Print out module-level global variables in proper order 942 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 943 printModuleLevelGV(Globals[i], OS2); 944 945 OS2 << '\n'; 946 947 OutStreamer.EmitRawText(OS2.str()); 948} 949 950void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { 951 O << "//\n"; 952 O << "// Generated by LLVM NVPTX Back-End\n"; 953 O << "//\n"; 954 O << "\n"; 955 956 unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); 957 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 958 959 O << ".target "; 960 O << nvptxSubtarget.getTargetName(); 961 962 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 963 O << ", texmode_independent"; 964 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 965 if (!nvptxSubtarget.hasDouble()) 966 O << ", map_f64_to_f32"; 967 } 968 969 if (MAI->doesSupportDebugInformation()) 970 O << ", debug"; 971 972 O << "\n"; 973 974 O << ".address_size "; 975 if (nvptxSubtarget.is64Bit()) 976 O << "64"; 977 else 978 O << "32"; 979 O << "\n"; 980 981 O << "\n"; 982} 983 984bool NVPTXAsmPrinter::doFinalization(Module &M) { 985 986 // If we did not emit any functions, then the global declarations have not 987 // yet been emitted. 988 if (!GlobalsEmitted) { 989 emitGlobals(M); 990 GlobalsEmitted = true; 991 } 992 993 // XXX Temproarily remove global variables so that doFinalization() will not 994 // emit them again (global variables are emitted at beginning). 995 996 Module::GlobalListType &global_list = M.getGlobalList(); 997 int i, n = global_list.size(); 998 GlobalVariable **gv_array = new GlobalVariable *[n]; 999 1000 // first, back-up GlobalVariable in gv_array 1001 i = 0; 1002 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 1003 I != E; ++I) 1004 gv_array[i++] = &*I; 1005 1006 // second, empty global_list 1007 while (!global_list.empty()) 1008 global_list.remove(global_list.begin()); 1009 1010 // call doFinalization 1011 bool ret = AsmPrinter::doFinalization(M); 1012 1013 // now we restore global variables 1014 for (i = 0; i < n; i++) 1015 global_list.insert(global_list.end(), gv_array[i]); 1016 1017 delete[] gv_array; 1018 return ret; 1019 1020 //bool Result = AsmPrinter::doFinalization(M); 1021 // Instead of calling the parents doFinalization, we may 1022 // clone parents doFinalization and customize here. 1023 // Currently, we if NVISA out the EmitGlobals() in 1024 // parent's doFinalization, which is too intrusive. 1025 // 1026 // Same for the doInitialization. 1027 //return Result; 1028} 1029 1030// This function emits appropriate linkage directives for 1031// functions and global variables. 1032// 1033// extern function declaration -> .extern 1034// extern function definition -> .visible 1035// external global variable with init -> .visible 1036// external without init -> .extern 1037// appending -> not allowed, assert. 1038 1039void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 1040 raw_ostream &O) { 1041 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 1042 if (V->hasExternalLinkage()) { 1043 if (isa<GlobalVariable>(V)) { 1044 const GlobalVariable *GVar = cast<GlobalVariable>(V); 1045 if (GVar) { 1046 if (GVar->hasInitializer()) 1047 O << ".visible "; 1048 else 1049 O << ".extern "; 1050 } 1051 } else if (V->isDeclaration()) 1052 O << ".extern "; 1053 else 1054 O << ".visible "; 1055 } else if (V->hasAppendingLinkage()) { 1056 std::string msg; 1057 msg.append("Error: "); 1058 msg.append("Symbol "); 1059 if (V->hasName()) 1060 msg.append(V->getName().str()); 1061 msg.append("has unsupported appending linkage type"); 1062 llvm_unreachable(msg.c_str()); 1063 } 1064 } 1065} 1066 1067void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 1068 raw_ostream &O, 1069 bool processDemoted) { 1070 1071 // Skip meta data 1072 if (GVar->hasSection()) { 1073 if (GVar->getSection() == "llvm.metadata") 1074 return; 1075 } 1076 1077 const DataLayout *TD = TM.getDataLayout(); 1078 1079 // GlobalVariables are always constant pointers themselves. 1080 const PointerType *PTy = GVar->getType(); 1081 Type *ETy = PTy->getElementType(); 1082 1083 if (GVar->hasExternalLinkage()) { 1084 if (GVar->hasInitializer()) 1085 O << ".visible "; 1086 else 1087 O << ".extern "; 1088 } 1089 1090 if (llvm::isTexture(*GVar)) { 1091 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1092 return; 1093 } 1094 1095 if (llvm::isSurface(*GVar)) { 1096 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1097 return; 1098 } 1099 1100 if (GVar->isDeclaration()) { 1101 // (extern) declarations, no definition or initializer 1102 // Currently the only known declaration is for an automatic __local 1103 // (.shared) promoted to global. 1104 emitPTXGlobalVariable(GVar, O); 1105 O << ";\n"; 1106 return; 1107 } 1108 1109 if (llvm::isSampler(*GVar)) { 1110 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1111 1112 const Constant *Initializer = NULL; 1113 if (GVar->hasInitializer()) 1114 Initializer = GVar->getInitializer(); 1115 const ConstantInt *CI = NULL; 1116 if (Initializer) 1117 CI = dyn_cast<ConstantInt>(Initializer); 1118 if (CI) { 1119 unsigned sample = CI->getZExtValue(); 1120 1121 O << " = { "; 1122 1123 for (int i = 0, 1124 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1125 i < 3; i++) { 1126 O << "addr_mode_" << i << " = "; 1127 switch (addr) { 1128 case 0: 1129 O << "wrap"; 1130 break; 1131 case 1: 1132 O << "clamp_to_border"; 1133 break; 1134 case 2: 1135 O << "clamp_to_edge"; 1136 break; 1137 case 3: 1138 O << "wrap"; 1139 break; 1140 case 4: 1141 O << "mirror"; 1142 break; 1143 } 1144 O << ", "; 1145 } 1146 O << "filter_mode = "; 1147 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1148 case 0: 1149 O << "nearest"; 1150 break; 1151 case 1: 1152 O << "linear"; 1153 break; 1154 case 2: 1155 assert(0 && "Anisotropic filtering is not supported"); 1156 default: 1157 O << "nearest"; 1158 break; 1159 } 1160 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1161 O << ", force_unnormalized_coords = 1"; 1162 } 1163 O << " }"; 1164 } 1165 1166 O << ";\n"; 1167 return; 1168 } 1169 1170 if (GVar->hasPrivateLinkage()) { 1171 1172 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1173 return; 1174 1175 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1176 if (!strncmp(GVar->getName().data(), "filename", 8)) 1177 return; 1178 if (GVar->use_empty()) 1179 return; 1180 } 1181 1182 const Function *demotedFunc = 0; 1183 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1184 O << "// " << GVar->getName().str() << " has been demoted\n"; 1185 if (localDecls.find(demotedFunc) != localDecls.end()) 1186 localDecls[demotedFunc].push_back(GVar); 1187 else { 1188 std::vector<const GlobalVariable *> temp; 1189 temp.push_back(GVar); 1190 localDecls[demotedFunc] = temp; 1191 } 1192 return; 1193 } 1194 1195 O << "."; 1196 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1197 if (GVar->getAlignment() == 0) 1198 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1199 else 1200 O << " .align " << GVar->getAlignment(); 1201 1202 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1203 O << " ."; 1204 // Special case: ABI requires that we use .u8 for predicates 1205 if (ETy->isIntegerTy(1)) 1206 O << "u8"; 1207 else 1208 O << getPTXFundamentalTypeStr(ETy, false); 1209 O << " "; 1210 O << *getSymbol(GVar); 1211 1212 // Ptx allows variable initilization only for constant and global state 1213 // spaces. 1214 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1215 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1216 GVar->hasInitializer()) { 1217 const Constant *Initializer = GVar->getInitializer(); 1218 if (!Initializer->isNullValue()) { 1219 O << " = "; 1220 printScalarConstant(Initializer, O); 1221 } 1222 } 1223 } else { 1224 unsigned int ElementSize = 0; 1225 1226 // Although PTX has direct support for struct type and array type and 1227 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1228 // targets that support these high level field accesses. Structs, arrays 1229 // and vectors are lowered into arrays of bytes. 1230 switch (ETy->getTypeID()) { 1231 case Type::StructTyID: 1232 case Type::ArrayTyID: 1233 case Type::VectorTyID: 1234 ElementSize = TD->getTypeStoreSize(ETy); 1235 // Ptx allows variable initilization only for constant and 1236 // global state spaces. 1237 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1238 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1239 GVar->hasInitializer()) { 1240 const Constant *Initializer = GVar->getInitializer(); 1241 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1242 AggBuffer aggBuffer(ElementSize, O, *this); 1243 bufferAggregateConstant(Initializer, &aggBuffer); 1244 if (aggBuffer.numSymbols) { 1245 if (nvptxSubtarget.is64Bit()) { 1246 O << " .u64 " << *getSymbol(GVar) << "["; 1247 O << ElementSize / 8; 1248 } else { 1249 O << " .u32 " << *getSymbol(GVar) << "["; 1250 O << ElementSize / 4; 1251 } 1252 O << "]"; 1253 } else { 1254 O << " .b8 " << *getSymbol(GVar) << "["; 1255 O << ElementSize; 1256 O << "]"; 1257 } 1258 O << " = {"; 1259 aggBuffer.print(); 1260 O << "}"; 1261 } else { 1262 O << " .b8 " << *getSymbol(GVar); 1263 if (ElementSize) { 1264 O << "["; 1265 O << ElementSize; 1266 O << "]"; 1267 } 1268 } 1269 } else { 1270 O << " .b8 " << *getSymbol(GVar); 1271 if (ElementSize) { 1272 O << "["; 1273 O << ElementSize; 1274 O << "]"; 1275 } 1276 } 1277 break; 1278 default: 1279 assert(0 && "type not supported yet"); 1280 } 1281 1282 } 1283 O << ";\n"; 1284} 1285 1286void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1287 if (localDecls.find(f) == localDecls.end()) 1288 return; 1289 1290 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1291 1292 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1293 O << "\t// demoted variable\n\t"; 1294 printModuleLevelGV(gvars[i], O, true); 1295 } 1296} 1297 1298void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1299 raw_ostream &O) const { 1300 switch (AddressSpace) { 1301 case llvm::ADDRESS_SPACE_LOCAL: 1302 O << "local"; 1303 break; 1304 case llvm::ADDRESS_SPACE_GLOBAL: 1305 O << "global"; 1306 break; 1307 case llvm::ADDRESS_SPACE_CONST: 1308 O << "const"; 1309 break; 1310 case llvm::ADDRESS_SPACE_SHARED: 1311 O << "shared"; 1312 break; 1313 default: 1314 report_fatal_error("Bad address space found while emitting PTX"); 1315 break; 1316 } 1317} 1318 1319std::string 1320NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { 1321 switch (Ty->getTypeID()) { 1322 default: 1323 llvm_unreachable("unexpected type"); 1324 break; 1325 case Type::IntegerTyID: { 1326 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1327 if (NumBits == 1) 1328 return "pred"; 1329 else if (NumBits <= 64) { 1330 std::string name = "u"; 1331 return name + utostr(NumBits); 1332 } else { 1333 llvm_unreachable("Integer too large"); 1334 break; 1335 } 1336 break; 1337 } 1338 case Type::FloatTyID: 1339 return "f32"; 1340 case Type::DoubleTyID: 1341 return "f64"; 1342 case Type::PointerTyID: 1343 if (nvptxSubtarget.is64Bit()) 1344 if (useB4PTR) 1345 return "b64"; 1346 else 1347 return "u64"; 1348 else if (useB4PTR) 1349 return "b32"; 1350 else 1351 return "u32"; 1352 } 1353 llvm_unreachable("unexpected type"); 1354 return NULL; 1355} 1356 1357void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1358 raw_ostream &O) { 1359 1360 const DataLayout *TD = TM.getDataLayout(); 1361 1362 // GlobalVariables are always constant pointers themselves. 1363 const PointerType *PTy = GVar->getType(); 1364 Type *ETy = PTy->getElementType(); 1365 1366 O << "."; 1367 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1368 if (GVar->getAlignment() == 0) 1369 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1370 else 1371 O << " .align " << GVar->getAlignment(); 1372 1373 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1374 O << " ."; 1375 O << getPTXFundamentalTypeStr(ETy); 1376 O << " "; 1377 O << *getSymbol(GVar); 1378 return; 1379 } 1380 1381 int64_t ElementSize = 0; 1382 1383 // Although PTX has direct support for struct type and array type and LLVM IR 1384 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1385 // support these high level field accesses. Structs and arrays are lowered 1386 // into arrays of bytes. 1387 switch (ETy->getTypeID()) { 1388 case Type::StructTyID: 1389 case Type::ArrayTyID: 1390 case Type::VectorTyID: 1391 ElementSize = TD->getTypeStoreSize(ETy); 1392 O << " .b8 " << *getSymbol(GVar) << "["; 1393 if (ElementSize) { 1394 O << itostr(ElementSize); 1395 } 1396 O << "]"; 1397 break; 1398 default: 1399 assert(0 && "type not supported yet"); 1400 } 1401 return; 1402} 1403 1404static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { 1405 if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) 1406 return TD->getPrefTypeAlignment(Ty); 1407 1408 const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 1409 if (ATy) 1410 return getOpenCLAlignment(TD, ATy->getElementType()); 1411 1412 const VectorType *VTy = dyn_cast<VectorType>(Ty); 1413 if (VTy) { 1414 Type *ETy = VTy->getElementType(); 1415 unsigned int numE = VTy->getNumElements(); 1416 unsigned int alignE = TD->getPrefTypeAlignment(ETy); 1417 if (numE == 3) 1418 return 4 * alignE; 1419 else 1420 return numE * alignE; 1421 } 1422 1423 const StructType *STy = dyn_cast<StructType>(Ty); 1424 if (STy) { 1425 unsigned int alignStruct = 1; 1426 // Go through each element of the struct and find the 1427 // largest alignment. 1428 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1429 Type *ETy = STy->getElementType(i); 1430 unsigned int align = getOpenCLAlignment(TD, ETy); 1431 if (align > alignStruct) 1432 alignStruct = align; 1433 } 1434 return alignStruct; 1435 } 1436 1437 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 1438 if (FTy) 1439 return TD->getPointerPrefAlignment(); 1440 return TD->getPrefTypeAlignment(Ty); 1441} 1442 1443void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1444 int paramIndex, raw_ostream &O) { 1445 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1446 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 1447 O << *getSymbol(I->getParent()) << "_param_" << paramIndex; 1448 else { 1449 std::string argName = I->getName(); 1450 const char *p = argName.c_str(); 1451 while (*p) { 1452 if (*p == '.') 1453 O << "_"; 1454 else 1455 O << *p; 1456 p++; 1457 } 1458 } 1459} 1460 1461void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 1462 Function::const_arg_iterator I, E; 1463 int i = 0; 1464 1465 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1466 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 1467 O << *CurrentFnSym << "_param_" << paramIndex; 1468 return; 1469 } 1470 1471 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 1472 if (i == paramIndex) { 1473 printParamName(I, paramIndex, O); 1474 return; 1475 } 1476 } 1477 llvm_unreachable("paramIndex out of bound"); 1478} 1479 1480void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1481 const DataLayout *TD = TM.getDataLayout(); 1482 const AttributeSet &PAL = F->getAttributes(); 1483 const TargetLowering *TLI = TM.getTargetLowering(); 1484 Function::const_arg_iterator I, E; 1485 unsigned paramIndex = 0; 1486 bool first = true; 1487 bool isKernelFunc = llvm::isKernelFunction(*F); 1488 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 1489 MVT thePointerTy = TLI->getPointerTy(); 1490 1491 O << "(\n"; 1492 1493 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1494 Type *Ty = I->getType(); 1495 1496 if (!first) 1497 O << ",\n"; 1498 1499 first = false; 1500 1501 // Handle image/sampler parameters 1502 if (llvm::isSampler(*I) || llvm::isImage(*I)) { 1503 if (llvm::isImage(*I)) { 1504 std::string sname = I->getName(); 1505 if (llvm::isImageWriteOnly(*I)) 1506 O << "\t.param .surfref " << *getSymbol(F) << "_param_" 1507 << paramIndex; 1508 else // Default image is read_only 1509 O << "\t.param .texref " << *getSymbol(F) << "_param_" 1510 << paramIndex; 1511 } else // Should be llvm::isSampler(*I) 1512 O << "\t.param .samplerref " << *getSymbol(F) << "_param_" 1513 << paramIndex; 1514 continue; 1515 } 1516 1517 if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { 1518 if (Ty->isVectorTy()) { 1519 // Just print .param .b8 .align <a> .param[size]; 1520 // <a> = PAL.getparamalignment 1521 // size = typeallocsize of element type 1522 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1523 if (align == 0) 1524 align = TD->getABITypeAlignment(Ty); 1525 1526 unsigned sz = TD->getTypeAllocSize(Ty); 1527 O << "\t.param .align " << align << " .b8 "; 1528 printParamName(I, paramIndex, O); 1529 O << "[" << sz << "]"; 1530 1531 continue; 1532 } 1533 // Just a scalar 1534 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1535 if (isKernelFunc) { 1536 if (PTy) { 1537 // Special handling for pointer arguments to kernel 1538 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1539 1540 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 1541 Type *ETy = PTy->getElementType(); 1542 int addrSpace = PTy->getAddressSpace(); 1543 switch (addrSpace) { 1544 default: 1545 O << ".ptr "; 1546 break; 1547 case llvm::ADDRESS_SPACE_CONST: 1548 O << ".ptr .const "; 1549 break; 1550 case llvm::ADDRESS_SPACE_SHARED: 1551 O << ".ptr .shared "; 1552 break; 1553 case llvm::ADDRESS_SPACE_GLOBAL: 1554 O << ".ptr .global "; 1555 break; 1556 } 1557 O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; 1558 } 1559 printParamName(I, paramIndex, O); 1560 continue; 1561 } 1562 1563 // non-pointer scalar to kernel func 1564 O << "\t.param ."; 1565 // Special case: predicate operands become .u8 types 1566 if (Ty->isIntegerTy(1)) 1567 O << "u8"; 1568 else 1569 O << getPTXFundamentalTypeStr(Ty); 1570 O << " "; 1571 printParamName(I, paramIndex, O); 1572 continue; 1573 } 1574 // Non-kernel function, just print .param .b<size> for ABI 1575 // and .reg .b<size> for non ABY 1576 unsigned sz = 0; 1577 if (isa<IntegerType>(Ty)) { 1578 sz = cast<IntegerType>(Ty)->getBitWidth(); 1579 if (sz < 32) 1580 sz = 32; 1581 } else if (isa<PointerType>(Ty)) 1582 sz = thePointerTy.getSizeInBits(); 1583 else 1584 sz = Ty->getPrimitiveSizeInBits(); 1585 if (isABI) 1586 O << "\t.param .b" << sz << " "; 1587 else 1588 O << "\t.reg .b" << sz << " "; 1589 printParamName(I, paramIndex, O); 1590 continue; 1591 } 1592 1593 // param has byVal attribute. So should be a pointer 1594 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1595 assert(PTy && "Param with byval attribute should be a pointer type"); 1596 Type *ETy = PTy->getElementType(); 1597 1598 if (isABI || isKernelFunc) { 1599 // Just print .param .b8 .align <a> .param[size]; 1600 // <a> = PAL.getparamalignment 1601 // size = typeallocsize of element type 1602 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1603 if (align == 0) 1604 align = TD->getABITypeAlignment(ETy); 1605 1606 unsigned sz = TD->getTypeAllocSize(ETy); 1607 O << "\t.param .align " << align << " .b8 "; 1608 printParamName(I, paramIndex, O); 1609 O << "[" << sz << "]"; 1610 continue; 1611 } else { 1612 // Split the ETy into constituent parts and 1613 // print .param .b<size> <name> for each part. 1614 // Further, if a part is vector, print the above for 1615 // each vector element. 1616 SmallVector<EVT, 16> vtparts; 1617 ComputeValueVTs(*TLI, ETy, vtparts); 1618 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1619 unsigned elems = 1; 1620 EVT elemtype = vtparts[i]; 1621 if (vtparts[i].isVector()) { 1622 elems = vtparts[i].getVectorNumElements(); 1623 elemtype = vtparts[i].getVectorElementType(); 1624 } 1625 1626 for (unsigned j = 0, je = elems; j != je; ++j) { 1627 unsigned sz = elemtype.getSizeInBits(); 1628 if (elemtype.isInteger() && (sz < 32)) 1629 sz = 32; 1630 O << "\t.reg .b" << sz << " "; 1631 printParamName(I, paramIndex, O); 1632 if (j < je - 1) 1633 O << ",\n"; 1634 ++paramIndex; 1635 } 1636 if (i < e - 1) 1637 O << ",\n"; 1638 } 1639 --paramIndex; 1640 continue; 1641 } 1642 } 1643 1644 O << "\n)\n"; 1645} 1646 1647void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1648 raw_ostream &O) { 1649 const Function *F = MF.getFunction(); 1650 emitFunctionParamList(F, O); 1651} 1652 1653void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1654 const MachineFunction &MF) { 1655 SmallString<128> Str; 1656 raw_svector_ostream O(Str); 1657 1658 // Map the global virtual register number to a register class specific 1659 // virtual register number starting from 1 with that class. 1660 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 1661 //unsigned numRegClasses = TRI->getNumRegClasses(); 1662 1663 // Emit the Fake Stack Object 1664 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1665 int NumBytes = (int) MFI->getStackSize(); 1666 if (NumBytes) { 1667 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 1668 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1669 if (nvptxSubtarget.is64Bit()) { 1670 O << "\t.reg .b64 \t%SP;\n"; 1671 O << "\t.reg .b64 \t%SPL;\n"; 1672 } else { 1673 O << "\t.reg .b32 \t%SP;\n"; 1674 O << "\t.reg .b32 \t%SPL;\n"; 1675 } 1676 } 1677 1678 // Go through all virtual registers to establish the mapping between the 1679 // global virtual 1680 // register number and the per class virtual register number. 1681 // We use the per class virtual register number in the ptx output. 1682 unsigned int numVRs = MRI->getNumVirtRegs(); 1683 for (unsigned i = 0; i < numVRs; i++) { 1684 unsigned int vr = TRI->index2VirtReg(i); 1685 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1686 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1687 int n = regmap.size(); 1688 regmap.insert(std::make_pair(vr, n + 1)); 1689 } 1690 1691 // Emit register declarations 1692 // @TODO: Extract out the real register usage 1693 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1694 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1695 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1696 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1697 // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; 1698 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1699 // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; 1700 1701 // Emit declaration of the virtual registers or 'physical' registers for 1702 // each register class 1703 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 1704 const TargetRegisterClass *RC = TRI->getRegClass(i); 1705 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1706 std::string rcname = getNVPTXRegClassName(RC); 1707 std::string rcStr = getNVPTXRegClassStr(RC); 1708 int n = regmap.size(); 1709 1710 // Only declare those registers that may be used. 1711 if (n) { 1712 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1713 << ">;\n"; 1714 } 1715 } 1716 1717 OutStreamer.EmitRawText(O.str()); 1718} 1719 1720void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1721 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1722 bool ignored; 1723 unsigned int numHex; 1724 const char *lead; 1725 1726 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1727 numHex = 8; 1728 lead = "0f"; 1729 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 1730 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1731 numHex = 16; 1732 lead = "0d"; 1733 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 1734 } else 1735 llvm_unreachable("unsupported fp type"); 1736 1737 APInt API = APF.bitcastToAPInt(); 1738 std::string hexstr(utohexstr(API.getZExtValue())); 1739 O << lead; 1740 if (hexstr.length() < numHex) 1741 O << std::string(numHex - hexstr.length(), '0'); 1742 O << utohexstr(API.getZExtValue()); 1743} 1744 1745void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 1746 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1747 O << CI->getValue(); 1748 return; 1749 } 1750 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1751 printFPConstant(CFP, O); 1752 return; 1753 } 1754 if (isa<ConstantPointerNull>(CPV)) { 1755 O << "0"; 1756 return; 1757 } 1758 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1759 O << *getSymbol(GVar); 1760 return; 1761 } 1762 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1763 const Value *v = Cexpr->stripPointerCasts(); 1764 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1765 O << *getSymbol(GVar); 1766 return; 1767 } else { 1768 O << *LowerConstant(CPV, *this); 1769 return; 1770 } 1771 } 1772 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1773} 1774 1775void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1776 AggBuffer *aggBuffer) { 1777 1778 const DataLayout *TD = TM.getDataLayout(); 1779 1780 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1781 int s = TD->getTypeAllocSize(CPV->getType()); 1782 if (s < Bytes) 1783 s = Bytes; 1784 aggBuffer->addZeros(s); 1785 return; 1786 } 1787 1788 unsigned char *ptr; 1789 switch (CPV->getType()->getTypeID()) { 1790 1791 case Type::IntegerTyID: { 1792 const Type *ETy = CPV->getType(); 1793 if (ETy == Type::getInt8Ty(CPV->getContext())) { 1794 unsigned char c = 1795 (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1796 ptr = &c; 1797 aggBuffer->addBytes(ptr, 1, Bytes); 1798 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1799 short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1800 ptr = (unsigned char *)&int16; 1801 aggBuffer->addBytes(ptr, 2, Bytes); 1802 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1803 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1804 int int32 = (int)(constInt->getZExtValue()); 1805 ptr = (unsigned char *)&int32; 1806 aggBuffer->addBytes(ptr, 4, Bytes); 1807 break; 1808 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1809 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1810 ConstantFoldConstantExpression(Cexpr, TD))) { 1811 int int32 = (int)(constInt->getZExtValue()); 1812 ptr = (unsigned char *)&int32; 1813 aggBuffer->addBytes(ptr, 4, Bytes); 1814 break; 1815 } 1816 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1817 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1818 aggBuffer->addSymbol(v); 1819 aggBuffer->addZeros(4); 1820 break; 1821 } 1822 } 1823 llvm_unreachable("unsupported integer const type"); 1824 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1825 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1826 long long int64 = (long long)(constInt->getZExtValue()); 1827 ptr = (unsigned char *)&int64; 1828 aggBuffer->addBytes(ptr, 8, Bytes); 1829 break; 1830 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1831 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1832 ConstantFoldConstantExpression(Cexpr, TD))) { 1833 long long int64 = (long long)(constInt->getZExtValue()); 1834 ptr = (unsigned char *)&int64; 1835 aggBuffer->addBytes(ptr, 8, Bytes); 1836 break; 1837 } 1838 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1839 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1840 aggBuffer->addSymbol(v); 1841 aggBuffer->addZeros(8); 1842 break; 1843 } 1844 } 1845 llvm_unreachable("unsupported integer const type"); 1846 } else 1847 llvm_unreachable("unsupported integer const type"); 1848 break; 1849 } 1850 case Type::FloatTyID: 1851 case Type::DoubleTyID: { 1852 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1853 const Type *Ty = CFP->getType(); 1854 if (Ty == Type::getFloatTy(CPV->getContext())) { 1855 float float32 = (float) CFP->getValueAPF().convertToFloat(); 1856 ptr = (unsigned char *)&float32; 1857 aggBuffer->addBytes(ptr, 4, Bytes); 1858 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1859 double float64 = CFP->getValueAPF().convertToDouble(); 1860 ptr = (unsigned char *)&float64; 1861 aggBuffer->addBytes(ptr, 8, Bytes); 1862 } else { 1863 llvm_unreachable("unsupported fp const type"); 1864 } 1865 break; 1866 } 1867 case Type::PointerTyID: { 1868 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1869 aggBuffer->addSymbol(GVar); 1870 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1871 const Value *v = Cexpr->stripPointerCasts(); 1872 aggBuffer->addSymbol(v); 1873 } 1874 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 1875 aggBuffer->addZeros(s); 1876 break; 1877 } 1878 1879 case Type::ArrayTyID: 1880 case Type::VectorTyID: 1881 case Type::StructTyID: { 1882 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 1883 isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) { 1884 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 1885 bufferAggregateConstant(CPV, aggBuffer); 1886 if (Bytes > ElementSize) 1887 aggBuffer->addZeros(Bytes - ElementSize); 1888 } else if (isa<ConstantAggregateZero>(CPV)) 1889 aggBuffer->addZeros(Bytes); 1890 else 1891 llvm_unreachable("Unexpected Constant type"); 1892 break; 1893 } 1894 1895 default: 1896 llvm_unreachable("unsupported type"); 1897 } 1898} 1899 1900void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 1901 AggBuffer *aggBuffer) { 1902 const DataLayout *TD = TM.getDataLayout(); 1903 int Bytes; 1904 1905 // Old constants 1906 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1907 if (CPV->getNumOperands()) 1908 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1909 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1910 return; 1911 } 1912 1913 if (const ConstantDataSequential *CDS = 1914 dyn_cast<ConstantDataSequential>(CPV)) { 1915 if (CDS->getNumElements()) 1916 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1917 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1918 aggBuffer); 1919 return; 1920 } 1921 1922 if (isa<ConstantStruct>(CPV)) { 1923 if (CPV->getNumOperands()) { 1924 StructType *ST = cast<StructType>(CPV->getType()); 1925 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1926 if (i == (e - 1)) 1927 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 1928 TD->getTypeAllocSize(ST) - 1929 TD->getStructLayout(ST)->getElementOffset(i); 1930 else 1931 Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - 1932 TD->getStructLayout(ST)->getElementOffset(i); 1933 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 1934 } 1935 } 1936 return; 1937 } 1938 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1939} 1940 1941// buildTypeNameMap - Run through symbol table looking for type names. 1942// 1943 1944bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 1945 1946 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 1947 1948 if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || 1949 !PI->second.compare("struct._image2d_t") || 1950 !PI->second.compare("struct._image3d_t"))) 1951 return true; 1952 1953 return false; 1954} 1955 1956 1957bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 1958 switch (MI.getOpcode()) { 1959 default: 1960 return false; 1961 case NVPTX::CallArgBeginInst: 1962 case NVPTX::CallArgEndInst0: 1963 case NVPTX::CallArgEndInst1: 1964 case NVPTX::CallArgF32: 1965 case NVPTX::CallArgF64: 1966 case NVPTX::CallArgI16: 1967 case NVPTX::CallArgI32: 1968 case NVPTX::CallArgI32imm: 1969 case NVPTX::CallArgI64: 1970 case NVPTX::CallArgParam: 1971 case NVPTX::CallVoidInst: 1972 case NVPTX::CallVoidInstReg: 1973 case NVPTX::Callseq_End: 1974 case NVPTX::CallVoidInstReg64: 1975 case NVPTX::DeclareParamInst: 1976 case NVPTX::DeclareRetMemInst: 1977 case NVPTX::DeclareRetRegInst: 1978 case NVPTX::DeclareRetScalarInst: 1979 case NVPTX::DeclareScalarParamInst: 1980 case NVPTX::DeclareScalarRegInst: 1981 case NVPTX::StoreParamF32: 1982 case NVPTX::StoreParamF64: 1983 case NVPTX::StoreParamI16: 1984 case NVPTX::StoreParamI32: 1985 case NVPTX::StoreParamI64: 1986 case NVPTX::StoreParamI8: 1987 case NVPTX::StoreRetvalF32: 1988 case NVPTX::StoreRetvalF64: 1989 case NVPTX::StoreRetvalI16: 1990 case NVPTX::StoreRetvalI32: 1991 case NVPTX::StoreRetvalI64: 1992 case NVPTX::StoreRetvalI8: 1993 case NVPTX::LastCallArgF32: 1994 case NVPTX::LastCallArgF64: 1995 case NVPTX::LastCallArgI16: 1996 case NVPTX::LastCallArgI32: 1997 case NVPTX::LastCallArgI32imm: 1998 case NVPTX::LastCallArgI64: 1999 case NVPTX::LastCallArgParam: 2000 case NVPTX::LoadParamMemF32: 2001 case NVPTX::LoadParamMemF64: 2002 case NVPTX::LoadParamMemI16: 2003 case NVPTX::LoadParamMemI32: 2004 case NVPTX::LoadParamMemI64: 2005 case NVPTX::LoadParamMemI8: 2006 case NVPTX::PrototypeInst: 2007 case NVPTX::DBG_VALUE: 2008 return true; 2009 } 2010 return false; 2011} 2012 2013/// PrintAsmOperand - Print out an operand for an inline asm expression. 2014/// 2015bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 2016 unsigned AsmVariant, 2017 const char *ExtraCode, raw_ostream &O) { 2018 if (ExtraCode && ExtraCode[0]) { 2019 if (ExtraCode[1] != 0) 2020 return true; // Unknown modifier. 2021 2022 switch (ExtraCode[0]) { 2023 default: 2024 // See if this is a generic print operand 2025 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 2026 case 'r': 2027 break; 2028 } 2029 } 2030 2031 printOperand(MI, OpNo, O); 2032 2033 return false; 2034} 2035 2036bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 2037 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 2038 const char *ExtraCode, raw_ostream &O) { 2039 if (ExtraCode && ExtraCode[0]) 2040 return true; // Unknown modifier 2041 2042 O << '['; 2043 printMemOperand(MI, OpNo, O); 2044 O << ']'; 2045 2046 return false; 2047} 2048 2049void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 2050 raw_ostream &O, const char *Modifier) { 2051 const MachineOperand &MO = MI->getOperand(opNum); 2052 switch (MO.getType()) { 2053 case MachineOperand::MO_Register: 2054 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 2055 if (MO.getReg() == NVPTX::VRDepot) 2056 O << DEPOTNAME << getFunctionNumber(); 2057 else 2058 O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 2059 } else { 2060 emitVirtualRegister(MO.getReg(), O); 2061 } 2062 return; 2063 2064 case MachineOperand::MO_Immediate: 2065 if (!Modifier) 2066 O << MO.getImm(); 2067 else if (strstr(Modifier, "vec") == Modifier) 2068 printVecModifiedImmediate(MO, Modifier, O); 2069 else 2070 llvm_unreachable( 2071 "Don't know how to handle modifier on immediate operand"); 2072 return; 2073 2074 case MachineOperand::MO_FPImmediate: 2075 printFPConstant(MO.getFPImm(), O); 2076 break; 2077 2078 case MachineOperand::MO_GlobalAddress: 2079 O << *getSymbol(MO.getGlobal()); 2080 break; 2081 2082 case MachineOperand::MO_ExternalSymbol: { 2083 const char *symbname = MO.getSymbolName(); 2084 if (strstr(symbname, ".PARAM") == symbname) { 2085 unsigned index; 2086 sscanf(symbname + 6, "%u[];", &index); 2087 printParamName(index, O); 2088 } else if (strstr(symbname, ".HLPPARAM") == symbname) { 2089 unsigned index; 2090 sscanf(symbname + 9, "%u[];", &index); 2091 O << *CurrentFnSym << "_param_" << index << "_offset"; 2092 } else 2093 O << symbname; 2094 break; 2095 } 2096 2097 case MachineOperand::MO_MachineBasicBlock: 2098 O << *MO.getMBB()->getSymbol(); 2099 return; 2100 2101 default: 2102 llvm_unreachable("Operand type not supported."); 2103 } 2104} 2105 2106void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 2107 raw_ostream &O, const char *Modifier) { 2108 printOperand(MI, opNum, O); 2109 2110 if (Modifier && !strcmp(Modifier, "add")) { 2111 O << ", "; 2112 printOperand(MI, opNum + 1, O); 2113 } else { 2114 if (MI->getOperand(opNum + 1).isImm() && 2115 MI->getOperand(opNum + 1).getImm() == 0) 2116 return; // don't print ',0' or '+0' 2117 O << "+"; 2118 printOperand(MI, opNum + 1, O); 2119 } 2120} 2121 2122 2123// Force static initialization. 2124extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 2125 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2126 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2127} 2128 2129void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2130 std::stringstream temp; 2131 LineReader *reader = this->getReader(filename.str()); 2132 temp << "\n//"; 2133 temp << filename.str(); 2134 temp << ":"; 2135 temp << line; 2136 temp << " "; 2137 temp << reader->readLine(line); 2138 temp << "\n"; 2139 this->OutStreamer.EmitRawText(Twine(temp.str())); 2140} 2141 2142LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2143 if (reader == NULL) { 2144 reader = new LineReader(filename); 2145 } 2146 2147 if (reader->fileName() != filename) { 2148 delete reader; 2149 reader = new LineReader(filename); 2150 } 2151 2152 return reader; 2153} 2154 2155std::string LineReader::readLine(unsigned lineNum) { 2156 if (lineNum < theCurLine) { 2157 theCurLine = 0; 2158 fstr.seekg(0, std::ios::beg); 2159 } 2160 while (theCurLine < lineNum) { 2161 fstr.getline(buff, 500); 2162 theCurLine++; 2163 } 2164 return buff; 2165} 2166 2167// Force static initialization. 2168extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2169 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2170 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2171} 2172