1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a printer that converts from our internal representation 11// of machine-dependent LLVM code to NVPTX assembly language. 12// 13//===----------------------------------------------------------------------===// 14 15#include "NVPTXAsmPrinter.h" 16#include "InstPrinter/NVPTXInstPrinter.h" 17#include "MCTargetDesc/NVPTXMCAsmInfo.h" 18#include "NVPTX.h" 19#include "NVPTXInstrInfo.h" 20#include "NVPTXMCExpr.h" 21#include "NVPTXMachineFunctionInfo.h" 22#include "NVPTXRegisterInfo.h" 23#include "NVPTXTargetMachine.h" 24#include "NVPTXUtilities.h" 25#include "cl_common_defines.h" 26#include "llvm/ADT/StringExtras.h" 27#include "llvm/Analysis/ConstantFolding.h" 28#include "llvm/CodeGen/Analysis.h" 29#include "llvm/CodeGen/MachineFrameInfo.h" 30#include "llvm/CodeGen/MachineLoopInfo.h" 31#include "llvm/CodeGen/MachineModuleInfo.h" 32#include "llvm/CodeGen/MachineRegisterInfo.h" 33#include "llvm/IR/DebugInfo.h" 34#include "llvm/IR/DerivedTypes.h" 35#include "llvm/IR/Function.h" 36#include "llvm/IR/GlobalVariable.h" 37#include "llvm/IR/Mangler.h" 38#include "llvm/IR/Module.h" 39#include "llvm/IR/Operator.h" 40#include "llvm/MC/MCStreamer.h" 41#include "llvm/MC/MCSymbol.h" 42#include "llvm/Support/CommandLine.h" 43#include "llvm/Support/ErrorHandling.h" 44#include "llvm/Support/FormattedStream.h" 45#include "llvm/Support/Path.h" 46#include "llvm/Support/TargetRegistry.h" 47#include "llvm/Support/TimeValue.h" 48#include "llvm/Target/TargetLoweringObjectFile.h" 49#include "llvm/Transforms/Utils/UnrollLoop.h" 50#include <sstream> 51using namespace llvm; 52 53#define DEPOTNAME "__local_depot" 54 55static cl::opt<bool> 56EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden, 57 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 58 cl::init(true)); 59 60static cl::opt<bool> 61InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden, 62 cl::desc("NVPTX Specific: Emit source line in ptx file"), 63 cl::init(false)); 64 65namespace { 66/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 67/// depends. 68void DiscoverDependentGlobals(const Value *V, 69 DenseSet<const GlobalVariable *> &Globals) { 70 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 71 Globals.insert(GV); 72 else { 73 if (const User *U = dyn_cast<User>(V)) { 74 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 75 DiscoverDependentGlobals(U->getOperand(i), Globals); 76 } 77 } 78 } 79} 80 81/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 82/// instances to be emitted, but only after any dependents have been added 83/// first. 84void VisitGlobalVariableForEmission( 85 const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order, 86 DenseSet<const GlobalVariable *> &Visited, 87 DenseSet<const GlobalVariable *> &Visiting) { 88 // Have we already visited this one? 89 if (Visited.count(GV)) 90 return; 91 92 // Do we have a circular dependency? 93 if (!Visiting.insert(GV).second) 94 report_fatal_error("Circular dependency found in global variable set"); 95 96 // Make sure we visit all dependents first 97 DenseSet<const GlobalVariable *> Others; 98 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 99 DiscoverDependentGlobals(GV->getOperand(i), Others); 100 101 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 102 E = Others.end(); 103 I != E; ++I) 104 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 105 106 // Now we can visit ourself 107 Order.push_back(GV); 108 Visited.insert(GV); 109 Visiting.erase(GV); 110} 111} 112 113void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { 114 if (!EmitLineNumbers) 115 return; 116 if (ignoreLoc(MI)) 117 return; 118 119 DebugLoc curLoc = MI.getDebugLoc(); 120 121 if (!prevDebugLoc && !curLoc) 122 return; 123 124 if (prevDebugLoc == curLoc) 125 return; 126 127 prevDebugLoc = curLoc; 128 129 if (!curLoc) 130 return; 131 132 auto *Scope = cast_or_null<MDScope>(curLoc.getScope()); 133 if (!Scope) 134 return; 135 136 StringRef fileName(Scope->getFilename()); 137 StringRef dirName(Scope->getDirectory()); 138 SmallString<128> FullPathName = dirName; 139 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 140 sys::path::append(FullPathName, fileName); 141 fileName = FullPathName; 142 } 143 144 if (filenameMap.find(fileName) == filenameMap.end()) 145 return; 146 147 // Emit the line from the source file. 148 if (InterleaveSrc) 149 this->emitSrcInText(fileName, curLoc.getLine()); 150 151 std::stringstream temp; 152 temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine() 153 << " " << curLoc.getCol(); 154 OutStreamer.EmitRawText(temp.str()); 155} 156 157void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 158 SmallString<128> Str; 159 raw_svector_ostream OS(Str); 160 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) 161 emitLineNumberAsDotLoc(*MI); 162 163 MCInst Inst; 164 lowerToMCInst(MI, Inst); 165 EmitToStreamer(OutStreamer, Inst); 166} 167 168// Handle symbol backtracking for targets that do not support image handles 169bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, 170 unsigned OpNo, MCOperand &MCOp) { 171 const MachineOperand &MO = MI->getOperand(OpNo); 172 const MCInstrDesc &MCID = MI->getDesc(); 173 174 if (MCID.TSFlags & NVPTXII::IsTexFlag) { 175 // This is a texture fetch, so operand 4 is a texref and operand 5 is 176 // a samplerref 177 if (OpNo == 4 && MO.isImm()) { 178 lowerImageHandleSymbol(MO.getImm(), MCOp); 179 return true; 180 } 181 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { 182 lowerImageHandleSymbol(MO.getImm(), MCOp); 183 return true; 184 } 185 186 return false; 187 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { 188 unsigned VecSize = 189 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); 190 191 // For a surface load of vector size N, the Nth operand will be the surfref 192 if (OpNo == VecSize && MO.isImm()) { 193 lowerImageHandleSymbol(MO.getImm(), MCOp); 194 return true; 195 } 196 197 return false; 198 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { 199 // This is a surface store, so operand 0 is a surfref 200 if (OpNo == 0 && MO.isImm()) { 201 lowerImageHandleSymbol(MO.getImm(), MCOp); 202 return true; 203 } 204 205 return false; 206 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { 207 // This is a query, so operand 1 is a surfref/texref 208 if (OpNo == 1 && MO.isImm()) { 209 lowerImageHandleSymbol(MO.getImm(), MCOp); 210 return true; 211 } 212 213 return false; 214 } 215 216 return false; 217} 218 219void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { 220 // Ewwww 221 TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget()); 222 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); 223 const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); 224 const char *Sym = MFI->getImageHandleSymbol(Index); 225 std::string *SymNamePtr = 226 nvTM.getManagedStrPool()->getManagedString(Sym); 227 MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol( 228 StringRef(SymNamePtr->c_str()))); 229} 230 231void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 232 OutMI.setOpcode(MI->getOpcode()); 233 // Special: Do not mangle symbol operand of CALL_PROTOTYPE 234 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 235 const MachineOperand &MO = MI->getOperand(0); 236 OutMI.addOperand(GetSymbolRef( 237 OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName())))); 238 return; 239 } 240 241 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 242 const MachineOperand &MO = MI->getOperand(i); 243 244 MCOperand MCOp; 245 if (!nvptxSubtarget->hasImageHandles()) { 246 if (lowerImageHandleOperand(MI, i, MCOp)) { 247 OutMI.addOperand(MCOp); 248 continue; 249 } 250 } 251 252 if (lowerOperand(MO, MCOp)) 253 OutMI.addOperand(MCOp); 254 } 255} 256 257bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 258 MCOperand &MCOp) { 259 switch (MO.getType()) { 260 default: llvm_unreachable("unknown operand type"); 261 case MachineOperand::MO_Register: 262 MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg())); 263 break; 264 case MachineOperand::MO_Immediate: 265 MCOp = MCOperand::CreateImm(MO.getImm()); 266 break; 267 case MachineOperand::MO_MachineBasicBlock: 268 MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( 269 MO.getMBB()->getSymbol(), OutContext)); 270 break; 271 case MachineOperand::MO_ExternalSymbol: 272 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); 273 break; 274 case MachineOperand::MO_GlobalAddress: 275 MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); 276 break; 277 case MachineOperand::MO_FPImmediate: { 278 const ConstantFP *Cnt = MO.getFPImm(); 279 APFloat Val = Cnt->getValueAPF(); 280 281 switch (Cnt->getType()->getTypeID()) { 282 default: report_fatal_error("Unsupported FP type"); break; 283 case Type::FloatTyID: 284 MCOp = MCOperand::CreateExpr( 285 NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext)); 286 break; 287 case Type::DoubleTyID: 288 MCOp = MCOperand::CreateExpr( 289 NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext)); 290 break; 291 } 292 break; 293 } 294 } 295 return true; 296} 297 298unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 299 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 300 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 301 302 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 303 unsigned RegNum = RegMap[Reg]; 304 305 // Encode the register class in the upper 4 bits 306 // Must be kept in sync with NVPTXInstPrinter::printRegName 307 unsigned Ret = 0; 308 if (RC == &NVPTX::Int1RegsRegClass) { 309 Ret = (1 << 28); 310 } else if (RC == &NVPTX::Int16RegsRegClass) { 311 Ret = (2 << 28); 312 } else if (RC == &NVPTX::Int32RegsRegClass) { 313 Ret = (3 << 28); 314 } else if (RC == &NVPTX::Int64RegsRegClass) { 315 Ret = (4 << 28); 316 } else if (RC == &NVPTX::Float32RegsRegClass) { 317 Ret = (5 << 28); 318 } else if (RC == &NVPTX::Float64RegsRegClass) { 319 Ret = (6 << 28); 320 } else { 321 report_fatal_error("Bad register class"); 322 } 323 324 // Insert the vreg number 325 Ret |= (RegNum & 0x0FFFFFFF); 326 return Ret; 327 } else { 328 // Some special-use registers are actually physical registers. 329 // Encode this as the register class ID of 0 and the real register ID. 330 return Reg & 0x0FFFFFFF; 331 } 332} 333 334MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { 335 const MCExpr *Expr; 336 Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, 337 OutContext); 338 return MCOperand::CreateExpr(Expr); 339} 340 341void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 342 const DataLayout *TD = TM.getDataLayout(); 343 const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); 344 345 Type *Ty = F->getReturnType(); 346 347 bool isABI = (nvptxSubtarget->getSmVersion() >= 20); 348 349 if (Ty->getTypeID() == Type::VoidTyID) 350 return; 351 352 O << " ("; 353 354 if (isABI) { 355 if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) { 356 unsigned size = 0; 357 if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { 358 size = ITy->getBitWidth(); 359 if (size < 32) 360 size = 32; 361 } else { 362 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 363 size = Ty->getPrimitiveSizeInBits(); 364 } 365 366 O << ".param .b" << size << " func_retval0"; 367 } else if (isa<PointerType>(Ty)) { 368 O << ".param .b" << TLI->getPointerTy().getSizeInBits() 369 << " func_retval0"; 370 } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { 371 unsigned totalsz = TD->getTypeAllocSize(Ty); 372 unsigned retAlignment = 0; 373 if (!llvm::getAlign(*F, 0, retAlignment)) 374 retAlignment = TD->getABITypeAlignment(Ty); 375 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 376 << "]"; 377 } else 378 llvm_unreachable("Unknown return type"); 379 } else { 380 SmallVector<EVT, 16> vtparts; 381 ComputeValueVTs(*TLI, Ty, vtparts); 382 unsigned idx = 0; 383 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 384 unsigned elems = 1; 385 EVT elemtype = vtparts[i]; 386 if (vtparts[i].isVector()) { 387 elems = vtparts[i].getVectorNumElements(); 388 elemtype = vtparts[i].getVectorElementType(); 389 } 390 391 for (unsigned j = 0, je = elems; j != je; ++j) { 392 unsigned sz = elemtype.getSizeInBits(); 393 if (elemtype.isInteger() && (sz < 32)) 394 sz = 32; 395 O << ".reg .b" << sz << " func_retval" << idx; 396 if (j < je - 1) 397 O << ", "; 398 ++idx; 399 } 400 if (i < e - 1) 401 O << ", "; 402 } 403 } 404 O << ") "; 405 return; 406} 407 408void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 409 raw_ostream &O) { 410 const Function *F = MF.getFunction(); 411 printReturnValStr(F, O); 412} 413 414// Return true if MBB is the header of a loop marked with 415// llvm.loop.unroll.disable. 416// TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll". 417bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( 418 const MachineBasicBlock &MBB) const { 419 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); 420 // TODO: isLoopHeader() should take "const MachineBasicBlock *". 421 // We insert .pragma "nounroll" only to the loop header. 422 if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB))) 423 return false; 424 425 // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore, 426 // we iterate through each back edge of the loop with header MBB, and check 427 // whether its metadata contains llvm.loop.unroll.disable. 428 for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) { 429 const MachineBasicBlock *PMBB = *I; 430 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) { 431 // Edges from other loops to MBB are not back edges. 432 continue; 433 } 434 if (const BasicBlock *PBB = PMBB->getBasicBlock()) { 435 if (MDNode *LoopID = PBB->getTerminator()->getMetadata("llvm.loop")) { 436 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable")) 437 return true; 438 } 439 } 440 } 441 return false; 442} 443 444void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { 445 AsmPrinter::EmitBasicBlockStart(MBB); 446 if (isLoopHeaderOfNoUnroll(MBB)) 447 OutStreamer.EmitRawText(StringRef("\t.pragma \"nounroll\";\n")); 448} 449 450void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 451 SmallString<128> Str; 452 raw_svector_ostream O(Str); 453 454 if (!GlobalsEmitted) { 455 emitGlobals(*MF->getFunction()->getParent()); 456 GlobalsEmitted = true; 457 } 458 459 // Set up 460 MRI = &MF->getRegInfo(); 461 F = MF->getFunction(); 462 emitLinkageDirective(F, O); 463 if (llvm::isKernelFunction(*F)) 464 O << ".entry "; 465 else { 466 O << ".func "; 467 printReturnValStr(*MF, O); 468 } 469 470 O << *CurrentFnSym; 471 472 emitFunctionParamList(*MF, O); 473 474 if (llvm::isKernelFunction(*F)) 475 emitKernelFunctionDirectives(*F, O); 476 477 OutStreamer.EmitRawText(O.str()); 478 479 prevDebugLoc = DebugLoc(); 480} 481 482void NVPTXAsmPrinter::EmitFunctionBodyStart() { 483 VRegMapping.clear(); 484 OutStreamer.EmitRawText(StringRef("{\n")); 485 setAndEmitFunctionVirtualRegisters(*MF); 486 487 SmallString<128> Str; 488 raw_svector_ostream O(Str); 489 emitDemotedVars(MF->getFunction(), O); 490 OutStreamer.EmitRawText(O.str()); 491} 492 493void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 494 OutStreamer.EmitRawText(StringRef("}\n")); 495 VRegMapping.clear(); 496} 497 498void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 499 unsigned RegNo = MI->getOperand(0).getReg(); 500 if (TargetRegisterInfo::isVirtualRegister(RegNo)) { 501 OutStreamer.AddComment(Twine("implicit-def: ") + 502 getVirtualRegisterName(RegNo)); 503 } else { 504 OutStreamer.AddComment(Twine("implicit-def: ") + 505 nvptxSubtarget->getRegisterInfo()->getName(RegNo)); 506 } 507 OutStreamer.AddBlankLine(); 508} 509 510void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 511 raw_ostream &O) const { 512 // If the NVVM IR has some of reqntid* specified, then output 513 // the reqntid directive, and set the unspecified ones to 1. 514 // If none of reqntid* is specified, don't output reqntid directive. 515 unsigned reqntidx, reqntidy, reqntidz; 516 bool specified = false; 517 if (!llvm::getReqNTIDx(F, reqntidx)) 518 reqntidx = 1; 519 else 520 specified = true; 521 if (!llvm::getReqNTIDy(F, reqntidy)) 522 reqntidy = 1; 523 else 524 specified = true; 525 if (!llvm::getReqNTIDz(F, reqntidz)) 526 reqntidz = 1; 527 else 528 specified = true; 529 530 if (specified) 531 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 532 << "\n"; 533 534 // If the NVVM IR has some of maxntid* specified, then output 535 // the maxntid directive, and set the unspecified ones to 1. 536 // If none of maxntid* is specified, don't output maxntid directive. 537 unsigned maxntidx, maxntidy, maxntidz; 538 specified = false; 539 if (!llvm::getMaxNTIDx(F, maxntidx)) 540 maxntidx = 1; 541 else 542 specified = true; 543 if (!llvm::getMaxNTIDy(F, maxntidy)) 544 maxntidy = 1; 545 else 546 specified = true; 547 if (!llvm::getMaxNTIDz(F, maxntidz)) 548 maxntidz = 1; 549 else 550 specified = true; 551 552 if (specified) 553 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 554 << "\n"; 555 556 unsigned mincta; 557 if (llvm::getMinCTASm(F, mincta)) 558 O << ".minnctapersm " << mincta << "\n"; 559} 560 561std::string 562NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 563 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 564 565 std::string Name; 566 raw_string_ostream NameStr(Name); 567 568 VRegRCMap::const_iterator I = VRegMapping.find(RC); 569 assert(I != VRegMapping.end() && "Bad register class"); 570 const DenseMap<unsigned, unsigned> &RegMap = I->second; 571 572 VRegMap::const_iterator VI = RegMap.find(Reg); 573 assert(VI != RegMap.end() && "Bad virtual register"); 574 unsigned MappedVR = VI->second; 575 576 NameStr << getNVPTXRegClassStr(RC) << MappedVR; 577 578 NameStr.flush(); 579 return Name; 580} 581 582void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 583 raw_ostream &O) { 584 O << getVirtualRegisterName(vr); 585} 586 587void NVPTXAsmPrinter::printVecModifiedImmediate( 588 const MachineOperand &MO, const char *Modifier, raw_ostream &O) { 589 static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; 590 int Imm = (int) MO.getImm(); 591 if (0 == strcmp(Modifier, "vecelem")) 592 O << "_" << vecelem[Imm]; 593 else if (0 == strcmp(Modifier, "vecv4comm1")) { 594 if ((Imm < 0) || (Imm > 3)) 595 O << "//"; 596 } else if (0 == strcmp(Modifier, "vecv4comm2")) { 597 if ((Imm < 4) || (Imm > 7)) 598 O << "//"; 599 } else if (0 == strcmp(Modifier, "vecv4pos")) { 600 if (Imm < 0) 601 Imm = 0; 602 O << "_" << vecelem[Imm % 4]; 603 } else if (0 == strcmp(Modifier, "vecv2comm1")) { 604 if ((Imm < 0) || (Imm > 1)) 605 O << "//"; 606 } else if (0 == strcmp(Modifier, "vecv2comm2")) { 607 if ((Imm < 2) || (Imm > 3)) 608 O << "//"; 609 } else if (0 == strcmp(Modifier, "vecv2pos")) { 610 if (Imm < 0) 611 Imm = 0; 612 O << "_" << vecelem[Imm % 2]; 613 } else 614 llvm_unreachable("Unknown Modifier on immediate operand"); 615} 616 617 618 619void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 620 621 emitLinkageDirective(F, O); 622 if (llvm::isKernelFunction(*F)) 623 O << ".entry "; 624 else 625 O << ".func "; 626 printReturnValStr(F, O); 627 O << *getSymbol(F) << "\n"; 628 emitFunctionParamList(F, O); 629 O << ";\n"; 630} 631 632static bool usedInGlobalVarDef(const Constant *C) { 633 if (!C) 634 return false; 635 636 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 637 if (GV->getName() == "llvm.used") 638 return false; 639 return true; 640 } 641 642 for (const User *U : C->users()) 643 if (const Constant *C = dyn_cast<Constant>(U)) 644 if (usedInGlobalVarDef(C)) 645 return true; 646 647 return false; 648} 649 650static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 651 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 652 if (othergv->getName() == "llvm.used") 653 return true; 654 } 655 656 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 657 if (instr->getParent() && instr->getParent()->getParent()) { 658 const Function *curFunc = instr->getParent()->getParent(); 659 if (oneFunc && (curFunc != oneFunc)) 660 return false; 661 oneFunc = curFunc; 662 return true; 663 } else 664 return false; 665 } 666 667 for (const User *UU : U->users()) 668 if (!usedInOneFunc(UU, oneFunc)) 669 return false; 670 671 return true; 672} 673 674/* Find out if a global variable can be demoted to local scope. 675 * Currently, this is valid for CUDA shared variables, which have local 676 * scope and global lifetime. So the conditions to check are : 677 * 1. Is the global variable in shared address space? 678 * 2. Does it have internal linkage? 679 * 3. Is the global variable referenced only in one function? 680 */ 681static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 682 if (!gv->hasInternalLinkage()) 683 return false; 684 const PointerType *Pty = gv->getType(); 685 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 686 return false; 687 688 const Function *oneFunc = nullptr; 689 690 bool flag = usedInOneFunc(gv, oneFunc); 691 if (!flag) 692 return false; 693 if (!oneFunc) 694 return false; 695 f = oneFunc; 696 return true; 697} 698 699static bool useFuncSeen(const Constant *C, 700 llvm::DenseMap<const Function *, bool> &seenMap) { 701 for (const User *U : C->users()) { 702 if (const Constant *cu = dyn_cast<Constant>(U)) { 703 if (useFuncSeen(cu, seenMap)) 704 return true; 705 } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 706 const BasicBlock *bb = I->getParent(); 707 if (!bb) 708 continue; 709 const Function *caller = bb->getParent(); 710 if (!caller) 711 continue; 712 if (seenMap.find(caller) != seenMap.end()) 713 return true; 714 } 715 } 716 return false; 717} 718 719void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 720 llvm::DenseMap<const Function *, bool> seenMap; 721 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 722 const Function *F = FI; 723 724 if (F->isDeclaration()) { 725 if (F->use_empty()) 726 continue; 727 if (F->getIntrinsicID()) 728 continue; 729 emitDeclaration(F, O); 730 continue; 731 } 732 for (const User *U : F->users()) { 733 if (const Constant *C = dyn_cast<Constant>(U)) { 734 if (usedInGlobalVarDef(C)) { 735 // The use is in the initialization of a global variable 736 // that is a function pointer, so print a declaration 737 // for the original function 738 emitDeclaration(F, O); 739 break; 740 } 741 // Emit a declaration of this function if the function that 742 // uses this constant expr has already been seen. 743 if (useFuncSeen(C, seenMap)) { 744 emitDeclaration(F, O); 745 break; 746 } 747 } 748 749 if (!isa<Instruction>(U)) 750 continue; 751 const Instruction *instr = cast<Instruction>(U); 752 const BasicBlock *bb = instr->getParent(); 753 if (!bb) 754 continue; 755 const Function *caller = bb->getParent(); 756 if (!caller) 757 continue; 758 759 // If a caller has already been seen, then the caller is 760 // appearing in the module before the callee. so print out 761 // a declaration for the callee. 762 if (seenMap.find(caller) != seenMap.end()) { 763 emitDeclaration(F, O); 764 break; 765 } 766 } 767 seenMap[F] = true; 768 } 769} 770 771void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 772 DebugInfoFinder DbgFinder; 773 DbgFinder.processModule(M); 774 775 unsigned i = 1; 776 for (const MDCompileUnit *DIUnit : DbgFinder.compile_units()) { 777 StringRef Filename = DIUnit->getFilename(); 778 StringRef Dirname = DIUnit->getDirectory(); 779 SmallString<128> FullPathName = Dirname; 780 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 781 sys::path::append(FullPathName, Filename); 782 Filename = FullPathName; 783 } 784 if (filenameMap.find(Filename) != filenameMap.end()) 785 continue; 786 filenameMap[Filename] = i; 787 OutStreamer.EmitDwarfFileDirective(i, "", Filename); 788 ++i; 789 } 790 791 for (MDSubprogram *SP : DbgFinder.subprograms()) { 792 StringRef Filename = SP->getFilename(); 793 StringRef Dirname = SP->getDirectory(); 794 SmallString<128> FullPathName = Dirname; 795 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 796 sys::path::append(FullPathName, Filename); 797 Filename = FullPathName; 798 } 799 if (filenameMap.find(Filename) != filenameMap.end()) 800 continue; 801 filenameMap[Filename] = i; 802 ++i; 803 } 804} 805 806bool NVPTXAsmPrinter::doInitialization(Module &M) { 807 // Construct a default subtarget off of the TargetMachine defaults. The 808 // rest of NVPTX isn't friendly to change subtargets per function and 809 // so the default TargetMachine will have all of the options. 810 StringRef TT = TM.getTargetTriple(); 811 StringRef CPU = TM.getTargetCPU(); 812 StringRef FS = TM.getTargetFeatureString(); 813 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 814 const NVPTXSubtarget STI(TT, CPU, FS, NTM); 815 816 SmallString<128> Str1; 817 raw_svector_ostream OS1(Str1); 818 819 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 820 MMI->AnalyzeModule(M); 821 822 // We need to call the parent's one explicitly. 823 //bool Result = AsmPrinter::doInitialization(M); 824 825 // Initialize TargetLoweringObjectFile. 826 const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) 827 .Initialize(OutContext, TM); 828 829 Mang = new Mangler(TM.getDataLayout()); 830 831 // Emit header before any dwarf directives are emitted below. 832 emitHeader(M, OS1, STI); 833 OutStreamer.EmitRawText(OS1.str()); 834 835 // Already commented out 836 //bool Result = AsmPrinter::doInitialization(M); 837 838 // Emit module-level inline asm if it exists. 839 if (!M.getModuleInlineAsm().empty()) { 840 OutStreamer.AddComment("Start of file scope inline assembly"); 841 OutStreamer.AddBlankLine(); 842 OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm())); 843 OutStreamer.AddBlankLine(); 844 OutStreamer.AddComment("End of file scope inline assembly"); 845 OutStreamer.AddBlankLine(); 846 } 847 848 // If we're not NVCL we're CUDA, go ahead and emit filenames. 849 if (Triple(TM.getTargetTriple()).getOS() != Triple::NVCL) 850 recordAndEmitFilenames(M); 851 852 GlobalsEmitted = false; 853 854 return false; // success 855} 856 857void NVPTXAsmPrinter::emitGlobals(const Module &M) { 858 SmallString<128> Str2; 859 raw_svector_ostream OS2(Str2); 860 861 emitDeclarations(M, OS2); 862 863 // As ptxas does not support forward references of globals, we need to first 864 // sort the list of module-level globals in def-use order. We visit each 865 // global variable in order, and ensure that we emit it *after* its dependent 866 // globals. We use a little extra memory maintaining both a set and a list to 867 // have fast searches while maintaining a strict ordering. 868 SmallVector<const GlobalVariable *, 8> Globals; 869 DenseSet<const GlobalVariable *> GVVisited; 870 DenseSet<const GlobalVariable *> GVVisiting; 871 872 // Visit each global variable, in order 873 for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); 874 I != E; ++I) 875 VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); 876 877 assert(GVVisited.size() == M.getGlobalList().size() && 878 "Missed a global variable"); 879 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 880 881 // Print out module-level global variables in proper order 882 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 883 printModuleLevelGV(Globals[i], OS2); 884 885 OS2 << '\n'; 886 887 OutStreamer.EmitRawText(OS2.str()); 888} 889 890void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O, 891 const NVPTXSubtarget &STI) { 892 O << "//\n"; 893 O << "// Generated by LLVM NVPTX Back-End\n"; 894 O << "//\n"; 895 O << "\n"; 896 897 unsigned PTXVersion = STI.getPTXVersion(); 898 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 899 900 O << ".target "; 901 O << STI.getTargetName(); 902 903 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 904 if (NTM.getDrvInterface() == NVPTX::NVCL) 905 O << ", texmode_independent"; 906 else { 907 if (!STI.hasDouble()) 908 O << ", map_f64_to_f32"; 909 } 910 911 if (MAI->doesSupportDebugInformation()) 912 O << ", debug"; 913 914 O << "\n"; 915 916 O << ".address_size "; 917 if (NTM.is64Bit()) 918 O << "64"; 919 else 920 O << "32"; 921 O << "\n"; 922 923 O << "\n"; 924} 925 926bool NVPTXAsmPrinter::doFinalization(Module &M) { 927 // If we did not emit any functions, then the global declarations have not 928 // yet been emitted. 929 if (!GlobalsEmitted) { 930 emitGlobals(M); 931 GlobalsEmitted = true; 932 } 933 934 // XXX Temproarily remove global variables so that doFinalization() will not 935 // emit them again (global variables are emitted at beginning). 936 937 Module::GlobalListType &global_list = M.getGlobalList(); 938 int i, n = global_list.size(); 939 GlobalVariable **gv_array = new GlobalVariable *[n]; 940 941 // first, back-up GlobalVariable in gv_array 942 i = 0; 943 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 944 I != E; ++I) 945 gv_array[i++] = &*I; 946 947 // second, empty global_list 948 while (!global_list.empty()) 949 global_list.remove(global_list.begin()); 950 951 // call doFinalization 952 bool ret = AsmPrinter::doFinalization(M); 953 954 // now we restore global variables 955 for (i = 0; i < n; i++) 956 global_list.insert(global_list.end(), gv_array[i]); 957 958 clearAnnotationCache(&M); 959 960 delete[] gv_array; 961 return ret; 962 963 //bool Result = AsmPrinter::doFinalization(M); 964 // Instead of calling the parents doFinalization, we may 965 // clone parents doFinalization and customize here. 966 // Currently, we if NVISA out the EmitGlobals() in 967 // parent's doFinalization, which is too intrusive. 968 // 969 // Same for the doInitialization. 970 //return Result; 971} 972 973// This function emits appropriate linkage directives for 974// functions and global variables. 975// 976// extern function declaration -> .extern 977// extern function definition -> .visible 978// external global variable with init -> .visible 979// external without init -> .extern 980// appending -> not allowed, assert. 981// for any linkage other than 982// internal, private, linker_private, 983// linker_private_weak, linker_private_weak_def_auto, 984// we emit -> .weak. 985 986void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 987 raw_ostream &O) { 988 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) { 989 if (V->hasExternalLinkage()) { 990 if (isa<GlobalVariable>(V)) { 991 const GlobalVariable *GVar = cast<GlobalVariable>(V); 992 if (GVar) { 993 if (GVar->hasInitializer()) 994 O << ".visible "; 995 else 996 O << ".extern "; 997 } 998 } else if (V->isDeclaration()) 999 O << ".extern "; 1000 else 1001 O << ".visible "; 1002 } else if (V->hasAppendingLinkage()) { 1003 std::string msg; 1004 msg.append("Error: "); 1005 msg.append("Symbol "); 1006 if (V->hasName()) 1007 msg.append(V->getName()); 1008 msg.append("has unsupported appending linkage type"); 1009 llvm_unreachable(msg.c_str()); 1010 } else if (!V->hasInternalLinkage() && 1011 !V->hasPrivateLinkage()) { 1012 O << ".weak "; 1013 } 1014 } 1015} 1016 1017void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 1018 raw_ostream &O, 1019 bool processDemoted) { 1020 1021 // Skip meta data 1022 if (GVar->hasSection()) { 1023 if (GVar->getSection() == StringRef("llvm.metadata")) 1024 return; 1025 } 1026 1027 // Skip LLVM intrinsic global variables 1028 if (GVar->getName().startswith("llvm.") || 1029 GVar->getName().startswith("nvvm.")) 1030 return; 1031 1032 const DataLayout *TD = TM.getDataLayout(); 1033 1034 // GlobalVariables are always constant pointers themselves. 1035 const PointerType *PTy = GVar->getType(); 1036 Type *ETy = PTy->getElementType(); 1037 1038 if (GVar->hasExternalLinkage()) { 1039 if (GVar->hasInitializer()) 1040 O << ".visible "; 1041 else 1042 O << ".extern "; 1043 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() || 1044 GVar->hasAvailableExternallyLinkage() || 1045 GVar->hasCommonLinkage()) { 1046 O << ".weak "; 1047 } 1048 1049 if (llvm::isTexture(*GVar)) { 1050 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1051 return; 1052 } 1053 1054 if (llvm::isSurface(*GVar)) { 1055 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1056 return; 1057 } 1058 1059 if (GVar->isDeclaration()) { 1060 // (extern) declarations, no definition or initializer 1061 // Currently the only known declaration is for an automatic __local 1062 // (.shared) promoted to global. 1063 emitPTXGlobalVariable(GVar, O); 1064 O << ";\n"; 1065 return; 1066 } 1067 1068 if (llvm::isSampler(*GVar)) { 1069 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1070 1071 const Constant *Initializer = nullptr; 1072 if (GVar->hasInitializer()) 1073 Initializer = GVar->getInitializer(); 1074 const ConstantInt *CI = nullptr; 1075 if (Initializer) 1076 CI = dyn_cast<ConstantInt>(Initializer); 1077 if (CI) { 1078 unsigned sample = CI->getZExtValue(); 1079 1080 O << " = { "; 1081 1082 for (int i = 0, 1083 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1084 i < 3; i++) { 1085 O << "addr_mode_" << i << " = "; 1086 switch (addr) { 1087 case 0: 1088 O << "wrap"; 1089 break; 1090 case 1: 1091 O << "clamp_to_border"; 1092 break; 1093 case 2: 1094 O << "clamp_to_edge"; 1095 break; 1096 case 3: 1097 O << "wrap"; 1098 break; 1099 case 4: 1100 O << "mirror"; 1101 break; 1102 } 1103 O << ", "; 1104 } 1105 O << "filter_mode = "; 1106 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1107 case 0: 1108 O << "nearest"; 1109 break; 1110 case 1: 1111 O << "linear"; 1112 break; 1113 case 2: 1114 llvm_unreachable("Anisotropic filtering is not supported"); 1115 default: 1116 O << "nearest"; 1117 break; 1118 } 1119 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1120 O << ", force_unnormalized_coords = 1"; 1121 } 1122 O << " }"; 1123 } 1124 1125 O << ";\n"; 1126 return; 1127 } 1128 1129 if (GVar->hasPrivateLinkage()) { 1130 1131 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1132 return; 1133 1134 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1135 if (!strncmp(GVar->getName().data(), "filename", 8)) 1136 return; 1137 if (GVar->use_empty()) 1138 return; 1139 } 1140 1141 const Function *demotedFunc = nullptr; 1142 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1143 O << "// " << GVar->getName() << " has been demoted\n"; 1144 if (localDecls.find(demotedFunc) != localDecls.end()) 1145 localDecls[demotedFunc].push_back(GVar); 1146 else { 1147 std::vector<const GlobalVariable *> temp; 1148 temp.push_back(GVar); 1149 localDecls[demotedFunc] = temp; 1150 } 1151 return; 1152 } 1153 1154 O << "."; 1155 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1156 1157 if (isManaged(*GVar)) { 1158 O << " .attribute(.managed)"; 1159 } 1160 1161 if (GVar->getAlignment() == 0) 1162 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1163 else 1164 O << " .align " << GVar->getAlignment(); 1165 1166 if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { 1167 O << " ."; 1168 // Special case: ABI requires that we use .u8 for predicates 1169 if (ETy->isIntegerTy(1)) 1170 O << "u8"; 1171 else 1172 O << getPTXFundamentalTypeStr(ETy, false); 1173 O << " "; 1174 O << *getSymbol(GVar); 1175 1176 // Ptx allows variable initilization only for constant and global state 1177 // spaces. 1178 if (GVar->hasInitializer()) { 1179 if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1180 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) { 1181 const Constant *Initializer = GVar->getInitializer(); 1182 // 'undef' is treated as there is no value spefied. 1183 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) { 1184 O << " = "; 1185 printScalarConstant(Initializer, O); 1186 } 1187 } else { 1188 // The frontend adds zero-initializer to variables that don't have an 1189 // initial value, so skip warning for this case. 1190 if (!GVar->getInitializer()->isNullValue()) { 1191 std::string warnMsg = 1192 ("initial value of '" + GVar->getName() + 1193 "' is not allowed in addrspace(" + 1194 Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str(); 1195 report_fatal_error(warnMsg.c_str()); 1196 } 1197 } 1198 } 1199 } else { 1200 unsigned int ElementSize = 0; 1201 1202 // Although PTX has direct support for struct type and array type and 1203 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1204 // targets that support these high level field accesses. Structs, arrays 1205 // and vectors are lowered into arrays of bytes. 1206 switch (ETy->getTypeID()) { 1207 case Type::StructTyID: 1208 case Type::ArrayTyID: 1209 case Type::VectorTyID: 1210 ElementSize = TD->getTypeStoreSize(ETy); 1211 // Ptx allows variable initilization only for constant and 1212 // global state spaces. 1213 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1214 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && 1215 GVar->hasInitializer()) { 1216 const Constant *Initializer = GVar->getInitializer(); 1217 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1218 AggBuffer aggBuffer(ElementSize, O, *this); 1219 bufferAggregateConstant(Initializer, &aggBuffer); 1220 if (aggBuffer.numSymbols) { 1221 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { 1222 O << " .u64 " << *getSymbol(GVar) << "["; 1223 O << ElementSize / 8; 1224 } else { 1225 O << " .u32 " << *getSymbol(GVar) << "["; 1226 O << ElementSize / 4; 1227 } 1228 O << "]"; 1229 } else { 1230 O << " .b8 " << *getSymbol(GVar) << "["; 1231 O << ElementSize; 1232 O << "]"; 1233 } 1234 O << " = {"; 1235 aggBuffer.print(); 1236 O << "}"; 1237 } else { 1238 O << " .b8 " << *getSymbol(GVar); 1239 if (ElementSize) { 1240 O << "["; 1241 O << ElementSize; 1242 O << "]"; 1243 } 1244 } 1245 } else { 1246 O << " .b8 " << *getSymbol(GVar); 1247 if (ElementSize) { 1248 O << "["; 1249 O << ElementSize; 1250 O << "]"; 1251 } 1252 } 1253 break; 1254 default: 1255 llvm_unreachable("type not supported yet"); 1256 } 1257 1258 } 1259 O << ";\n"; 1260} 1261 1262void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1263 if (localDecls.find(f) == localDecls.end()) 1264 return; 1265 1266 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1267 1268 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1269 O << "\t// demoted variable\n\t"; 1270 printModuleLevelGV(gvars[i], O, true); 1271 } 1272} 1273 1274void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1275 raw_ostream &O) const { 1276 switch (AddressSpace) { 1277 case llvm::ADDRESS_SPACE_LOCAL: 1278 O << "local"; 1279 break; 1280 case llvm::ADDRESS_SPACE_GLOBAL: 1281 O << "global"; 1282 break; 1283 case llvm::ADDRESS_SPACE_CONST: 1284 O << "const"; 1285 break; 1286 case llvm::ADDRESS_SPACE_SHARED: 1287 O << "shared"; 1288 break; 1289 default: 1290 report_fatal_error("Bad address space found while emitting PTX"); 1291 break; 1292 } 1293} 1294 1295std::string 1296NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { 1297 switch (Ty->getTypeID()) { 1298 default: 1299 llvm_unreachable("unexpected type"); 1300 break; 1301 case Type::IntegerTyID: { 1302 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1303 if (NumBits == 1) 1304 return "pred"; 1305 else if (NumBits <= 64) { 1306 std::string name = "u"; 1307 return name + utostr(NumBits); 1308 } else { 1309 llvm_unreachable("Integer too large"); 1310 break; 1311 } 1312 break; 1313 } 1314 case Type::FloatTyID: 1315 return "f32"; 1316 case Type::DoubleTyID: 1317 return "f64"; 1318 case Type::PointerTyID: 1319 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) 1320 if (useB4PTR) 1321 return "b64"; 1322 else 1323 return "u64"; 1324 else if (useB4PTR) 1325 return "b32"; 1326 else 1327 return "u32"; 1328 } 1329 llvm_unreachable("unexpected type"); 1330 return nullptr; 1331} 1332 1333void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1334 raw_ostream &O) { 1335 1336 const DataLayout *TD = TM.getDataLayout(); 1337 1338 // GlobalVariables are always constant pointers themselves. 1339 const PointerType *PTy = GVar->getType(); 1340 Type *ETy = PTy->getElementType(); 1341 1342 O << "."; 1343 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1344 if (GVar->getAlignment() == 0) 1345 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1346 else 1347 O << " .align " << GVar->getAlignment(); 1348 1349 if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) { 1350 O << " ."; 1351 O << getPTXFundamentalTypeStr(ETy); 1352 O << " "; 1353 O << *getSymbol(GVar); 1354 return; 1355 } 1356 1357 int64_t ElementSize = 0; 1358 1359 // Although PTX has direct support for struct type and array type and LLVM IR 1360 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1361 // support these high level field accesses. Structs and arrays are lowered 1362 // into arrays of bytes. 1363 switch (ETy->getTypeID()) { 1364 case Type::StructTyID: 1365 case Type::ArrayTyID: 1366 case Type::VectorTyID: 1367 ElementSize = TD->getTypeStoreSize(ETy); 1368 O << " .b8 " << *getSymbol(GVar) << "["; 1369 if (ElementSize) { 1370 O << itostr(ElementSize); 1371 } 1372 O << "]"; 1373 break; 1374 default: 1375 llvm_unreachable("type not supported yet"); 1376 } 1377 return; 1378} 1379 1380static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { 1381 if (Ty->isSingleValueType()) 1382 return TD->getPrefTypeAlignment(Ty); 1383 1384 const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 1385 if (ATy) 1386 return getOpenCLAlignment(TD, ATy->getElementType()); 1387 1388 const StructType *STy = dyn_cast<StructType>(Ty); 1389 if (STy) { 1390 unsigned int alignStruct = 1; 1391 // Go through each element of the struct and find the 1392 // largest alignment. 1393 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1394 Type *ETy = STy->getElementType(i); 1395 unsigned int align = getOpenCLAlignment(TD, ETy); 1396 if (align > alignStruct) 1397 alignStruct = align; 1398 } 1399 return alignStruct; 1400 } 1401 1402 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 1403 if (FTy) 1404 return TD->getPointerPrefAlignment(); 1405 return TD->getPrefTypeAlignment(Ty); 1406} 1407 1408void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1409 int paramIndex, raw_ostream &O) { 1410 O << *getSymbol(I->getParent()) << "_param_" << paramIndex; 1411} 1412 1413void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 1414 O << *CurrentFnSym << "_param_" << paramIndex; 1415} 1416 1417void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1418 const DataLayout *TD = TM.getDataLayout(); 1419 const AttributeSet &PAL = F->getAttributes(); 1420 const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); 1421 Function::const_arg_iterator I, E; 1422 unsigned paramIndex = 0; 1423 bool first = true; 1424 bool isKernelFunc = llvm::isKernelFunction(*F); 1425 bool isABI = (nvptxSubtarget->getSmVersion() >= 20); 1426 MVT thePointerTy = TLI->getPointerTy(); 1427 1428 O << "(\n"; 1429 1430 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1431 Type *Ty = I->getType(); 1432 1433 if (!first) 1434 O << ",\n"; 1435 1436 first = false; 1437 1438 // Handle image/sampler parameters 1439 if (isKernelFunction(*F)) { 1440 if (isSampler(*I) || isImage(*I)) { 1441 if (isImage(*I)) { 1442 std::string sname = I->getName(); 1443 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { 1444 if (nvptxSubtarget->hasImageHandles()) 1445 O << "\t.param .u64 .ptr .surfref "; 1446 else 1447 O << "\t.param .surfref "; 1448 O << *CurrentFnSym << "_param_" << paramIndex; 1449 } 1450 else { // Default image is read_only 1451 if (nvptxSubtarget->hasImageHandles()) 1452 O << "\t.param .u64 .ptr .texref "; 1453 else 1454 O << "\t.param .texref "; 1455 O << *CurrentFnSym << "_param_" << paramIndex; 1456 } 1457 } else { 1458 if (nvptxSubtarget->hasImageHandles()) 1459 O << "\t.param .u64 .ptr .samplerref "; 1460 else 1461 O << "\t.param .samplerref "; 1462 O << *CurrentFnSym << "_param_" << paramIndex; 1463 } 1464 continue; 1465 } 1466 } 1467 1468 if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) { 1469 if (Ty->isAggregateType() || Ty->isVectorTy()) { 1470 // Just print .param .align <a> .b8 .param[size]; 1471 // <a> = PAL.getparamalignment 1472 // size = typeallocsize of element type 1473 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1474 if (align == 0) 1475 align = TD->getABITypeAlignment(Ty); 1476 1477 unsigned sz = TD->getTypeAllocSize(Ty); 1478 O << "\t.param .align " << align << " .b8 "; 1479 printParamName(I, paramIndex, O); 1480 O << "[" << sz << "]"; 1481 1482 continue; 1483 } 1484 // Just a scalar 1485 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1486 if (isKernelFunc) { 1487 if (PTy) { 1488 // Special handling for pointer arguments to kernel 1489 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1490 1491 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() != 1492 NVPTX::CUDA) { 1493 Type *ETy = PTy->getElementType(); 1494 int addrSpace = PTy->getAddressSpace(); 1495 switch (addrSpace) { 1496 default: 1497 O << ".ptr "; 1498 break; 1499 case llvm::ADDRESS_SPACE_CONST: 1500 O << ".ptr .const "; 1501 break; 1502 case llvm::ADDRESS_SPACE_SHARED: 1503 O << ".ptr .shared "; 1504 break; 1505 case llvm::ADDRESS_SPACE_GLOBAL: 1506 O << ".ptr .global "; 1507 break; 1508 } 1509 O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; 1510 } 1511 printParamName(I, paramIndex, O); 1512 continue; 1513 } 1514 1515 // non-pointer scalar to kernel func 1516 O << "\t.param ."; 1517 // Special case: predicate operands become .u8 types 1518 if (Ty->isIntegerTy(1)) 1519 O << "u8"; 1520 else 1521 O << getPTXFundamentalTypeStr(Ty); 1522 O << " "; 1523 printParamName(I, paramIndex, O); 1524 continue; 1525 } 1526 // Non-kernel function, just print .param .b<size> for ABI 1527 // and .reg .b<size> for non-ABI 1528 unsigned sz = 0; 1529 if (isa<IntegerType>(Ty)) { 1530 sz = cast<IntegerType>(Ty)->getBitWidth(); 1531 if (sz < 32) 1532 sz = 32; 1533 } else if (isa<PointerType>(Ty)) 1534 sz = thePointerTy.getSizeInBits(); 1535 else 1536 sz = Ty->getPrimitiveSizeInBits(); 1537 if (isABI) 1538 O << "\t.param .b" << sz << " "; 1539 else 1540 O << "\t.reg .b" << sz << " "; 1541 printParamName(I, paramIndex, O); 1542 continue; 1543 } 1544 1545 // param has byVal attribute. So should be a pointer 1546 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1547 assert(PTy && "Param with byval attribute should be a pointer type"); 1548 Type *ETy = PTy->getElementType(); 1549 1550 if (isABI || isKernelFunc) { 1551 // Just print .param .align <a> .b8 .param[size]; 1552 // <a> = PAL.getparamalignment 1553 // size = typeallocsize of element type 1554 unsigned align = PAL.getParamAlignment(paramIndex + 1); 1555 if (align == 0) 1556 align = TD->getABITypeAlignment(ETy); 1557 1558 unsigned sz = TD->getTypeAllocSize(ETy); 1559 O << "\t.param .align " << align << " .b8 "; 1560 printParamName(I, paramIndex, O); 1561 O << "[" << sz << "]"; 1562 continue; 1563 } else { 1564 // Split the ETy into constituent parts and 1565 // print .param .b<size> <name> for each part. 1566 // Further, if a part is vector, print the above for 1567 // each vector element. 1568 SmallVector<EVT, 16> vtparts; 1569 ComputeValueVTs(*TLI, ETy, vtparts); 1570 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1571 unsigned elems = 1; 1572 EVT elemtype = vtparts[i]; 1573 if (vtparts[i].isVector()) { 1574 elems = vtparts[i].getVectorNumElements(); 1575 elemtype = vtparts[i].getVectorElementType(); 1576 } 1577 1578 for (unsigned j = 0, je = elems; j != je; ++j) { 1579 unsigned sz = elemtype.getSizeInBits(); 1580 if (elemtype.isInteger() && (sz < 32)) 1581 sz = 32; 1582 O << "\t.reg .b" << sz << " "; 1583 printParamName(I, paramIndex, O); 1584 if (j < je - 1) 1585 O << ",\n"; 1586 ++paramIndex; 1587 } 1588 if (i < e - 1) 1589 O << ",\n"; 1590 } 1591 --paramIndex; 1592 continue; 1593 } 1594 } 1595 1596 O << "\n)\n"; 1597} 1598 1599void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1600 raw_ostream &O) { 1601 const Function *F = MF.getFunction(); 1602 emitFunctionParamList(F, O); 1603} 1604 1605void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1606 const MachineFunction &MF) { 1607 SmallString<128> Str; 1608 raw_svector_ostream O(Str); 1609 1610 // Map the global virtual register number to a register class specific 1611 // virtual register number starting from 1 with that class. 1612 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1613 //unsigned numRegClasses = TRI->getNumRegClasses(); 1614 1615 // Emit the Fake Stack Object 1616 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1617 int NumBytes = (int) MFI->getStackSize(); 1618 if (NumBytes) { 1619 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME 1620 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1621 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) { 1622 O << "\t.reg .b64 \t%SP;\n"; 1623 O << "\t.reg .b64 \t%SPL;\n"; 1624 } else { 1625 O << "\t.reg .b32 \t%SP;\n"; 1626 O << "\t.reg .b32 \t%SPL;\n"; 1627 } 1628 } 1629 1630 // Go through all virtual registers to establish the mapping between the 1631 // global virtual 1632 // register number and the per class virtual register number. 1633 // We use the per class virtual register number in the ptx output. 1634 unsigned int numVRs = MRI->getNumVirtRegs(); 1635 for (unsigned i = 0; i < numVRs; i++) { 1636 unsigned int vr = TRI->index2VirtReg(i); 1637 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1638 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1639 int n = regmap.size(); 1640 regmap.insert(std::make_pair(vr, n + 1)); 1641 } 1642 1643 // Emit register declarations 1644 // @TODO: Extract out the real register usage 1645 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1646 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1647 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1648 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1649 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n"; 1650 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1651 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n"; 1652 1653 // Emit declaration of the virtual registers or 'physical' registers for 1654 // each register class 1655 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 1656 const TargetRegisterClass *RC = TRI->getRegClass(i); 1657 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1658 std::string rcname = getNVPTXRegClassName(RC); 1659 std::string rcStr = getNVPTXRegClassStr(RC); 1660 int n = regmap.size(); 1661 1662 // Only declare those registers that may be used. 1663 if (n) { 1664 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1665 << ">;\n"; 1666 } 1667 } 1668 1669 OutStreamer.EmitRawText(O.str()); 1670} 1671 1672void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1673 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1674 bool ignored; 1675 unsigned int numHex; 1676 const char *lead; 1677 1678 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1679 numHex = 8; 1680 lead = "0f"; 1681 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); 1682 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1683 numHex = 16; 1684 lead = "0d"; 1685 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); 1686 } else 1687 llvm_unreachable("unsupported fp type"); 1688 1689 APInt API = APF.bitcastToAPInt(); 1690 std::string hexstr(utohexstr(API.getZExtValue())); 1691 O << lead; 1692 if (hexstr.length() < numHex) 1693 O << std::string(numHex - hexstr.length(), '0'); 1694 O << utohexstr(API.getZExtValue()); 1695} 1696 1697void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 1698 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1699 O << CI->getValue(); 1700 return; 1701 } 1702 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1703 printFPConstant(CFP, O); 1704 return; 1705 } 1706 if (isa<ConstantPointerNull>(CPV)) { 1707 O << "0"; 1708 return; 1709 } 1710 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1711 PointerType *PTy = dyn_cast<PointerType>(GVar->getType()); 1712 bool IsNonGenericPointer = false; 1713 if (PTy && PTy->getAddressSpace() != 0) { 1714 IsNonGenericPointer = true; 1715 } 1716 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { 1717 O << "generic("; 1718 O << *getSymbol(GVar); 1719 O << ")"; 1720 } else { 1721 O << *getSymbol(GVar); 1722 } 1723 return; 1724 } 1725 if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1726 const Value *v = Cexpr->stripPointerCasts(); 1727 PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType()); 1728 bool IsNonGenericPointer = false; 1729 if (PTy && PTy->getAddressSpace() != 0) { 1730 IsNonGenericPointer = true; 1731 } 1732 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1733 if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { 1734 O << "generic("; 1735 O << *getSymbol(GVar); 1736 O << ")"; 1737 } else { 1738 O << *getSymbol(GVar); 1739 } 1740 return; 1741 } else { 1742 O << *lowerConstant(CPV); 1743 return; 1744 } 1745 } 1746 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1747} 1748 1749void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1750 AggBuffer *aggBuffer) { 1751 1752 const DataLayout *TD = TM.getDataLayout(); 1753 1754 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1755 int s = TD->getTypeAllocSize(CPV->getType()); 1756 if (s < Bytes) 1757 s = Bytes; 1758 aggBuffer->addZeros(s); 1759 return; 1760 } 1761 1762 unsigned char *ptr; 1763 switch (CPV->getType()->getTypeID()) { 1764 1765 case Type::IntegerTyID: { 1766 const Type *ETy = CPV->getType(); 1767 if (ETy == Type::getInt8Ty(CPV->getContext())) { 1768 unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); 1769 ptr = &c; 1770 aggBuffer->addBytes(ptr, 1, Bytes); 1771 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1772 short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); 1773 ptr = (unsigned char *)&int16; 1774 aggBuffer->addBytes(ptr, 2, Bytes); 1775 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1776 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1777 int int32 = (int)(constInt->getZExtValue()); 1778 ptr = (unsigned char *)&int32; 1779 aggBuffer->addBytes(ptr, 4, Bytes); 1780 break; 1781 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1782 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1783 ConstantFoldConstantExpression(Cexpr, *TD))) { 1784 int int32 = (int)(constInt->getZExtValue()); 1785 ptr = (unsigned char *)&int32; 1786 aggBuffer->addBytes(ptr, 4, Bytes); 1787 break; 1788 } 1789 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1790 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1791 aggBuffer->addSymbol(v); 1792 aggBuffer->addZeros(4); 1793 break; 1794 } 1795 } 1796 llvm_unreachable("unsupported integer const type"); 1797 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1798 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1799 long long int64 = (long long)(constInt->getZExtValue()); 1800 ptr = (unsigned char *)&int64; 1801 aggBuffer->addBytes(ptr, 8, Bytes); 1802 break; 1803 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1804 if (const ConstantInt *constInt = dyn_cast<ConstantInt>( 1805 ConstantFoldConstantExpression(Cexpr, *TD))) { 1806 long long int64 = (long long)(constInt->getZExtValue()); 1807 ptr = (unsigned char *)&int64; 1808 aggBuffer->addBytes(ptr, 8, Bytes); 1809 break; 1810 } 1811 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1812 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1813 aggBuffer->addSymbol(v); 1814 aggBuffer->addZeros(8); 1815 break; 1816 } 1817 } 1818 llvm_unreachable("unsupported integer const type"); 1819 } else 1820 llvm_unreachable("unsupported integer const type"); 1821 break; 1822 } 1823 case Type::FloatTyID: 1824 case Type::DoubleTyID: { 1825 const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1826 const Type *Ty = CFP->getType(); 1827 if (Ty == Type::getFloatTy(CPV->getContext())) { 1828 float float32 = (float) CFP->getValueAPF().convertToFloat(); 1829 ptr = (unsigned char *)&float32; 1830 aggBuffer->addBytes(ptr, 4, Bytes); 1831 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1832 double float64 = CFP->getValueAPF().convertToDouble(); 1833 ptr = (unsigned char *)&float64; 1834 aggBuffer->addBytes(ptr, 8, Bytes); 1835 } else { 1836 llvm_unreachable("unsupported fp const type"); 1837 } 1838 break; 1839 } 1840 case Type::PointerTyID: { 1841 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1842 aggBuffer->addSymbol(GVar); 1843 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1844 const Value *v = Cexpr->stripPointerCasts(); 1845 aggBuffer->addSymbol(v); 1846 } 1847 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 1848 aggBuffer->addZeros(s); 1849 break; 1850 } 1851 1852 case Type::ArrayTyID: 1853 case Type::VectorTyID: 1854 case Type::StructTyID: { 1855 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 1856 isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) { 1857 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 1858 bufferAggregateConstant(CPV, aggBuffer); 1859 if (Bytes > ElementSize) 1860 aggBuffer->addZeros(Bytes - ElementSize); 1861 } else if (isa<ConstantAggregateZero>(CPV)) 1862 aggBuffer->addZeros(Bytes); 1863 else 1864 llvm_unreachable("Unexpected Constant type"); 1865 break; 1866 } 1867 1868 default: 1869 llvm_unreachable("unsupported type"); 1870 } 1871} 1872 1873void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 1874 AggBuffer *aggBuffer) { 1875 const DataLayout *TD = TM.getDataLayout(); 1876 int Bytes; 1877 1878 // Old constants 1879 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1880 if (CPV->getNumOperands()) 1881 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1882 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1883 return; 1884 } 1885 1886 if (const ConstantDataSequential *CDS = 1887 dyn_cast<ConstantDataSequential>(CPV)) { 1888 if (CDS->getNumElements()) 1889 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1890 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1891 aggBuffer); 1892 return; 1893 } 1894 1895 if (isa<ConstantStruct>(CPV)) { 1896 if (CPV->getNumOperands()) { 1897 StructType *ST = cast<StructType>(CPV->getType()); 1898 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1899 if (i == (e - 1)) 1900 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 1901 TD->getTypeAllocSize(ST) - 1902 TD->getStructLayout(ST)->getElementOffset(i); 1903 else 1904 Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - 1905 TD->getStructLayout(ST)->getElementOffset(i); 1906 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 1907 } 1908 } 1909 return; 1910 } 1911 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1912} 1913 1914// buildTypeNameMap - Run through symbol table looking for type names. 1915// 1916 1917bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 1918 1919 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 1920 1921 if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || 1922 !PI->second.compare("struct._image2d_t") || 1923 !PI->second.compare("struct._image3d_t"))) 1924 return true; 1925 1926 return false; 1927} 1928 1929 1930bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { 1931 switch (MI.getOpcode()) { 1932 default: 1933 return false; 1934 case NVPTX::CallArgBeginInst: 1935 case NVPTX::CallArgEndInst0: 1936 case NVPTX::CallArgEndInst1: 1937 case NVPTX::CallArgF32: 1938 case NVPTX::CallArgF64: 1939 case NVPTX::CallArgI16: 1940 case NVPTX::CallArgI32: 1941 case NVPTX::CallArgI32imm: 1942 case NVPTX::CallArgI64: 1943 case NVPTX::CallArgParam: 1944 case NVPTX::CallVoidInst: 1945 case NVPTX::CallVoidInstReg: 1946 case NVPTX::Callseq_End: 1947 case NVPTX::CallVoidInstReg64: 1948 case NVPTX::DeclareParamInst: 1949 case NVPTX::DeclareRetMemInst: 1950 case NVPTX::DeclareRetRegInst: 1951 case NVPTX::DeclareRetScalarInst: 1952 case NVPTX::DeclareScalarParamInst: 1953 case NVPTX::DeclareScalarRegInst: 1954 case NVPTX::StoreParamF32: 1955 case NVPTX::StoreParamF64: 1956 case NVPTX::StoreParamI16: 1957 case NVPTX::StoreParamI32: 1958 case NVPTX::StoreParamI64: 1959 case NVPTX::StoreParamI8: 1960 case NVPTX::StoreRetvalF32: 1961 case NVPTX::StoreRetvalF64: 1962 case NVPTX::StoreRetvalI16: 1963 case NVPTX::StoreRetvalI32: 1964 case NVPTX::StoreRetvalI64: 1965 case NVPTX::StoreRetvalI8: 1966 case NVPTX::LastCallArgF32: 1967 case NVPTX::LastCallArgF64: 1968 case NVPTX::LastCallArgI16: 1969 case NVPTX::LastCallArgI32: 1970 case NVPTX::LastCallArgI32imm: 1971 case NVPTX::LastCallArgI64: 1972 case NVPTX::LastCallArgParam: 1973 case NVPTX::LoadParamMemF32: 1974 case NVPTX::LoadParamMemF64: 1975 case NVPTX::LoadParamMemI16: 1976 case NVPTX::LoadParamMemI32: 1977 case NVPTX::LoadParamMemI64: 1978 case NVPTX::LoadParamMemI8: 1979 case NVPTX::PrototypeInst: 1980 case NVPTX::DBG_VALUE: 1981 return true; 1982 } 1983 return false; 1984} 1985 1986/// PrintAsmOperand - Print out an operand for an inline asm expression. 1987/// 1988bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 1989 unsigned AsmVariant, 1990 const char *ExtraCode, raw_ostream &O) { 1991 if (ExtraCode && ExtraCode[0]) { 1992 if (ExtraCode[1] != 0) 1993 return true; // Unknown modifier. 1994 1995 switch (ExtraCode[0]) { 1996 default: 1997 // See if this is a generic print operand 1998 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 1999 case 'r': 2000 break; 2001 } 2002 } 2003 2004 printOperand(MI, OpNo, O); 2005 2006 return false; 2007} 2008 2009bool NVPTXAsmPrinter::PrintAsmMemoryOperand( 2010 const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, 2011 const char *ExtraCode, raw_ostream &O) { 2012 if (ExtraCode && ExtraCode[0]) 2013 return true; // Unknown modifier 2014 2015 O << '['; 2016 printMemOperand(MI, OpNo, O); 2017 O << ']'; 2018 2019 return false; 2020} 2021 2022void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 2023 raw_ostream &O, const char *Modifier) { 2024 const MachineOperand &MO = MI->getOperand(opNum); 2025 switch (MO.getType()) { 2026 case MachineOperand::MO_Register: 2027 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 2028 if (MO.getReg() == NVPTX::VRDepot) 2029 O << DEPOTNAME << getFunctionNumber(); 2030 else 2031 O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 2032 } else { 2033 emitVirtualRegister(MO.getReg(), O); 2034 } 2035 return; 2036 2037 case MachineOperand::MO_Immediate: 2038 if (!Modifier) 2039 O << MO.getImm(); 2040 else if (strstr(Modifier, "vec") == Modifier) 2041 printVecModifiedImmediate(MO, Modifier, O); 2042 else 2043 llvm_unreachable( 2044 "Don't know how to handle modifier on immediate operand"); 2045 return; 2046 2047 case MachineOperand::MO_FPImmediate: 2048 printFPConstant(MO.getFPImm(), O); 2049 break; 2050 2051 case MachineOperand::MO_GlobalAddress: 2052 O << *getSymbol(MO.getGlobal()); 2053 break; 2054 2055 case MachineOperand::MO_MachineBasicBlock: 2056 O << *MO.getMBB()->getSymbol(); 2057 return; 2058 2059 default: 2060 llvm_unreachable("Operand type not supported."); 2061 } 2062} 2063 2064void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 2065 raw_ostream &O, const char *Modifier) { 2066 printOperand(MI, opNum, O); 2067 2068 if (Modifier && !strcmp(Modifier, "add")) { 2069 O << ", "; 2070 printOperand(MI, opNum + 1, O); 2071 } else { 2072 if (MI->getOperand(opNum + 1).isImm() && 2073 MI->getOperand(opNum + 1).getImm() == 0) 2074 return; // don't print ',0' or '+0' 2075 O << "+"; 2076 printOperand(MI, opNum + 1, O); 2077 } 2078} 2079 2080void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2081 std::stringstream temp; 2082 LineReader *reader = this->getReader(filename); 2083 temp << "\n//"; 2084 temp << filename.str(); 2085 temp << ":"; 2086 temp << line; 2087 temp << " "; 2088 temp << reader->readLine(line); 2089 temp << "\n"; 2090 this->OutStreamer.EmitRawText(temp.str()); 2091} 2092 2093LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2094 if (!reader) { 2095 reader = new LineReader(filename); 2096 } 2097 2098 if (reader->fileName() != filename) { 2099 delete reader; 2100 reader = new LineReader(filename); 2101 } 2102 2103 return reader; 2104} 2105 2106std::string LineReader::readLine(unsigned lineNum) { 2107 if (lineNum < theCurLine) { 2108 theCurLine = 0; 2109 fstr.seekg(0, std::ios::beg); 2110 } 2111 while (theCurLine < lineNum) { 2112 fstr.getline(buff, 500); 2113 theCurLine++; 2114 } 2115 return buff; 2116} 2117 2118// Force static initialization. 2119extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2120 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2121 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2122} 2123