NVPTXAsmPrinter.cpp revision 7454fc2e87ef5638f3644b86a4350a44513e5185
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains a printer that converts from our internal representation 11// of machine-dependent LLVM code to NVPTX assembly language. 12// 13//===----------------------------------------------------------------------===// 14 15#include "NVPTXAsmPrinter.h" 16#include "NVPTX.h" 17#include "NVPTXInstrInfo.h" 18#include "NVPTXTargetMachine.h" 19#include "NVPTXRegisterInfo.h" 20#include "NVPTXUtilities.h" 21#include "MCTargetDesc/NVPTXMCAsmInfo.h" 22#include "NVPTXNumRegisters.h" 23#include "llvm/ADT/StringExtras.h" 24#include "llvm/DebugInfo.h" 25#include "llvm/Function.h" 26#include "llvm/GlobalVariable.h" 27#include "llvm/Module.h" 28#include "llvm/CodeGen/Analysis.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/CodeGen/MachineFrameInfo.h" 31#include "llvm/CodeGen/MachineModuleInfo.h" 32#include "llvm/MC/MCStreamer.h" 33#include "llvm/MC/MCSymbol.h" 34#include "llvm/Target/Mangler.h" 35#include "llvm/Target/TargetLoweringObjectFile.h" 36#include "llvm/Support/TargetRegistry.h" 37#include "llvm/Support/ErrorHandling.h" 38#include "llvm/Support/FormattedStream.h" 39#include "llvm/DerivedTypes.h" 40#include "llvm/Support/TimeValue.h" 41#include "llvm/Support/CommandLine.h" 42#include "llvm/Analysis/ConstantFolding.h" 43#include "llvm/Support/Path.h" 44#include "llvm/Assembly/Writer.h" 45#include "cl_common_defines.h" 46#include <sstream> 47using namespace llvm; 48 49 50#include "NVPTXGenAsmWriter.inc" 51 52bool RegAllocNilUsed = true; 53 54#define DEPOTNAME "__local_depot" 55 56static cl::opt<bool> 57EmitLineNumbers("nvptx-emit-line-numbers", 58 cl::desc("NVPTX Specific: Emit Line numbers even without -G"), 59 cl::init(true)); 60 61namespace llvm { 62bool InterleaveSrcInPtx = false; 63} 64 65static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", 66 cl::ZeroOrMore, 67 cl::desc("NVPTX Specific: Emit source line in ptx file"), 68 cl::location(llvm::InterleaveSrcInPtx)); 69 70 71 72 73// @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we 74// cannot just link to the existing version. 75/// LowerConstant - Lower the specified LLVM Constant to an MCExpr. 76/// 77using namespace nvptx; 78const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { 79 MCContext &Ctx = AP.OutContext; 80 81 if (CV->isNullValue() || isa<UndefValue>(CV)) 82 return MCConstantExpr::Create(0, Ctx); 83 84 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 85 return MCConstantExpr::Create(CI->getZExtValue(), Ctx); 86 87 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) 88 return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); 89 90 if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) 91 return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); 92 93 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 94 if (CE == 0) 95 llvm_unreachable("Unknown constant value to lower!"); 96 97 98 switch (CE->getOpcode()) { 99 default: 100 // If the code isn't optimized, there may be outstanding folding 101 // opportunities. Attempt to fold the expression using DataLayout as a 102 // last resort before giving up. 103 if (Constant *C = 104 ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) 105 if (C != CE) 106 return LowerConstant(C, AP); 107 108 // Otherwise report the problem to the user. 109 { 110 std::string S; 111 raw_string_ostream OS(S); 112 OS << "Unsupported expression in static initializer: "; 113 WriteAsOperand(OS, CE, /*PrintType=*/false, 114 !AP.MF ? 0 : AP.MF->getFunction()->getParent()); 115 report_fatal_error(OS.str()); 116 } 117 case Instruction::GetElementPtr: { 118 const DataLayout &TD = *AP.TM.getDataLayout(); 119 // Generate a symbolic expression for the byte address 120 const Constant *PtrVal = CE->getOperand(0); 121 SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); 122 int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); 123 124 const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); 125 if (Offset == 0) 126 return Base; 127 128 // Truncate/sext the offset to the pointer size. 129 if (TD.getPointerSizeInBits() != 64) { 130 int SExtAmount = 64-TD.getPointerSizeInBits(); 131 Offset = (Offset << SExtAmount) >> SExtAmount; 132 } 133 134 return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), 135 Ctx); 136 } 137 138 case Instruction::Trunc: 139 // We emit the value and depend on the assembler to truncate the generated 140 // expression properly. This is important for differences between 141 // blockaddress labels. Since the two labels are in the same function, it 142 // is reasonable to treat their delta as a 32-bit value. 143 // FALL THROUGH. 144 case Instruction::BitCast: 145 return LowerConstant(CE->getOperand(0), AP); 146 147 case Instruction::IntToPtr: { 148 const DataLayout &TD = *AP.TM.getDataLayout(); 149 // Handle casts to pointers by changing them into casts to the appropriate 150 // integer type. This promotes constant folding and simplifies this code. 151 Constant *Op = CE->getOperand(0); 152 Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), 153 false/*ZExt*/); 154 return LowerConstant(Op, AP); 155 } 156 157 case Instruction::PtrToInt: { 158 const DataLayout &TD = *AP.TM.getDataLayout(); 159 // Support only foldable casts to/from pointers that can be eliminated by 160 // changing the pointer to the appropriately sized integer type. 161 Constant *Op = CE->getOperand(0); 162 Type *Ty = CE->getType(); 163 164 const MCExpr *OpExpr = LowerConstant(Op, AP); 165 166 // We can emit the pointer value into this slot if the slot is an 167 // integer slot equal to the size of the pointer. 168 if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) 169 return OpExpr; 170 171 // Otherwise the pointer is smaller than the resultant integer, mask off 172 // the high bits so we are sure to get a proper truncation if the input is 173 // a constant expr. 174 unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); 175 const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); 176 return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); 177 } 178 179 // The MC library also has a right-shift operator, but it isn't consistently 180 // signed or unsigned between different targets. 181 case Instruction::Add: 182 case Instruction::Sub: 183 case Instruction::Mul: 184 case Instruction::SDiv: 185 case Instruction::SRem: 186 case Instruction::Shl: 187 case Instruction::And: 188 case Instruction::Or: 189 case Instruction::Xor: { 190 const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); 191 const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); 192 switch (CE->getOpcode()) { 193 default: llvm_unreachable("Unknown binary operator constant cast expr"); 194 case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); 195 case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); 196 case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); 197 case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); 198 case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); 199 case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); 200 case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); 201 case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); 202 case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); 203 } 204 } 205 } 206} 207 208 209void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) 210{ 211 if (!EmitLineNumbers) 212 return; 213 if (ignoreLoc(MI)) 214 return; 215 216 DebugLoc curLoc = MI.getDebugLoc(); 217 218 if (prevDebugLoc.isUnknown() && curLoc.isUnknown()) 219 return; 220 221 if (prevDebugLoc == curLoc) 222 return; 223 224 prevDebugLoc = curLoc; 225 226 if (curLoc.isUnknown()) 227 return; 228 229 230 const MachineFunction *MF = MI.getParent()->getParent(); 231 //const TargetMachine &TM = MF->getTarget(); 232 233 const LLVMContext &ctx = MF->getFunction()->getContext(); 234 DIScope Scope(curLoc.getScope(ctx)); 235 236 if (!Scope.Verify()) 237 return; 238 239 StringRef fileName(Scope.getFilename()); 240 StringRef dirName(Scope.getDirectory()); 241 SmallString<128> FullPathName = dirName; 242 if (!dirName.empty() && !sys::path::is_absolute(fileName)) { 243 sys::path::append(FullPathName, fileName); 244 fileName = FullPathName.str(); 245 } 246 247 if (filenameMap.find(fileName.str()) == filenameMap.end()) 248 return; 249 250 251 // Emit the line from the source file. 252 if (llvm::InterleaveSrcInPtx) 253 this->emitSrcInText(fileName.str(), curLoc.getLine()); 254 255 std::stringstream temp; 256 temp << "\t.loc " << filenameMap[fileName.str()] 257 << " " << curLoc.getLine() << " " << curLoc.getCol(); 258 OutStreamer.EmitRawText(Twine(temp.str().c_str())); 259} 260 261void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { 262 SmallString<128> Str; 263 raw_svector_ostream OS(Str); 264 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 265 emitLineNumberAsDotLoc(*MI); 266 printInstruction(MI, OS); 267 OutStreamer.EmitRawText(OS.str()); 268} 269 270void NVPTXAsmPrinter::printReturnValStr(const Function *F, 271 raw_ostream &O) 272{ 273 const DataLayout *TD = TM.getDataLayout(); 274 const TargetLowering *TLI = TM.getTargetLowering(); 275 276 Type *Ty = F->getReturnType(); 277 278 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 279 280 if (Ty->getTypeID() == Type::VoidTyID) 281 return; 282 283 O << " ("; 284 285 if (isABI) { 286 if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { 287 unsigned size = 0; 288 if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { 289 size = ITy->getBitWidth(); 290 if (size < 32) size = 32; 291 } else { 292 assert(Ty->isFloatingPointTy() && 293 "Floating point type expected here"); 294 size = Ty->getPrimitiveSizeInBits(); 295 } 296 297 O << ".param .b" << size << " func_retval0"; 298 } 299 else if (isa<PointerType>(Ty)) { 300 O << ".param .b" << TLI->getPointerTy().getSizeInBits() 301 << " func_retval0"; 302 } else { 303 if ((Ty->getTypeID() == Type::StructTyID) || 304 isa<VectorType>(Ty)) { 305 SmallVector<EVT, 16> vtparts; 306 ComputeValueVTs(*TLI, Ty, vtparts); 307 unsigned totalsz = 0; 308 for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { 309 unsigned elems = 1; 310 EVT elemtype = vtparts[i]; 311 if (vtparts[i].isVector()) { 312 elems = vtparts[i].getVectorNumElements(); 313 elemtype = vtparts[i].getVectorElementType(); 314 } 315 for (unsigned j=0, je=elems; j!=je; ++j) { 316 unsigned sz = elemtype.getSizeInBits(); 317 if (elemtype.isInteger() && (sz < 8)) sz = 8; 318 totalsz += sz/8; 319 } 320 } 321 unsigned retAlignment = 0; 322 if (!llvm::getAlign(*F, 0, retAlignment)) 323 retAlignment = TD->getABITypeAlignment(Ty); 324 O << ".param .align " 325 << retAlignment 326 << " .b8 func_retval0[" 327 << totalsz << "]"; 328 } else 329 assert(false && 330 "Unknown return type"); 331 } 332 } else { 333 SmallVector<EVT, 16> vtparts; 334 ComputeValueVTs(*TLI, Ty, vtparts); 335 unsigned idx = 0; 336 for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { 337 unsigned elems = 1; 338 EVT elemtype = vtparts[i]; 339 if (vtparts[i].isVector()) { 340 elems = vtparts[i].getVectorNumElements(); 341 elemtype = vtparts[i].getVectorElementType(); 342 } 343 344 for (unsigned j=0, je=elems; j!=je; ++j) { 345 unsigned sz = elemtype.getSizeInBits(); 346 if (elemtype.isInteger() && (sz < 32)) sz = 32; 347 O << ".reg .b" << sz << " func_retval" << idx; 348 if (j<je-1) O << ", "; 349 ++idx; 350 } 351 if (i < e-1) 352 O << ", "; 353 } 354 } 355 O << ") "; 356 return; 357} 358 359void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 360 raw_ostream &O) { 361 const Function *F = MF.getFunction(); 362 printReturnValStr(F, O); 363} 364 365void NVPTXAsmPrinter::EmitFunctionEntryLabel() { 366 SmallString<128> Str; 367 raw_svector_ostream O(Str); 368 369 // Set up 370 MRI = &MF->getRegInfo(); 371 F = MF->getFunction(); 372 emitLinkageDirective(F,O); 373 if (llvm::isKernelFunction(*F)) 374 O << ".entry "; 375 else { 376 O << ".func "; 377 printReturnValStr(*MF, O); 378 } 379 380 O << *CurrentFnSym; 381 382 emitFunctionParamList(*MF, O); 383 384 if (llvm::isKernelFunction(*F)) 385 emitKernelFunctionDirectives(*F, O); 386 387 OutStreamer.EmitRawText(O.str()); 388 389 prevDebugLoc = DebugLoc(); 390} 391 392void NVPTXAsmPrinter::EmitFunctionBodyStart() { 393 const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); 394 unsigned numRegClasses = TRI.getNumRegClasses(); 395 VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1]; 396 OutStreamer.EmitRawText(StringRef("{\n")); 397 setAndEmitFunctionVirtualRegisters(*MF); 398 399 SmallString<128> Str; 400 raw_svector_ostream O(Str); 401 emitDemotedVars(MF->getFunction(), O); 402 OutStreamer.EmitRawText(O.str()); 403} 404 405void NVPTXAsmPrinter::EmitFunctionBodyEnd() { 406 OutStreamer.EmitRawText(StringRef("}\n")); 407 delete []VRidGlobal2LocalMap; 408} 409 410 411void 412NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F, 413 raw_ostream &O) const { 414 // If the NVVM IR has some of reqntid* specified, then output 415 // the reqntid directive, and set the unspecified ones to 1. 416 // If none of reqntid* is specified, don't output reqntid directive. 417 unsigned reqntidx, reqntidy, reqntidz; 418 bool specified = false; 419 if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1; 420 else specified = true; 421 if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1; 422 else specified = true; 423 if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1; 424 else specified = true; 425 426 if (specified) 427 O << ".reqntid " << reqntidx << ", " 428 << reqntidy << ", " << reqntidz << "\n"; 429 430 // If the NVVM IR has some of maxntid* specified, then output 431 // the maxntid directive, and set the unspecified ones to 1. 432 // If none of maxntid* is specified, don't output maxntid directive. 433 unsigned maxntidx, maxntidy, maxntidz; 434 specified = false; 435 if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1; 436 else specified = true; 437 if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1; 438 else specified = true; 439 if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1; 440 else specified = true; 441 442 if (specified) 443 O << ".maxntid " << maxntidx << ", " 444 << maxntidy << ", " << maxntidz << "\n"; 445 446 unsigned mincta; 447 if (llvm::getMinCTASm(F, mincta)) 448 O << ".minnctapersm " << mincta << "\n"; 449} 450 451void 452NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, 453 raw_ostream &O) { 454 const TargetRegisterClass * RC = MRI->getRegClass(vr); 455 unsigned id = RC->getID(); 456 457 std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id]; 458 unsigned mapped_vr = regmap[vr]; 459 460 if (!isVec) { 461 O << getNVPTXRegClassStr(RC) << mapped_vr; 462 return; 463 } 464 // Vector virtual register 465 if (getNVPTXVectorSize(RC) == 4) 466 O << "{" 467 << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " 468 << getNVPTXRegClassStr(RC) << mapped_vr << "_1, " 469 << getNVPTXRegClassStr(RC) << mapped_vr << "_2, " 470 << getNVPTXRegClassStr(RC) << mapped_vr << "_3" 471 << "}"; 472 else if (getNVPTXVectorSize(RC) == 2) 473 O << "{" 474 << getNVPTXRegClassStr(RC) << mapped_vr << "_0, " 475 << getNVPTXRegClassStr(RC) << mapped_vr << "_1" 476 << "}"; 477 else 478 llvm_unreachable("Unsupported vector size"); 479} 480 481void 482NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, 483 raw_ostream &O) { 484 getVirtualRegisterName(vr, isVec, O); 485} 486 487void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO, 488 const char *Modifier, 489 raw_ostream &O) { 490 static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'}; 491 int Imm = (int)MO.getImm(); 492 if(0 == strcmp(Modifier, "vecelem")) 493 O << "_" << vecelem[Imm]; 494 else if(0 == strcmp(Modifier, "vecv4comm1")) { 495 if((Imm < 0) || (Imm > 3)) 496 O << "//"; 497 } 498 else if(0 == strcmp(Modifier, "vecv4comm2")) { 499 if((Imm < 4) || (Imm > 7)) 500 O << "//"; 501 } 502 else if(0 == strcmp(Modifier, "vecv4pos")) { 503 if(Imm < 0) Imm = 0; 504 O << "_" << vecelem[Imm%4]; 505 } 506 else if(0 == strcmp(Modifier, "vecv2comm1")) { 507 if((Imm < 0) || (Imm > 1)) 508 O << "//"; 509 } 510 else if(0 == strcmp(Modifier, "vecv2comm2")) { 511 if((Imm < 2) || (Imm > 3)) 512 O << "//"; 513 } 514 else if(0 == strcmp(Modifier, "vecv2pos")) { 515 if(Imm < 0) Imm = 0; 516 O << "_" << vecelem[Imm%2]; 517 } 518 else 519 llvm_unreachable("Unknown Modifier on immediate operand"); 520} 521 522void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 523 raw_ostream &O, const char *Modifier) { 524 const MachineOperand &MO = MI->getOperand(opNum); 525 switch (MO.getType()) { 526 case MachineOperand::MO_Register: 527 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 528 if (MO.getReg() == NVPTX::VRDepot) 529 O << DEPOTNAME << getFunctionNumber(); 530 else 531 O << getRegisterName(MO.getReg()); 532 } else { 533 if (!Modifier) 534 emitVirtualRegister(MO.getReg(), false, O); 535 else { 536 if (strcmp(Modifier, "vecfull") == 0) 537 emitVirtualRegister(MO.getReg(), true, O); 538 else 539 llvm_unreachable( 540 "Don't know how to handle the modifier on virtual register."); 541 } 542 } 543 return; 544 545 case MachineOperand::MO_Immediate: 546 if (!Modifier) 547 O << MO.getImm(); 548 else if (strstr(Modifier, "vec") == Modifier) 549 printVecModifiedImmediate(MO, Modifier, O); 550 else 551 llvm_unreachable("Don't know how to handle modifier on immediate operand"); 552 return; 553 554 case MachineOperand::MO_FPImmediate: 555 printFPConstant(MO.getFPImm(), O); 556 break; 557 558 case MachineOperand::MO_GlobalAddress: 559 O << *Mang->getSymbol(MO.getGlobal()); 560 break; 561 562 case MachineOperand::MO_ExternalSymbol: { 563 const char * symbname = MO.getSymbolName(); 564 if (strstr(symbname, ".PARAM") == symbname) { 565 unsigned index; 566 sscanf(symbname+6, "%u[];", &index); 567 printParamName(index, O); 568 } 569 else if (strstr(symbname, ".HLPPARAM") == symbname) { 570 unsigned index; 571 sscanf(symbname+9, "%u[];", &index); 572 O << *CurrentFnSym << "_param_" << index << "_offset"; 573 } 574 else 575 O << symbname; 576 break; 577 } 578 579 case MachineOperand::MO_MachineBasicBlock: 580 O << *MO.getMBB()->getSymbol(); 581 return; 582 583 default: 584 llvm_unreachable("Operand type not supported."); 585 } 586} 587 588void NVPTXAsmPrinter:: 589printImplicitDef(const MachineInstr *MI, raw_ostream &O) const { 590#ifndef __OPTIMIZE__ 591 O << "\t// Implicit def :"; 592 //printOperand(MI, 0); 593 O << "\n"; 594#endif 595} 596 597void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 598 raw_ostream &O, const char *Modifier) { 599 printOperand(MI, opNum, O); 600 601 if (Modifier && !strcmp(Modifier, "add")) { 602 O << ", "; 603 printOperand(MI, opNum+1, O); 604 } else { 605 if (MI->getOperand(opNum+1).isImm() && 606 MI->getOperand(opNum+1).getImm() == 0) 607 return; // don't print ',0' or '+0' 608 O << "+"; 609 printOperand(MI, opNum+1, O); 610 } 611} 612 613void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, 614 raw_ostream &O, const char *Modifier) 615{ 616 if (Modifier) { 617 const MachineOperand &MO = MI->getOperand(opNum); 618 int Imm = (int)MO.getImm(); 619 if (!strcmp(Modifier, "volatile")) { 620 if (Imm) 621 O << ".volatile"; 622 } else if (!strcmp(Modifier, "addsp")) { 623 switch (Imm) { 624 case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break; 625 case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break; 626 case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break; 627 case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break; 628 case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break; 629 case NVPTX::PTXLdStInstCode::GENERIC: 630 if (!nvptxSubtarget.hasGenericLdSt()) 631 O << ".global"; 632 break; 633 default: 634 llvm_unreachable("Wrong Address Space"); 635 } 636 } 637 else if (!strcmp(Modifier, "sign")) { 638 if (Imm==NVPTX::PTXLdStInstCode::Signed) 639 O << "s"; 640 else if (Imm==NVPTX::PTXLdStInstCode::Unsigned) 641 O << "u"; 642 else 643 O << "f"; 644 } 645 else if (!strcmp(Modifier, "vec")) { 646 if (Imm==NVPTX::PTXLdStInstCode::V2) 647 O << ".v2"; 648 else if (Imm==NVPTX::PTXLdStInstCode::V4) 649 O << ".v4"; 650 } 651 else 652 llvm_unreachable("Unknown Modifier"); 653 } 654 else 655 llvm_unreachable("Empty Modifier"); 656} 657 658void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { 659 660 emitLinkageDirective(F,O); 661 if (llvm::isKernelFunction(*F)) 662 O << ".entry "; 663 else 664 O << ".func "; 665 printReturnValStr(F, O); 666 O << *CurrentFnSym << "\n"; 667 emitFunctionParamList(F, O); 668 O << ";\n"; 669} 670 671static bool usedInGlobalVarDef(const Constant *C) 672{ 673 if (!C) 674 return false; 675 676 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 677 if (GV->getName().str() == "llvm.used") 678 return false; 679 return true; 680 } 681 682 for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); 683 ui!=ue; ++ui) { 684 const Constant *C = dyn_cast<Constant>(*ui); 685 if (usedInGlobalVarDef(C)) 686 return true; 687 } 688 return false; 689} 690 691static bool usedInOneFunc(const User *U, Function const *&oneFunc) 692{ 693 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 694 if (othergv->getName().str() == "llvm.used") 695 return true; 696 } 697 698 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 699 if (instr->getParent() && instr->getParent()->getParent()) { 700 const Function *curFunc = instr->getParent()->getParent(); 701 if (oneFunc && (curFunc != oneFunc)) 702 return false; 703 oneFunc = curFunc; 704 return true; 705 } 706 else 707 return false; 708 } 709 710 if (const MDNode *md = dyn_cast<MDNode>(U)) 711 if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || 712 (md->getName().str() == "llvm.dbg.sp"))) 713 return true; 714 715 716 for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end(); 717 ui!=ue; ++ui) { 718 if (usedInOneFunc(*ui, oneFunc) == false) 719 return false; 720 } 721 return true; 722} 723 724/* Find out if a global variable can be demoted to local scope. 725 * Currently, this is valid for CUDA shared variables, which have local 726 * scope and global lifetime. So the conditions to check are : 727 * 1. Is the global variable in shared address space? 728 * 2. Does it have internal linkage? 729 * 3. Is the global variable referenced only in one function? 730 */ 731static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 732 if (gv->hasInternalLinkage() == false) 733 return false; 734 const PointerType *Pty = gv->getType(); 735 if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) 736 return false; 737 738 const Function *oneFunc = 0; 739 740 bool flag = usedInOneFunc(gv, oneFunc); 741 if (flag == false) 742 return false; 743 if (!oneFunc) 744 return false; 745 f = oneFunc; 746 return true; 747} 748 749static bool useFuncSeen(const Constant *C, 750 llvm::DenseMap<const Function *, bool> &seenMap) { 751 for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); 752 ui!=ue; ++ui) { 753 if (const Constant *cu = dyn_cast<Constant>(*ui)) { 754 if (useFuncSeen(cu, seenMap)) 755 return true; 756 } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { 757 const BasicBlock *bb = I->getParent(); 758 if (!bb) continue; 759 const Function *caller = bb->getParent(); 760 if (!caller) continue; 761 if (seenMap.find(caller) != seenMap.end()) 762 return true; 763 } 764 } 765 return false; 766} 767 768void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { 769 llvm::DenseMap<const Function *, bool> seenMap; 770 for (Module::const_iterator FI=M.begin(), FE=M.end(); 771 FI!=FE; ++FI) { 772 const Function *F = FI; 773 774 if (F->isDeclaration()) { 775 if (F->use_empty()) 776 continue; 777 if (F->getIntrinsicID()) 778 continue; 779 CurrentFnSym = Mang->getSymbol(F); 780 emitDeclaration(F, O); 781 continue; 782 } 783 for (Value::const_use_iterator iter=F->use_begin(), 784 iterEnd=F->use_end(); iter!=iterEnd; ++iter) { 785 if (const Constant *C = dyn_cast<Constant>(*iter)) { 786 if (usedInGlobalVarDef(C)) { 787 // The use is in the initialization of a global variable 788 // that is a function pointer, so print a declaration 789 // for the original function 790 CurrentFnSym = Mang->getSymbol(F); 791 emitDeclaration(F, O); 792 break; 793 } 794 // Emit a declaration of this function if the function that 795 // uses this constant expr has already been seen. 796 if (useFuncSeen(C, seenMap)) { 797 CurrentFnSym = Mang->getSymbol(F); 798 emitDeclaration(F, O); 799 break; 800 } 801 } 802 803 if (!isa<Instruction>(*iter)) continue; 804 const Instruction *instr = cast<Instruction>(*iter); 805 const BasicBlock *bb = instr->getParent(); 806 if (!bb) continue; 807 const Function *caller = bb->getParent(); 808 if (!caller) continue; 809 810 // If a caller has already been seen, then the caller is 811 // appearing in the module before the callee. so print out 812 // a declaration for the callee. 813 if (seenMap.find(caller) != seenMap.end()) { 814 CurrentFnSym = Mang->getSymbol(F); 815 emitDeclaration(F, O); 816 break; 817 } 818 } 819 seenMap[F] = true; 820 } 821} 822 823void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { 824 DebugInfoFinder DbgFinder; 825 DbgFinder.processModule(M); 826 827 unsigned i=1; 828 for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), 829 E = DbgFinder.compile_unit_end(); I != E; ++I) { 830 DICompileUnit DIUnit(*I); 831 StringRef Filename(DIUnit.getFilename()); 832 StringRef Dirname(DIUnit.getDirectory()); 833 SmallString<128> FullPathName = Dirname; 834 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 835 sys::path::append(FullPathName, Filename); 836 Filename = FullPathName.str(); 837 } 838 if (filenameMap.find(Filename.str()) != filenameMap.end()) 839 continue; 840 filenameMap[Filename.str()] = i; 841 OutStreamer.EmitDwarfFileDirective(i, "", Filename.str()); 842 ++i; 843 } 844 845 for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), 846 E = DbgFinder.subprogram_end(); I != E; ++I) { 847 DISubprogram SP(*I); 848 StringRef Filename(SP.getFilename()); 849 StringRef Dirname(SP.getDirectory()); 850 SmallString<128> FullPathName = Dirname; 851 if (!Dirname.empty() && !sys::path::is_absolute(Filename)) { 852 sys::path::append(FullPathName, Filename); 853 Filename = FullPathName.str(); 854 } 855 if (filenameMap.find(Filename.str()) != filenameMap.end()) 856 continue; 857 filenameMap[Filename.str()] = i; 858 ++i; 859 } 860} 861 862bool NVPTXAsmPrinter::doInitialization (Module &M) { 863 864 SmallString<128> Str1; 865 raw_svector_ostream OS1(Str1); 866 867 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 868 MMI->AnalyzeModule(M); 869 870 // We need to call the parent's one explicitly. 871 //bool Result = AsmPrinter::doInitialization(M); 872 873 // Initialize TargetLoweringObjectFile. 874 const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) 875 .Initialize(OutContext, TM); 876 877 Mang = new Mangler(OutContext, *TM.getDataLayout()); 878 879 // Emit header before any dwarf directives are emitted below. 880 emitHeader(M, OS1); 881 OutStreamer.EmitRawText(OS1.str()); 882 883 884 // Already commented out 885 //bool Result = AsmPrinter::doInitialization(M); 886 887 888 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) 889 recordAndEmitFilenames(M); 890 891 SmallString<128> Str2; 892 raw_svector_ostream OS2(Str2); 893 894 emitDeclarations(M, OS2); 895 896 // Print out module-level global variables here. 897 for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 898 I != E; ++I) 899 printModuleLevelGV(I, OS2); 900 901 OS2 << '\n'; 902 903 OutStreamer.EmitRawText(OS2.str()); 904 return false; // success 905} 906 907void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { 908 O << "//\n"; 909 O << "// Generated by LLVM NVPTX Back-End\n"; 910 O << "//\n"; 911 O << "\n"; 912 913 unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); 914 O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 915 916 O << ".target "; 917 O << nvptxSubtarget.getTargetName(); 918 919 if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) 920 O << ", texmode_independent"; 921 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 922 if (!nvptxSubtarget.hasDouble()) 923 O << ", map_f64_to_f32"; 924 } 925 926 if (MAI->doesSupportDebugInformation()) 927 O << ", debug"; 928 929 O << "\n"; 930 931 O << ".address_size "; 932 if (nvptxSubtarget.is64Bit()) 933 O << "64"; 934 else 935 O << "32"; 936 O << "\n"; 937 938 O << "\n"; 939} 940 941bool NVPTXAsmPrinter::doFinalization(Module &M) { 942 // XXX Temproarily remove global variables so that doFinalization() will not 943 // emit them again (global variables are emitted at beginning). 944 945 Module::GlobalListType &global_list = M.getGlobalList(); 946 int i, n = global_list.size(); 947 GlobalVariable **gv_array = new GlobalVariable* [n]; 948 949 // first, back-up GlobalVariable in gv_array 950 i = 0; 951 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 952 I != E; ++I) 953 gv_array[i++] = &*I; 954 955 // second, empty global_list 956 while (!global_list.empty()) 957 global_list.remove(global_list.begin()); 958 959 // call doFinalization 960 bool ret = AsmPrinter::doFinalization(M); 961 962 // now we restore global variables 963 for (i = 0; i < n; i ++) 964 global_list.insert(global_list.end(), gv_array[i]); 965 966 delete[] gv_array; 967 return ret; 968 969 970 //bool Result = AsmPrinter::doFinalization(M); 971 // Instead of calling the parents doFinalization, we may 972 // clone parents doFinalization and customize here. 973 // Currently, we if NVISA out the EmitGlobals() in 974 // parent's doFinalization, which is too intrusive. 975 // 976 // Same for the doInitialization. 977 //return Result; 978} 979 980// This function emits appropriate linkage directives for 981// functions and global variables. 982// 983// extern function declaration -> .extern 984// extern function definition -> .visible 985// external global variable with init -> .visible 986// external without init -> .extern 987// appending -> not allowed, assert. 988 989void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) 990{ 991 if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { 992 if (V->hasExternalLinkage()) { 993 if (isa<GlobalVariable>(V)) { 994 const GlobalVariable *GVar = cast<GlobalVariable>(V); 995 if (GVar) { 996 if (GVar->hasInitializer()) 997 O << ".visible "; 998 else 999 O << ".extern "; 1000 } 1001 } else if (V->isDeclaration()) 1002 O << ".extern "; 1003 else 1004 O << ".visible "; 1005 } else if (V->hasAppendingLinkage()) { 1006 std::string msg; 1007 msg.append("Error: "); 1008 msg.append("Symbol "); 1009 if (V->hasName()) 1010 msg.append(V->getName().str()); 1011 msg.append("has unsupported appending linkage type"); 1012 llvm_unreachable(msg.c_str()); 1013 } 1014 } 1015} 1016 1017 1018void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, 1019 bool processDemoted) { 1020 1021 // Skip meta data 1022 if (GVar->hasSection()) { 1023 if (GVar->getSection() == "llvm.metadata") 1024 return; 1025 } 1026 1027 const DataLayout *TD = TM.getDataLayout(); 1028 1029 // GlobalVariables are always constant pointers themselves. 1030 const PointerType *PTy = GVar->getType(); 1031 Type *ETy = PTy->getElementType(); 1032 1033 if (GVar->hasExternalLinkage()) { 1034 if (GVar->hasInitializer()) 1035 O << ".visible "; 1036 else 1037 O << ".extern "; 1038 } 1039 1040 if (llvm::isTexture(*GVar)) { 1041 O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n"; 1042 return; 1043 } 1044 1045 if (llvm::isSurface(*GVar)) { 1046 O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n"; 1047 return; 1048 } 1049 1050 if (GVar->isDeclaration()) { 1051 // (extern) declarations, no definition or initializer 1052 // Currently the only known declaration is for an automatic __local 1053 // (.shared) promoted to global. 1054 emitPTXGlobalVariable(GVar, O); 1055 O << ";\n"; 1056 return; 1057 } 1058 1059 if (llvm::isSampler(*GVar)) { 1060 O << ".global .samplerref " << llvm::getSamplerName(*GVar); 1061 1062 Constant *Initializer = NULL; 1063 if (GVar->hasInitializer()) 1064 Initializer = GVar->getInitializer(); 1065 ConstantInt *CI = NULL; 1066 if (Initializer) 1067 CI = dyn_cast<ConstantInt>(Initializer); 1068 if (CI) { 1069 unsigned sample=CI->getZExtValue(); 1070 1071 O << " = { "; 1072 1073 for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >> 1074 __CLK_ADDRESS_BASE) ; i < 3 ; i++) { 1075 O << "addr_mode_" << i << " = "; 1076 switch (addr) { 1077 case 0: O << "wrap"; break; 1078 case 1: O << "clamp_to_border"; break; 1079 case 2: O << "clamp_to_edge"; break; 1080 case 3: O << "wrap"; break; 1081 case 4: O << "mirror"; break; 1082 } 1083 O <<", "; 1084 } 1085 O << "filter_mode = "; 1086 switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) { 1087 case 0: O << "nearest"; break; 1088 case 1: O << "linear"; break; 1089 case 2: assert ( 0 && "Anisotropic filtering is not supported"); 1090 default: O << "nearest"; break; 1091 } 1092 if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) { 1093 O << ", force_unnormalized_coords = 1"; 1094 } 1095 O << " }"; 1096 } 1097 1098 O << ";\n"; 1099 return; 1100 } 1101 1102 if (GVar->hasPrivateLinkage()) { 1103 1104 if (!strncmp(GVar->getName().data(), "unrollpragma", 12)) 1105 return; 1106 1107 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1108 if (!strncmp(GVar->getName().data(), "filename", 8)) 1109 return; 1110 if (GVar->use_empty()) 1111 return; 1112 } 1113 1114 const Function *demotedFunc = 0; 1115 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1116 O << "// " << GVar->getName().str() << " has been demoted\n"; 1117 if (localDecls.find(demotedFunc) != localDecls.end()) 1118 localDecls[demotedFunc].push_back(GVar); 1119 else { 1120 std::vector<GlobalVariable *> temp; 1121 temp.push_back(GVar); 1122 localDecls[demotedFunc] = temp; 1123 } 1124 return; 1125 } 1126 1127 O << "."; 1128 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1129 if (GVar->getAlignment() == 0) 1130 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1131 else 1132 O << " .align " << GVar->getAlignment(); 1133 1134 1135 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1136 O << " ."; 1137 O << getPTXFundamentalTypeStr(ETy, false); 1138 O << " "; 1139 O << *Mang->getSymbol(GVar); 1140 1141 // Ptx allows variable initilization only for constant and global state 1142 // spaces. 1143 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1144 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || 1145 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) 1146 && GVar->hasInitializer()) { 1147 Constant *Initializer = GVar->getInitializer(); 1148 if (!Initializer->isNullValue()) { 1149 O << " = " ; 1150 printScalarConstant(Initializer, O); 1151 } 1152 } 1153 } else { 1154 unsigned int ElementSize =0; 1155 1156 // Although PTX has direct support for struct type and array type and 1157 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1158 // targets that support these high level field accesses. Structs, arrays 1159 // and vectors are lowered into arrays of bytes. 1160 switch (ETy->getTypeID()) { 1161 case Type::StructTyID: 1162 case Type::ArrayTyID: 1163 case Type::VectorTyID: 1164 ElementSize = TD->getTypeStoreSize(ETy); 1165 // Ptx allows variable initilization only for constant and 1166 // global state spaces. 1167 if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || 1168 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || 1169 (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) 1170 && GVar->hasInitializer()) { 1171 Constant *Initializer = GVar->getInitializer(); 1172 if (!isa<UndefValue>(Initializer) && 1173 !Initializer->isNullValue()) { 1174 AggBuffer aggBuffer(ElementSize, O, *this); 1175 bufferAggregateConstant(Initializer, &aggBuffer); 1176 if (aggBuffer.numSymbols) { 1177 if (nvptxSubtarget.is64Bit()) { 1178 O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ; 1179 O << ElementSize/8; 1180 } 1181 else { 1182 O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ; 1183 O << ElementSize/4; 1184 } 1185 O << "]"; 1186 } 1187 else { 1188 O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; 1189 O << ElementSize; 1190 O << "]"; 1191 } 1192 O << " = {" ; 1193 aggBuffer.print(); 1194 O << "}"; 1195 } 1196 else { 1197 O << " .b8 " << *Mang->getSymbol(GVar) ; 1198 if (ElementSize) { 1199 O <<"[" ; 1200 O << ElementSize; 1201 O << "]"; 1202 } 1203 } 1204 } 1205 else { 1206 O << " .b8 " << *Mang->getSymbol(GVar); 1207 if (ElementSize) { 1208 O <<"[" ; 1209 O << ElementSize; 1210 O << "]"; 1211 } 1212 } 1213 break; 1214 default: 1215 assert( 0 && "type not supported yet"); 1216 } 1217 1218 } 1219 O << ";\n"; 1220} 1221 1222void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1223 if (localDecls.find(f) == localDecls.end()) 1224 return; 1225 1226 std::vector<GlobalVariable *> &gvars = localDecls[f]; 1227 1228 for (unsigned i=0, e=gvars.size(); i!=e; ++i) { 1229 O << "\t// demoted variable\n\t"; 1230 printModuleLevelGV(gvars[i], O, true); 1231 } 1232} 1233 1234void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1235 raw_ostream &O) const { 1236 switch (AddressSpace) { 1237 case llvm::ADDRESS_SPACE_LOCAL: 1238 O << "local" ; 1239 break; 1240 case llvm::ADDRESS_SPACE_GLOBAL: 1241 O << "global" ; 1242 break; 1243 case llvm::ADDRESS_SPACE_CONST: 1244 // This logic should be consistent with that in 1245 // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) 1246 if (nvptxSubtarget.hasGenericLdSt()) 1247 O << "global" ; 1248 else 1249 O << "const" ; 1250 break; 1251 case llvm::ADDRESS_SPACE_CONST_NOT_GEN: 1252 O << "const" ; 1253 break; 1254 case llvm::ADDRESS_SPACE_SHARED: 1255 O << "shared" ; 1256 break; 1257 default: 1258 llvm_unreachable("unexpected address space"); 1259 } 1260} 1261 1262std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, 1263 bool useB4PTR) const { 1264 switch (Ty->getTypeID()) { 1265 default: 1266 llvm_unreachable("unexpected type"); 1267 break; 1268 case Type::IntegerTyID: { 1269 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1270 if (NumBits == 1) 1271 return "pred"; 1272 else if (NumBits <= 64) { 1273 std::string name = "u"; 1274 return name + utostr(NumBits); 1275 } else { 1276 llvm_unreachable("Integer too large"); 1277 break; 1278 } 1279 break; 1280 } 1281 case Type::FloatTyID: 1282 return "f32"; 1283 case Type::DoubleTyID: 1284 return "f64"; 1285 case Type::PointerTyID: 1286 if (nvptxSubtarget.is64Bit()) 1287 if (useB4PTR) return "b64"; 1288 else return "u64"; 1289 else 1290 if (useB4PTR) return "b32"; 1291 else return "u32"; 1292 } 1293 llvm_unreachable("unexpected type"); 1294 return NULL; 1295} 1296 1297void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, 1298 raw_ostream &O) { 1299 1300 const DataLayout *TD = TM.getDataLayout(); 1301 1302 // GlobalVariables are always constant pointers themselves. 1303 const PointerType *PTy = GVar->getType(); 1304 Type *ETy = PTy->getElementType(); 1305 1306 O << "."; 1307 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1308 if (GVar->getAlignment() == 0) 1309 O << " .align " << (int) TD->getPrefTypeAlignment(ETy); 1310 else 1311 O << " .align " << GVar->getAlignment(); 1312 1313 if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { 1314 O << " ."; 1315 O << getPTXFundamentalTypeStr(ETy); 1316 O << " "; 1317 O << *Mang->getSymbol(GVar); 1318 return; 1319 } 1320 1321 int64_t ElementSize =0; 1322 1323 // Although PTX has direct support for struct type and array type and LLVM IR 1324 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1325 // support these high level field accesses. Structs and arrays are lowered 1326 // into arrays of bytes. 1327 switch (ETy->getTypeID()) { 1328 case Type::StructTyID: 1329 case Type::ArrayTyID: 1330 case Type::VectorTyID: 1331 ElementSize = TD->getTypeStoreSize(ETy); 1332 O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; 1333 if (ElementSize) { 1334 O << itostr(ElementSize) ; 1335 } 1336 O << "]"; 1337 break; 1338 default: 1339 assert( 0 && "type not supported yet"); 1340 } 1341 return ; 1342} 1343 1344 1345static unsigned int 1346getOpenCLAlignment(const DataLayout *TD, 1347 Type *Ty) { 1348 if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) 1349 return TD->getPrefTypeAlignment(Ty); 1350 1351 const ArrayType *ATy = dyn_cast<ArrayType>(Ty); 1352 if (ATy) 1353 return getOpenCLAlignment(TD, ATy->getElementType()); 1354 1355 const VectorType *VTy = dyn_cast<VectorType>(Ty); 1356 if (VTy) { 1357 Type *ETy = VTy->getElementType(); 1358 unsigned int numE = VTy->getNumElements(); 1359 unsigned int alignE = TD->getPrefTypeAlignment(ETy); 1360 if (numE == 3) 1361 return 4*alignE; 1362 else 1363 return numE*alignE; 1364 } 1365 1366 const StructType *STy = dyn_cast<StructType>(Ty); 1367 if (STy) { 1368 unsigned int alignStruct = 1; 1369 // Go through each element of the struct and find the 1370 // largest alignment. 1371 for (unsigned i=0, e=STy->getNumElements(); i != e; i++) { 1372 Type *ETy = STy->getElementType(i); 1373 unsigned int align = getOpenCLAlignment(TD, ETy); 1374 if (align > alignStruct) 1375 alignStruct = align; 1376 } 1377 return alignStruct; 1378 } 1379 1380 const FunctionType *FTy = dyn_cast<FunctionType>(Ty); 1381 if (FTy) 1382 return TD->getPointerPrefAlignment(); 1383 return TD->getPrefTypeAlignment(Ty); 1384} 1385 1386void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1387 int paramIndex, raw_ostream &O) { 1388 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1389 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) 1390 O << *CurrentFnSym << "_param_" << paramIndex; 1391 else { 1392 std::string argName = I->getName(); 1393 const char *p = argName.c_str(); 1394 while (*p) { 1395 if (*p == '.') 1396 O << "_"; 1397 else 1398 O << *p; 1399 p++; 1400 } 1401 } 1402} 1403 1404void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { 1405 Function::const_arg_iterator I, E; 1406 int i = 0; 1407 1408 if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || 1409 (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { 1410 O << *CurrentFnSym << "_param_" << paramIndex; 1411 return; 1412 } 1413 1414 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { 1415 if (i==paramIndex) { 1416 printParamName(I, paramIndex, O); 1417 return; 1418 } 1419 } 1420 llvm_unreachable("paramIndex out of bound"); 1421} 1422 1423void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, 1424 raw_ostream &O) { 1425 const DataLayout *TD = TM.getDataLayout(); 1426 const AttrListPtr &PAL = F->getAttributes(); 1427 const TargetLowering *TLI = TM.getTargetLowering(); 1428 Function::const_arg_iterator I, E; 1429 unsigned paramIndex = 0; 1430 bool first = true; 1431 bool isKernelFunc = llvm::isKernelFunction(*F); 1432 bool isABI = (nvptxSubtarget.getSmVersion() >= 20); 1433 MVT thePointerTy = TLI->getPointerTy(); 1434 1435 O << "(\n"; 1436 1437 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1438 const Type *Ty = I->getType(); 1439 1440 if (!first) 1441 O << ",\n"; 1442 1443 first = false; 1444 1445 // Handle image/sampler parameters 1446 if (llvm::isSampler(*I) || llvm::isImage(*I)) { 1447 if (llvm::isImage(*I)) { 1448 std::string sname = I->getName(); 1449 if (llvm::isImageWriteOnly(*I)) 1450 O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; 1451 else // Default image is read_only 1452 O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; 1453 } 1454 else // Should be llvm::isSampler(*I) 1455 O << "\t.param .samplerref " << *CurrentFnSym << "_param_" 1456 << paramIndex; 1457 continue; 1458 } 1459 1460 if (PAL.getParamAttributes(paramIndex+1). 1461 hasAttribute(Attributes::ByVal) == false) { 1462 // Just a scalar 1463 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1464 if (isKernelFunc) { 1465 if (PTy) { 1466 // Special handling for pointer arguments to kernel 1467 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1468 1469 if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { 1470 Type *ETy = PTy->getElementType(); 1471 int addrSpace = PTy->getAddressSpace(); 1472 switch(addrSpace) { 1473 default: 1474 O << ".ptr "; 1475 break; 1476 case llvm::ADDRESS_SPACE_CONST_NOT_GEN: 1477 O << ".ptr .const "; 1478 break; 1479 case llvm::ADDRESS_SPACE_SHARED: 1480 O << ".ptr .shared "; 1481 break; 1482 case llvm::ADDRESS_SPACE_GLOBAL: 1483 case llvm::ADDRESS_SPACE_CONST: 1484 O << ".ptr .global "; 1485 break; 1486 } 1487 O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " "; 1488 } 1489 printParamName(I, paramIndex, O); 1490 continue; 1491 } 1492 1493 // non-pointer scalar to kernel func 1494 O << "\t.param ." 1495 << getPTXFundamentalTypeStr(Ty) << " "; 1496 printParamName(I, paramIndex, O); 1497 continue; 1498 } 1499 // Non-kernel function, just print .param .b<size> for ABI 1500 // and .reg .b<size> for non ABY 1501 unsigned sz = 0; 1502 if (isa<IntegerType>(Ty)) { 1503 sz = cast<IntegerType>(Ty)->getBitWidth(); 1504 if (sz < 32) sz = 32; 1505 } 1506 else if (isa<PointerType>(Ty)) 1507 sz = thePointerTy.getSizeInBits(); 1508 else 1509 sz = Ty->getPrimitiveSizeInBits(); 1510 if (isABI) 1511 O << "\t.param .b" << sz << " "; 1512 else 1513 O << "\t.reg .b" << sz << " "; 1514 printParamName(I, paramIndex, O); 1515 continue; 1516 } 1517 1518 // param has byVal attribute. So should be a pointer 1519 const PointerType *PTy = dyn_cast<PointerType>(Ty); 1520 assert(PTy && 1521 "Param with byval attribute should be a pointer type"); 1522 Type *ETy = PTy->getElementType(); 1523 1524 if (isABI || isKernelFunc) { 1525 // Just print .param .b8 .align <a> .param[size]; 1526 // <a> = PAL.getparamalignment 1527 // size = typeallocsize of element type 1528 unsigned align = PAL.getParamAlignment(paramIndex+1); 1529 if (align == 0) 1530 align = TD->getABITypeAlignment(ETy); 1531 1532 unsigned sz = TD->getTypeAllocSize(ETy); 1533 O << "\t.param .align " << align 1534 << " .b8 "; 1535 printParamName(I, paramIndex, O); 1536 O << "[" << sz << "]"; 1537 continue; 1538 } else { 1539 // Split the ETy into constituent parts and 1540 // print .param .b<size> <name> for each part. 1541 // Further, if a part is vector, print the above for 1542 // each vector element. 1543 SmallVector<EVT, 16> vtparts; 1544 ComputeValueVTs(*TLI, ETy, vtparts); 1545 for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { 1546 unsigned elems = 1; 1547 EVT elemtype = vtparts[i]; 1548 if (vtparts[i].isVector()) { 1549 elems = vtparts[i].getVectorNumElements(); 1550 elemtype = vtparts[i].getVectorElementType(); 1551 } 1552 1553 for (unsigned j=0,je=elems; j!=je; ++j) { 1554 unsigned sz = elemtype.getSizeInBits(); 1555 if (elemtype.isInteger() && (sz < 32)) sz = 32; 1556 O << "\t.reg .b" << sz << " "; 1557 printParamName(I, paramIndex, O); 1558 if (j<je-1) O << ",\n"; 1559 ++paramIndex; 1560 } 1561 if (i<e-1) 1562 O << ",\n"; 1563 } 1564 --paramIndex; 1565 continue; 1566 } 1567 } 1568 1569 O << "\n)\n"; 1570} 1571 1572void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1573 raw_ostream &O) { 1574 const Function *F = MF.getFunction(); 1575 emitFunctionParamList(F, O); 1576} 1577 1578 1579void NVPTXAsmPrinter:: 1580setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { 1581 SmallString<128> Str; 1582 raw_svector_ostream O(Str); 1583 1584 // Map the global virtual register number to a register class specific 1585 // virtual register number starting from 1 with that class. 1586 const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); 1587 //unsigned numRegClasses = TRI->getNumRegClasses(); 1588 1589 // Emit the Fake Stack Object 1590 const MachineFrameInfo *MFI = MF.getFrameInfo(); 1591 int NumBytes = (int) MFI->getStackSize(); 1592 if (NumBytes) { 1593 O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" 1594 << DEPOTNAME 1595 << getFunctionNumber() << "[" << NumBytes << "];\n"; 1596 if (nvptxSubtarget.is64Bit()) { 1597 O << "\t.reg .b64 \t%SP;\n"; 1598 O << "\t.reg .b64 \t%SPL;\n"; 1599 } 1600 else { 1601 O << "\t.reg .b32 \t%SP;\n"; 1602 O << "\t.reg .b32 \t%SPL;\n"; 1603 } 1604 } 1605 1606 // Go through all virtual registers to establish the mapping between the 1607 // global virtual 1608 // register number and the per class virtual register number. 1609 // We use the per class virtual register number in the ptx output. 1610 unsigned int numVRs = MRI->getNumVirtRegs(); 1611 for (unsigned i=0; i< numVRs; i++) { 1612 unsigned int vr = TRI->index2VirtReg(i); 1613 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1614 std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()]; 1615 int n = regmap.size(); 1616 regmap.insert(std::make_pair(vr, n+1)); 1617 } 1618 1619 // Emit register declarations 1620 // @TODO: Extract out the real register usage 1621 O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1622 O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1623 O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1624 O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1625 O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n"; 1626 O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1627 O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n"; 1628 1629 // Emit declaration of the virtual registers or 'physical' registers for 1630 // each register class 1631 //for (unsigned i=0; i< numRegClasses; i++) { 1632 // std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[i]; 1633 // const TargetRegisterClass *RC = TRI->getRegClass(i); 1634 // std::string rcname = getNVPTXRegClassName(RC); 1635 // std::string rcStr = getNVPTXRegClassStr(RC); 1636 // //int n = regmap.size(); 1637 // if (!isNVPTXVectorRegClass(RC)) { 1638 // O << "\t.reg " << rcname << " \t" << rcStr << "<" 1639 // << NVPTXNumRegisters << ">;\n"; 1640 // } 1641 1642 // Only declare those registers that may be used. And do not emit vector 1643 // registers as 1644 // they are all elementized to scalar registers. 1645 //if (n && !isNVPTXVectorRegClass(RC)) { 1646 // if (RegAllocNilUsed) { 1647 // O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1648 // << ">;\n"; 1649 // } 1650 // else { 1651 // O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr) 1652 // << "<" << 32 << ">;\n"; 1653 // } 1654 //} 1655 //} 1656 1657 OutStreamer.EmitRawText(O.str()); 1658} 1659 1660 1661void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1662 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1663 bool ignored; 1664 unsigned int numHex; 1665 const char *lead; 1666 1667 if (Fp->getType()->getTypeID()==Type::FloatTyID) { 1668 numHex = 8; 1669 lead = "0f"; 1670 APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, 1671 &ignored); 1672 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1673 numHex = 16; 1674 lead = "0d"; 1675 APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, 1676 &ignored); 1677 } else 1678 llvm_unreachable("unsupported fp type"); 1679 1680 APInt API = APF.bitcastToAPInt(); 1681 std::string hexstr(utohexstr(API.getZExtValue())); 1682 O << lead; 1683 if (hexstr.length() < numHex) 1684 O << std::string(numHex - hexstr.length(), '0'); 1685 O << utohexstr(API.getZExtValue()); 1686} 1687 1688void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { 1689 if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1690 O << CI->getValue(); 1691 return; 1692 } 1693 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1694 printFPConstant(CFP, O); 1695 return; 1696 } 1697 if (isa<ConstantPointerNull>(CPV)) { 1698 O << "0"; 1699 return; 1700 } 1701 if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1702 O << *Mang->getSymbol(GVar); 1703 return; 1704 } 1705 if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1706 Value *v = Cexpr->stripPointerCasts(); 1707 if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1708 O << *Mang->getSymbol(GVar); 1709 return; 1710 } else { 1711 O << *LowerConstant(CPV, *this); 1712 return; 1713 } 1714 } 1715 llvm_unreachable("Not scalar type found in printScalarConstant()"); 1716} 1717 1718 1719void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, 1720 AggBuffer *aggBuffer) { 1721 1722 const DataLayout *TD = TM.getDataLayout(); 1723 1724 if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1725 int s = TD->getTypeAllocSize(CPV->getType()); 1726 if (s<Bytes) 1727 s = Bytes; 1728 aggBuffer->addZeros(s); 1729 return; 1730 } 1731 1732 unsigned char *ptr; 1733 switch (CPV->getType()->getTypeID()) { 1734 1735 case Type::IntegerTyID: { 1736 const Type *ETy = CPV->getType(); 1737 if ( ETy == Type::getInt8Ty(CPV->getContext()) ){ 1738 unsigned char c = 1739 (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1740 ptr = &c; 1741 aggBuffer->addBytes(ptr, 1, Bytes); 1742 } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) { 1743 short int16 = 1744 (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); 1745 ptr = (unsigned char*)&int16; 1746 aggBuffer->addBytes(ptr, 2, Bytes); 1747 } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) { 1748 if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1749 int int32 =(int)(constInt->getZExtValue()); 1750 ptr = (unsigned char*)&int32; 1751 aggBuffer->addBytes(ptr, 4, Bytes); 1752 break; 1753 } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1754 if (ConstantInt *constInt = 1755 dyn_cast<ConstantInt>(ConstantFoldConstantExpression( 1756 Cexpr, TD))) { 1757 int int32 =(int)(constInt->getZExtValue()); 1758 ptr = (unsigned char*)&int32; 1759 aggBuffer->addBytes(ptr, 4, Bytes); 1760 break; 1761 } 1762 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1763 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1764 aggBuffer->addSymbol(v); 1765 aggBuffer->addZeros(4); 1766 break; 1767 } 1768 } 1769 llvm_unreachable("unsupported integer const type"); 1770 } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) { 1771 if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1772 long long int64 =(long long)(constInt->getZExtValue()); 1773 ptr = (unsigned char*)&int64; 1774 aggBuffer->addBytes(ptr, 8, Bytes); 1775 break; 1776 } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1777 if (ConstantInt *constInt = dyn_cast<ConstantInt>( 1778 ConstantFoldConstantExpression(Cexpr, TD))) { 1779 long long int64 =(long long)(constInt->getZExtValue()); 1780 ptr = (unsigned char*)&int64; 1781 aggBuffer->addBytes(ptr, 8, Bytes); 1782 break; 1783 } 1784 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1785 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1786 aggBuffer->addSymbol(v); 1787 aggBuffer->addZeros(8); 1788 break; 1789 } 1790 } 1791 llvm_unreachable("unsupported integer const type"); 1792 } else 1793 llvm_unreachable("unsupported integer const type"); 1794 break; 1795 } 1796 case Type::FloatTyID: 1797 case Type::DoubleTyID: { 1798 ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); 1799 const Type* Ty = CFP->getType(); 1800 if (Ty == Type::getFloatTy(CPV->getContext())) { 1801 float float32 = (float)CFP->getValueAPF().convertToFloat(); 1802 ptr = (unsigned char*)&float32; 1803 aggBuffer->addBytes(ptr, 4, Bytes); 1804 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1805 double float64 = CFP->getValueAPF().convertToDouble(); 1806 ptr = (unsigned char*)&float64; 1807 aggBuffer->addBytes(ptr, 8, Bytes); 1808 } 1809 else { 1810 llvm_unreachable("unsupported fp const type"); 1811 } 1812 break; 1813 } 1814 case Type::PointerTyID: { 1815 if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1816 aggBuffer->addSymbol(GVar); 1817 } 1818 else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1819 Value *v = Cexpr->stripPointerCasts(); 1820 aggBuffer->addSymbol(v); 1821 } 1822 unsigned int s = TD->getTypeAllocSize(CPV->getType()); 1823 aggBuffer->addZeros(s); 1824 break; 1825 } 1826 1827 case Type::ArrayTyID: 1828 case Type::VectorTyID: 1829 case Type::StructTyID: { 1830 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) || 1831 isa<ConstantStruct>(CPV)) { 1832 int ElementSize = TD->getTypeAllocSize(CPV->getType()); 1833 bufferAggregateConstant(CPV, aggBuffer); 1834 if ( Bytes > ElementSize ) 1835 aggBuffer->addZeros(Bytes-ElementSize); 1836 } 1837 else if (isa<ConstantAggregateZero>(CPV)) 1838 aggBuffer->addZeros(Bytes); 1839 else 1840 llvm_unreachable("Unexpected Constant type"); 1841 break; 1842 } 1843 1844 default: 1845 llvm_unreachable("unsupported type"); 1846 } 1847} 1848 1849void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, 1850 AggBuffer *aggBuffer) { 1851 const DataLayout *TD = TM.getDataLayout(); 1852 int Bytes; 1853 1854 // Old constants 1855 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1856 if (CPV->getNumOperands()) 1857 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1858 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1859 return; 1860 } 1861 1862 if (const ConstantDataSequential *CDS = 1863 dyn_cast<ConstantDataSequential>(CPV)) { 1864 if (CDS->getNumElements()) 1865 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1866 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1867 aggBuffer); 1868 return; 1869 } 1870 1871 1872 if (isa<ConstantStruct>(CPV)) { 1873 if (CPV->getNumOperands()) { 1874 StructType *ST = cast<StructType>(CPV->getType()); 1875 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1876 if ( i == (e - 1)) 1877 Bytes = TD->getStructLayout(ST)->getElementOffset(0) + 1878 TD->getTypeAllocSize(ST) 1879 - TD->getStructLayout(ST)->getElementOffset(i); 1880 else 1881 Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) - 1882 TD->getStructLayout(ST)->getElementOffset(i); 1883 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, 1884 aggBuffer); 1885 } 1886 } 1887 return; 1888 } 1889 llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1890} 1891 1892// buildTypeNameMap - Run through symbol table looking for type names. 1893// 1894 1895 1896bool NVPTXAsmPrinter::isImageType(const Type *Ty) { 1897 1898 std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); 1899 1900 if (PI != TypeNameMap.end() && 1901 (!PI->second.compare("struct._image1d_t") || 1902 !PI->second.compare("struct._image2d_t") || 1903 !PI->second.compare("struct._image3d_t"))) 1904 return true; 1905 1906 return false; 1907} 1908 1909/// PrintAsmOperand - Print out an operand for an inline asm expression. 1910/// 1911bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 1912 unsigned AsmVariant, 1913 const char *ExtraCode, 1914 raw_ostream &O) { 1915 if (ExtraCode && ExtraCode[0]) { 1916 if (ExtraCode[1] != 0) return true; // Unknown modifier. 1917 1918 switch (ExtraCode[0]) { 1919 default: 1920 // See if this is a generic print operand 1921 return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O); 1922 case 'r': 1923 break; 1924 } 1925 } 1926 1927 printOperand(MI, OpNo, O); 1928 1929 return false; 1930} 1931 1932bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, 1933 unsigned OpNo, 1934 unsigned AsmVariant, 1935 const char *ExtraCode, 1936 raw_ostream &O) { 1937 if (ExtraCode && ExtraCode[0]) 1938 return true; // Unknown modifier 1939 1940 O << '['; 1941 printMemOperand(MI, OpNo, O); 1942 O << ']'; 1943 1944 return false; 1945} 1946 1947bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) 1948{ 1949 switch(MI.getOpcode()) { 1950 default: 1951 return false; 1952 case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0: 1953 case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32: 1954 case NVPTX::CallArgF64: case NVPTX::CallArgI16: 1955 case NVPTX::CallArgI32: case NVPTX::CallArgI32imm: 1956 case NVPTX::CallArgI64: case NVPTX::CallArgI8: 1957 case NVPTX::CallArgParam: case NVPTX::CallVoidInst: 1958 case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End: 1959 case NVPTX::CallVoidInstReg64: 1960 case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst: 1961 case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst: 1962 case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst: 1963 case NVPTX::StoreParamF32: case NVPTX::StoreParamF64: 1964 case NVPTX::StoreParamI16: case NVPTX::StoreParamI32: 1965 case NVPTX::StoreParamI64: case NVPTX::StoreParamI8: 1966 case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8: 1967 case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16: 1968 case NVPTX::StoreParamScalar2F32: case NVPTX::StoreParamScalar2F64: 1969 case NVPTX::StoreParamScalar2I16: case NVPTX::StoreParamScalar2I32: 1970 case NVPTX::StoreParamScalar2I64: case NVPTX::StoreParamScalar2I8: 1971 case NVPTX::StoreParamScalar4F32: case NVPTX::StoreParamScalar4I16: 1972 case NVPTX::StoreParamScalar4I32: case NVPTX::StoreParamScalar4I8: 1973 case NVPTX::StoreParamV2F32: case NVPTX::StoreParamV2F64: 1974 case NVPTX::StoreParamV2I16: case NVPTX::StoreParamV2I32: 1975 case NVPTX::StoreParamV2I64: case NVPTX::StoreParamV2I8: 1976 case NVPTX::StoreParamV4F32: case NVPTX::StoreParamV4I16: 1977 case NVPTX::StoreParamV4I32: case NVPTX::StoreParamV4I8: 1978 case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: 1979 case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32: 1980 case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8: 1981 case NVPTX::StoreRetvalScalar2F32: case NVPTX::StoreRetvalScalar2F64: 1982 case NVPTX::StoreRetvalScalar2I16: case NVPTX::StoreRetvalScalar2I32: 1983 case NVPTX::StoreRetvalScalar2I64: case NVPTX::StoreRetvalScalar2I8: 1984 case NVPTX::StoreRetvalScalar4F32: case NVPTX::StoreRetvalScalar4I16: 1985 case NVPTX::StoreRetvalScalar4I32: case NVPTX::StoreRetvalScalar4I8: 1986 case NVPTX::StoreRetvalV2F32: case NVPTX::StoreRetvalV2F64: 1987 case NVPTX::StoreRetvalV2I16: case NVPTX::StoreRetvalV2I32: 1988 case NVPTX::StoreRetvalV2I64: case NVPTX::StoreRetvalV2I8: 1989 case NVPTX::StoreRetvalV4F32: case NVPTX::StoreRetvalV4I16: 1990 case NVPTX::StoreRetvalV4I32: case NVPTX::StoreRetvalV4I8: 1991 case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64: 1992 case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32: 1993 case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: 1994 case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam: 1995 case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64: 1996 case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32: 1997 case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8: 1998 case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64: 1999 case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: 2000 case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8: 2001 case NVPTX::LoadParamScalar2F32: case NVPTX::LoadParamScalar2F64: 2002 case NVPTX::LoadParamScalar2I16: case NVPTX::LoadParamScalar2I32: 2003 case NVPTX::LoadParamScalar2I64: case NVPTX::LoadParamScalar2I8: 2004 case NVPTX::LoadParamScalar4F32: case NVPTX::LoadParamScalar4I16: 2005 case NVPTX::LoadParamScalar4I32: case NVPTX::LoadParamScalar4I8: 2006 case NVPTX::LoadParamV2F32: case NVPTX::LoadParamV2F64: 2007 case NVPTX::LoadParamV2I16: case NVPTX::LoadParamV2I32: 2008 case NVPTX::LoadParamV2I64: case NVPTX::LoadParamV2I8: 2009 case NVPTX::LoadParamV4F32: case NVPTX::LoadParamV4I16: 2010 case NVPTX::LoadParamV4I32: case NVPTX::LoadParamV4I8: 2011 case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: 2012 return true; 2013 } 2014 return false; 2015} 2016 2017// Force static initialization. 2018extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { 2019 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2020 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2021} 2022 2023 2024void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { 2025 std::stringstream temp; 2026 LineReader * reader = this->getReader(filename.str()); 2027 temp << "\n//"; 2028 temp << filename.str(); 2029 temp << ":"; 2030 temp << line; 2031 temp << " "; 2032 temp << reader->readLine(line); 2033 temp << "\n"; 2034 this->OutStreamer.EmitRawText(Twine(temp.str())); 2035} 2036 2037 2038LineReader *NVPTXAsmPrinter::getReader(std::string filename) { 2039 if (reader == NULL) { 2040 reader = new LineReader(filename); 2041 } 2042 2043 if (reader->fileName() != filename) { 2044 delete reader; 2045 reader = new LineReader(filename); 2046 } 2047 2048 return reader; 2049} 2050 2051 2052std::string 2053LineReader::readLine(unsigned lineNum) { 2054 if (lineNum < theCurLine) { 2055 theCurLine = 0; 2056 fstr.seekg(0,std::ios::beg); 2057 } 2058 while (theCurLine < lineNum) { 2059 fstr.getline(buff,500); 2060 theCurLine++; 2061 } 2062 return buff; 2063} 2064 2065// Force static initialization. 2066extern "C" void LLVMInitializeNVPTXAsmPrinter() { 2067 RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32); 2068 RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); 2069} 2070