1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a printer that converts from our internal representation
11// of machine-dependent LLVM code to NVPTX assembly language.
12//
13//===----------------------------------------------------------------------===//
14
15#include "NVPTXAsmPrinter.h"
16#include "InstPrinter/NVPTXInstPrinter.h"
17#include "MCTargetDesc/NVPTXMCAsmInfo.h"
18#include "NVPTX.h"
19#include "NVPTXInstrInfo.h"
20#include "NVPTXMCExpr.h"
21#include "NVPTXMachineFunctionInfo.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXTargetMachine.h"
24#include "NVPTXUtilities.h"
25#include "cl_common_defines.h"
26#include "llvm/ADT/StringExtras.h"
27#include "llvm/Analysis/ConstantFolding.h"
28#include "llvm/CodeGen/Analysis.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineLoopInfo.h"
31#include "llvm/CodeGen/MachineModuleInfo.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/IR/DebugInfo.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/GlobalVariable.h"
37#include "llvm/IR/Mangler.h"
38#include "llvm/IR/Module.h"
39#include "llvm/IR/Operator.h"
40#include "llvm/MC/MCInst.h"
41#include "llvm/MC/MCStreamer.h"
42#include "llvm/MC/MCSymbol.h"
43#include "llvm/Support/CommandLine.h"
44#include "llvm/Support/ErrorHandling.h"
45#include "llvm/Support/FormattedStream.h"
46#include "llvm/Support/Path.h"
47#include "llvm/Support/TargetRegistry.h"
48#include "llvm/Support/TimeValue.h"
49#include "llvm/Target/TargetLoweringObjectFile.h"
50#include "llvm/Transforms/Utils/UnrollLoop.h"
51#include <sstream>
52using namespace llvm;
53
54#define DEPOTNAME "__local_depot"
55
56static cl::opt<bool>
57EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
58                cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
59                cl::init(true));
60
61static cl::opt<bool>
62InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
63              cl::desc("NVPTX Specific: Emit source line in ptx file"),
64              cl::init(false));
65
66namespace {
67/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
68/// depends.
69void DiscoverDependentGlobals(const Value *V,
70                              DenseSet<const GlobalVariable *> &Globals) {
71  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
72    Globals.insert(GV);
73  else {
74    if (const User *U = dyn_cast<User>(V)) {
75      for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
76        DiscoverDependentGlobals(U->getOperand(i), Globals);
77      }
78    }
79  }
80}
81
82/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
83/// instances to be emitted, but only after any dependents have been added
84/// first.
85void VisitGlobalVariableForEmission(
86    const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
87    DenseSet<const GlobalVariable *> &Visited,
88    DenseSet<const GlobalVariable *> &Visiting) {
89  // Have we already visited this one?
90  if (Visited.count(GV))
91    return;
92
93  // Do we have a circular dependency?
94  if (!Visiting.insert(GV).second)
95    report_fatal_error("Circular dependency found in global variable set");
96
97  // Make sure we visit all dependents first
98  DenseSet<const GlobalVariable *> Others;
99  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
100    DiscoverDependentGlobals(GV->getOperand(i), Others);
101
102  for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
103                                                  E = Others.end();
104       I != E; ++I)
105    VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
106
107  // Now we can visit ourself
108  Order.push_back(GV);
109  Visited.insert(GV);
110  Visiting.erase(GV);
111}
112}
113
114void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
115  if (!EmitLineNumbers)
116    return;
117  if (ignoreLoc(MI))
118    return;
119
120  DebugLoc curLoc = MI.getDebugLoc();
121
122  if (!prevDebugLoc && !curLoc)
123    return;
124
125  if (prevDebugLoc == curLoc)
126    return;
127
128  prevDebugLoc = curLoc;
129
130  if (!curLoc)
131    return;
132
133  auto *Scope = cast_or_null<DIScope>(curLoc.getScope());
134  if (!Scope)
135     return;
136
137  StringRef fileName(Scope->getFilename());
138  StringRef dirName(Scope->getDirectory());
139  SmallString<128> FullPathName = dirName;
140  if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
141    sys::path::append(FullPathName, fileName);
142    fileName = FullPathName;
143  }
144
145  if (filenameMap.find(fileName) == filenameMap.end())
146    return;
147
148  // Emit the line from the source file.
149  if (InterleaveSrc)
150    this->emitSrcInText(fileName, curLoc.getLine());
151
152  std::stringstream temp;
153  temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine()
154       << " " << curLoc.getCol();
155  OutStreamer->EmitRawText(temp.str());
156}
157
158void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
159  SmallString<128> Str;
160  raw_svector_ostream OS(Str);
161  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA)
162    emitLineNumberAsDotLoc(*MI);
163
164  MCInst Inst;
165  lowerToMCInst(MI, Inst);
166  EmitToStreamer(*OutStreamer, Inst);
167}
168
169// Handle symbol backtracking for targets that do not support image handles
170bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
171                                           unsigned OpNo, MCOperand &MCOp) {
172  const MachineOperand &MO = MI->getOperand(OpNo);
173  const MCInstrDesc &MCID = MI->getDesc();
174
175  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
176    // This is a texture fetch, so operand 4 is a texref and operand 5 is
177    // a samplerref
178    if (OpNo == 4 && MO.isImm()) {
179      lowerImageHandleSymbol(MO.getImm(), MCOp);
180      return true;
181    }
182    if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
183      lowerImageHandleSymbol(MO.getImm(), MCOp);
184      return true;
185    }
186
187    return false;
188  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
189    unsigned VecSize =
190      1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
191
192    // For a surface load of vector size N, the Nth operand will be the surfref
193    if (OpNo == VecSize && MO.isImm()) {
194      lowerImageHandleSymbol(MO.getImm(), MCOp);
195      return true;
196    }
197
198    return false;
199  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
200    // This is a surface store, so operand 0 is a surfref
201    if (OpNo == 0 && MO.isImm()) {
202      lowerImageHandleSymbol(MO.getImm(), MCOp);
203      return true;
204    }
205
206    return false;
207  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
208    // This is a query, so operand 1 is a surfref/texref
209    if (OpNo == 1 && MO.isImm()) {
210      lowerImageHandleSymbol(MO.getImm(), MCOp);
211      return true;
212    }
213
214    return false;
215  }
216
217  return false;
218}
219
220void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
221  // Ewwww
222  TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
223  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
224  const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
225  const char *Sym = MFI->getImageHandleSymbol(Index);
226  std::string *SymNamePtr =
227    nvTM.getManagedStrPool()->getManagedString(Sym);
228  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(
229    StringRef(SymNamePtr->c_str())));
230}
231
232void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
233  OutMI.setOpcode(MI->getOpcode());
234  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
235  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
236    const MachineOperand &MO = MI->getOperand(0);
237    OutMI.addOperand(GetSymbolRef(
238      OutContext.getOrCreateSymbol(Twine(MO.getSymbolName()))));
239    return;
240  }
241
242  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
243    const MachineOperand &MO = MI->getOperand(i);
244
245    MCOperand MCOp;
246    if (!nvptxSubtarget->hasImageHandles()) {
247      if (lowerImageHandleOperand(MI, i, MCOp)) {
248        OutMI.addOperand(MCOp);
249        continue;
250      }
251    }
252
253    if (lowerOperand(MO, MCOp))
254      OutMI.addOperand(MCOp);
255  }
256}
257
258bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
259                                   MCOperand &MCOp) {
260  switch (MO.getType()) {
261  default: llvm_unreachable("unknown operand type");
262  case MachineOperand::MO_Register:
263    MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
264    break;
265  case MachineOperand::MO_Immediate:
266    MCOp = MCOperand::createImm(MO.getImm());
267    break;
268  case MachineOperand::MO_MachineBasicBlock:
269    MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
270        MO.getMBB()->getSymbol(), OutContext));
271    break;
272  case MachineOperand::MO_ExternalSymbol:
273    MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
274    break;
275  case MachineOperand::MO_GlobalAddress:
276    MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
277    break;
278  case MachineOperand::MO_FPImmediate: {
279    const ConstantFP *Cnt = MO.getFPImm();
280    APFloat Val = Cnt->getValueAPF();
281
282    switch (Cnt->getType()->getTypeID()) {
283    default: report_fatal_error("Unsupported FP type"); break;
284    case Type::FloatTyID:
285      MCOp = MCOperand::createExpr(
286        NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
287      break;
288    case Type::DoubleTyID:
289      MCOp = MCOperand::createExpr(
290        NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext));
291      break;
292    }
293    break;
294  }
295  }
296  return true;
297}
298
299unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
300  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
301    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
302
303    DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
304    unsigned RegNum = RegMap[Reg];
305
306    // Encode the register class in the upper 4 bits
307    // Must be kept in sync with NVPTXInstPrinter::printRegName
308    unsigned Ret = 0;
309    if (RC == &NVPTX::Int1RegsRegClass) {
310      Ret = (1 << 28);
311    } else if (RC == &NVPTX::Int16RegsRegClass) {
312      Ret = (2 << 28);
313    } else if (RC == &NVPTX::Int32RegsRegClass) {
314      Ret = (3 << 28);
315    } else if (RC == &NVPTX::Int64RegsRegClass) {
316      Ret = (4 << 28);
317    } else if (RC == &NVPTX::Float32RegsRegClass) {
318      Ret = (5 << 28);
319    } else if (RC == &NVPTX::Float64RegsRegClass) {
320      Ret = (6 << 28);
321    } else {
322      report_fatal_error("Bad register class");
323    }
324
325    // Insert the vreg number
326    Ret |= (RegNum & 0x0FFFFFFF);
327    return Ret;
328  } else {
329    // Some special-use registers are actually physical registers.
330    // Encode this as the register class ID of 0 and the real register ID.
331    return Reg & 0x0FFFFFFF;
332  }
333}
334
335MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
336  const MCExpr *Expr;
337  Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
338                                 OutContext);
339  return MCOperand::createExpr(Expr);
340}
341
342void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
343  const DataLayout &DL = getDataLayout();
344  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
345
346  Type *Ty = F->getReturnType();
347
348  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
349
350  if (Ty->getTypeID() == Type::VoidTyID)
351    return;
352
353  O << " (";
354
355  if (isABI) {
356    if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
357      unsigned size = 0;
358      if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
359        size = ITy->getBitWidth();
360        if (size < 32)
361          size = 32;
362      } else {
363        assert(Ty->isFloatingPointTy() && "Floating point type expected here");
364        size = Ty->getPrimitiveSizeInBits();
365      }
366
367      O << ".param .b" << size << " func_retval0";
368    } else if (isa<PointerType>(Ty)) {
369      O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
370        << " func_retval0";
371    } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
372      unsigned totalsz = DL.getTypeAllocSize(Ty);
373       unsigned retAlignment = 0;
374       if (!llvm::getAlign(*F, 0, retAlignment))
375         retAlignment = DL.getABITypeAlignment(Ty);
376       O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
377         << "]";
378    } else
379      llvm_unreachable("Unknown return type");
380  } else {
381    SmallVector<EVT, 16> vtparts;
382    ComputeValueVTs(*TLI, DL, Ty, vtparts);
383    unsigned idx = 0;
384    for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
385      unsigned elems = 1;
386      EVT elemtype = vtparts[i];
387      if (vtparts[i].isVector()) {
388        elems = vtparts[i].getVectorNumElements();
389        elemtype = vtparts[i].getVectorElementType();
390      }
391
392      for (unsigned j = 0, je = elems; j != je; ++j) {
393        unsigned sz = elemtype.getSizeInBits();
394        if (elemtype.isInteger() && (sz < 32))
395          sz = 32;
396        O << ".reg .b" << sz << " func_retval" << idx;
397        if (j < je - 1)
398          O << ", ";
399        ++idx;
400      }
401      if (i < e - 1)
402        O << ", ";
403    }
404  }
405  O << ") ";
406  return;
407}
408
409void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
410                                        raw_ostream &O) {
411  const Function *F = MF.getFunction();
412  printReturnValStr(F, O);
413}
414
415// Return true if MBB is the header of a loop marked with
416// llvm.loop.unroll.disable.
417// TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
418bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
419    const MachineBasicBlock &MBB) const {
420  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
421  // We insert .pragma "nounroll" only to the loop header.
422  if (!LI.isLoopHeader(&MBB))
423    return false;
424
425  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
426  // we iterate through each back edge of the loop with header MBB, and check
427  // whether its metadata contains llvm.loop.unroll.disable.
428  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
429    const MachineBasicBlock *PMBB = *I;
430    if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
431      // Edges from other loops to MBB are not back edges.
432      continue;
433    }
434    if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
435      if (MDNode *LoopID = PBB->getTerminator()->getMetadata("llvm.loop")) {
436        if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
437          return true;
438      }
439    }
440  }
441  return false;
442}
443
444void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
445  AsmPrinter::EmitBasicBlockStart(MBB);
446  if (isLoopHeaderOfNoUnroll(MBB))
447    OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
448}
449
450void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
451  SmallString<128> Str;
452  raw_svector_ostream O(Str);
453
454  if (!GlobalsEmitted) {
455    emitGlobals(*MF->getFunction()->getParent());
456    GlobalsEmitted = true;
457  }
458
459  // Set up
460  MRI = &MF->getRegInfo();
461  F = MF->getFunction();
462  emitLinkageDirective(F, O);
463  if (llvm::isKernelFunction(*F))
464    O << ".entry ";
465  else {
466    O << ".func ";
467    printReturnValStr(*MF, O);
468  }
469
470  CurrentFnSym->print(O, MAI);
471
472  emitFunctionParamList(*MF, O);
473
474  if (llvm::isKernelFunction(*F))
475    emitKernelFunctionDirectives(*F, O);
476
477  OutStreamer->EmitRawText(O.str());
478
479  prevDebugLoc = DebugLoc();
480}
481
482void NVPTXAsmPrinter::EmitFunctionBodyStart() {
483  VRegMapping.clear();
484  OutStreamer->EmitRawText(StringRef("{\n"));
485  setAndEmitFunctionVirtualRegisters(*MF);
486
487  SmallString<128> Str;
488  raw_svector_ostream O(Str);
489  emitDemotedVars(MF->getFunction(), O);
490  OutStreamer->EmitRawText(O.str());
491}
492
493void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
494  OutStreamer->EmitRawText(StringRef("}\n"));
495  VRegMapping.clear();
496}
497
498void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
499  unsigned RegNo = MI->getOperand(0).getReg();
500  if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
501    OutStreamer->AddComment(Twine("implicit-def: ") +
502                            getVirtualRegisterName(RegNo));
503  } else {
504    OutStreamer->AddComment(Twine("implicit-def: ") +
505                            nvptxSubtarget->getRegisterInfo()->getName(RegNo));
506  }
507  OutStreamer->AddBlankLine();
508}
509
510void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
511                                                   raw_ostream &O) const {
512  // If the NVVM IR has some of reqntid* specified, then output
513  // the reqntid directive, and set the unspecified ones to 1.
514  // If none of reqntid* is specified, don't output reqntid directive.
515  unsigned reqntidx, reqntidy, reqntidz;
516  bool specified = false;
517  if (!llvm::getReqNTIDx(F, reqntidx))
518    reqntidx = 1;
519  else
520    specified = true;
521  if (!llvm::getReqNTIDy(F, reqntidy))
522    reqntidy = 1;
523  else
524    specified = true;
525  if (!llvm::getReqNTIDz(F, reqntidz))
526    reqntidz = 1;
527  else
528    specified = true;
529
530  if (specified)
531    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
532      << "\n";
533
534  // If the NVVM IR has some of maxntid* specified, then output
535  // the maxntid directive, and set the unspecified ones to 1.
536  // If none of maxntid* is specified, don't output maxntid directive.
537  unsigned maxntidx, maxntidy, maxntidz;
538  specified = false;
539  if (!llvm::getMaxNTIDx(F, maxntidx))
540    maxntidx = 1;
541  else
542    specified = true;
543  if (!llvm::getMaxNTIDy(F, maxntidy))
544    maxntidy = 1;
545  else
546    specified = true;
547  if (!llvm::getMaxNTIDz(F, maxntidz))
548    maxntidz = 1;
549  else
550    specified = true;
551
552  if (specified)
553    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
554      << "\n";
555
556  unsigned mincta;
557  if (llvm::getMinCTASm(F, mincta))
558    O << ".minnctapersm " << mincta << "\n";
559}
560
561std::string
562NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
563  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
564
565  std::string Name;
566  raw_string_ostream NameStr(Name);
567
568  VRegRCMap::const_iterator I = VRegMapping.find(RC);
569  assert(I != VRegMapping.end() && "Bad register class");
570  const DenseMap<unsigned, unsigned> &RegMap = I->second;
571
572  VRegMap::const_iterator VI = RegMap.find(Reg);
573  assert(VI != RegMap.end() && "Bad virtual register");
574  unsigned MappedVR = VI->second;
575
576  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
577
578  NameStr.flush();
579  return Name;
580}
581
582void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
583                                          raw_ostream &O) {
584  O << getVirtualRegisterName(vr);
585}
586
587void NVPTXAsmPrinter::printVecModifiedImmediate(
588    const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
589  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
590  int Imm = (int) MO.getImm();
591  if (0 == strcmp(Modifier, "vecelem"))
592    O << "_" << vecelem[Imm];
593  else if (0 == strcmp(Modifier, "vecv4comm1")) {
594    if ((Imm < 0) || (Imm > 3))
595      O << "//";
596  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
597    if ((Imm < 4) || (Imm > 7))
598      O << "//";
599  } else if (0 == strcmp(Modifier, "vecv4pos")) {
600    if (Imm < 0)
601      Imm = 0;
602    O << "_" << vecelem[Imm % 4];
603  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
604    if ((Imm < 0) || (Imm > 1))
605      O << "//";
606  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
607    if ((Imm < 2) || (Imm > 3))
608      O << "//";
609  } else if (0 == strcmp(Modifier, "vecv2pos")) {
610    if (Imm < 0)
611      Imm = 0;
612    O << "_" << vecelem[Imm % 2];
613  } else
614    llvm_unreachable("Unknown Modifier on immediate operand");
615}
616
617
618
619void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
620
621  emitLinkageDirective(F, O);
622  if (llvm::isKernelFunction(*F))
623    O << ".entry ";
624  else
625    O << ".func ";
626  printReturnValStr(F, O);
627  getSymbol(F)->print(O, MAI);
628  O << "\n";
629  emitFunctionParamList(F, O);
630  O << ";\n";
631}
632
633static bool usedInGlobalVarDef(const Constant *C) {
634  if (!C)
635    return false;
636
637  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
638    return GV->getName() != "llvm.used";
639  }
640
641  for (const User *U : C->users())
642    if (const Constant *C = dyn_cast<Constant>(U))
643      if (usedInGlobalVarDef(C))
644        return true;
645
646  return false;
647}
648
649static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
650  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
651    if (othergv->getName() == "llvm.used")
652      return true;
653  }
654
655  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
656    if (instr->getParent() && instr->getParent()->getParent()) {
657      const Function *curFunc = instr->getParent()->getParent();
658      if (oneFunc && (curFunc != oneFunc))
659        return false;
660      oneFunc = curFunc;
661      return true;
662    } else
663      return false;
664  }
665
666  for (const User *UU : U->users())
667    if (!usedInOneFunc(UU, oneFunc))
668      return false;
669
670  return true;
671}
672
673/* Find out if a global variable can be demoted to local scope.
674 * Currently, this is valid for CUDA shared variables, which have local
675 * scope and global lifetime. So the conditions to check are :
676 * 1. Is the global variable in shared address space?
677 * 2. Does it have internal linkage?
678 * 3. Is the global variable referenced only in one function?
679 */
680static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
681  if (!gv->hasInternalLinkage())
682    return false;
683  PointerType *Pty = gv->getType();
684  if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
685    return false;
686
687  const Function *oneFunc = nullptr;
688
689  bool flag = usedInOneFunc(gv, oneFunc);
690  if (!flag)
691    return false;
692  if (!oneFunc)
693    return false;
694  f = oneFunc;
695  return true;
696}
697
698static bool useFuncSeen(const Constant *C,
699                        llvm::DenseMap<const Function *, bool> &seenMap) {
700  for (const User *U : C->users()) {
701    if (const Constant *cu = dyn_cast<Constant>(U)) {
702      if (useFuncSeen(cu, seenMap))
703        return true;
704    } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
705      const BasicBlock *bb = I->getParent();
706      if (!bb)
707        continue;
708      const Function *caller = bb->getParent();
709      if (!caller)
710        continue;
711      if (seenMap.find(caller) != seenMap.end())
712        return true;
713    }
714  }
715  return false;
716}
717
718void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
719  llvm::DenseMap<const Function *, bool> seenMap;
720  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
721    const Function *F = &*FI;
722
723    if (F->isDeclaration()) {
724      if (F->use_empty())
725        continue;
726      if (F->getIntrinsicID())
727        continue;
728      emitDeclaration(F, O);
729      continue;
730    }
731    for (const User *U : F->users()) {
732      if (const Constant *C = dyn_cast<Constant>(U)) {
733        if (usedInGlobalVarDef(C)) {
734          // The use is in the initialization of a global variable
735          // that is a function pointer, so print a declaration
736          // for the original function
737          emitDeclaration(F, O);
738          break;
739        }
740        // Emit a declaration of this function if the function that
741        // uses this constant expr has already been seen.
742        if (useFuncSeen(C, seenMap)) {
743          emitDeclaration(F, O);
744          break;
745        }
746      }
747
748      if (!isa<Instruction>(U))
749        continue;
750      const Instruction *instr = cast<Instruction>(U);
751      const BasicBlock *bb = instr->getParent();
752      if (!bb)
753        continue;
754      const Function *caller = bb->getParent();
755      if (!caller)
756        continue;
757
758      // If a caller has already been seen, then the caller is
759      // appearing in the module before the callee. so print out
760      // a declaration for the callee.
761      if (seenMap.find(caller) != seenMap.end()) {
762        emitDeclaration(F, O);
763        break;
764      }
765    }
766    seenMap[F] = true;
767  }
768}
769
770void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
771  DebugInfoFinder DbgFinder;
772  DbgFinder.processModule(M);
773
774  unsigned i = 1;
775  for (const DICompileUnit *DIUnit : DbgFinder.compile_units()) {
776    StringRef Filename = DIUnit->getFilename();
777    StringRef Dirname = DIUnit->getDirectory();
778    SmallString<128> FullPathName = Dirname;
779    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
780      sys::path::append(FullPathName, Filename);
781      Filename = FullPathName;
782    }
783    if (filenameMap.find(Filename) != filenameMap.end())
784      continue;
785    filenameMap[Filename] = i;
786    OutStreamer->EmitDwarfFileDirective(i, "", Filename);
787    ++i;
788  }
789
790  for (DISubprogram *SP : DbgFinder.subprograms()) {
791    StringRef Filename = SP->getFilename();
792    StringRef Dirname = SP->getDirectory();
793    SmallString<128> FullPathName = Dirname;
794    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
795      sys::path::append(FullPathName, Filename);
796      Filename = FullPathName;
797    }
798    if (filenameMap.find(Filename) != filenameMap.end())
799      continue;
800    filenameMap[Filename] = i;
801    ++i;
802  }
803}
804
805bool NVPTXAsmPrinter::doInitialization(Module &M) {
806  // Construct a default subtarget off of the TargetMachine defaults. The
807  // rest of NVPTX isn't friendly to change subtargets per function and
808  // so the default TargetMachine will have all of the options.
809  const Triple &TT = TM.getTargetTriple();
810  StringRef CPU = TM.getTargetCPU();
811  StringRef FS = TM.getTargetFeatureString();
812  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
813  const NVPTXSubtarget STI(TT, CPU, FS, NTM);
814
815  SmallString<128> Str1;
816  raw_svector_ostream OS1(Str1);
817
818  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
819
820  // We need to call the parent's one explicitly.
821  //bool Result = AsmPrinter::doInitialization(M);
822
823  // Initialize TargetLoweringObjectFile.
824  const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
825      .Initialize(OutContext, TM);
826
827  Mang = new Mangler();
828
829  // Emit header before any dwarf directives are emitted below.
830  emitHeader(M, OS1, STI);
831  OutStreamer->EmitRawText(OS1.str());
832
833  // Already commented out
834  //bool Result = AsmPrinter::doInitialization(M);
835
836  // Emit module-level inline asm if it exists.
837  if (!M.getModuleInlineAsm().empty()) {
838    OutStreamer->AddComment("Start of file scope inline assembly");
839    OutStreamer->AddBlankLine();
840    OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
841    OutStreamer->AddBlankLine();
842    OutStreamer->AddComment("End of file scope inline assembly");
843    OutStreamer->AddBlankLine();
844  }
845
846  // If we're not NVCL we're CUDA, go ahead and emit filenames.
847  if (TM.getTargetTriple().getOS() != Triple::NVCL)
848    recordAndEmitFilenames(M);
849
850  GlobalsEmitted = false;
851
852  return false; // success
853}
854
855void NVPTXAsmPrinter::emitGlobals(const Module &M) {
856  SmallString<128> Str2;
857  raw_svector_ostream OS2(Str2);
858
859  emitDeclarations(M, OS2);
860
861  // As ptxas does not support forward references of globals, we need to first
862  // sort the list of module-level globals in def-use order. We visit each
863  // global variable in order, and ensure that we emit it *after* its dependent
864  // globals. We use a little extra memory maintaining both a set and a list to
865  // have fast searches while maintaining a strict ordering.
866  SmallVector<const GlobalVariable *, 8> Globals;
867  DenseSet<const GlobalVariable *> GVVisited;
868  DenseSet<const GlobalVariable *> GVVisiting;
869
870  // Visit each global variable, in order
871  for (const GlobalVariable &I : M.globals())
872    VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
873
874  assert(GVVisited.size() == M.getGlobalList().size() &&
875         "Missed a global variable");
876  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
877
878  // Print out module-level global variables in proper order
879  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
880    printModuleLevelGV(Globals[i], OS2);
881
882  OS2 << '\n';
883
884  OutStreamer->EmitRawText(OS2.str());
885}
886
887void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
888                                 const NVPTXSubtarget &STI) {
889  O << "//\n";
890  O << "// Generated by LLVM NVPTX Back-End\n";
891  O << "//\n";
892  O << "\n";
893
894  unsigned PTXVersion = STI.getPTXVersion();
895  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
896
897  O << ".target ";
898  O << STI.getTargetName();
899
900  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
901  if (NTM.getDrvInterface() == NVPTX::NVCL)
902    O << ", texmode_independent";
903  else {
904    if (!STI.hasDouble())
905      O << ", map_f64_to_f32";
906  }
907
908  if (MAI->doesSupportDebugInformation())
909    O << ", debug";
910
911  O << "\n";
912
913  O << ".address_size ";
914  if (NTM.is64Bit())
915    O << "64";
916  else
917    O << "32";
918  O << "\n";
919
920  O << "\n";
921}
922
923bool NVPTXAsmPrinter::doFinalization(Module &M) {
924  // If we did not emit any functions, then the global declarations have not
925  // yet been emitted.
926  if (!GlobalsEmitted) {
927    emitGlobals(M);
928    GlobalsEmitted = true;
929  }
930
931  // XXX Temproarily remove global variables so that doFinalization() will not
932  // emit them again (global variables are emitted at beginning).
933
934  Module::GlobalListType &global_list = M.getGlobalList();
935  int i, n = global_list.size();
936  GlobalVariable **gv_array = new GlobalVariable *[n];
937
938  // first, back-up GlobalVariable in gv_array
939  i = 0;
940  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
941       I != E; ++I)
942    gv_array[i++] = &*I;
943
944  // second, empty global_list
945  while (!global_list.empty())
946    global_list.remove(global_list.begin());
947
948  // call doFinalization
949  bool ret = AsmPrinter::doFinalization(M);
950
951  // now we restore global variables
952  for (i = 0; i < n; i++)
953    global_list.insert(global_list.end(), gv_array[i]);
954
955  clearAnnotationCache(&M);
956
957  delete[] gv_array;
958  return ret;
959
960  //bool Result = AsmPrinter::doFinalization(M);
961  // Instead of calling the parents doFinalization, we may
962  // clone parents doFinalization and customize here.
963  // Currently, we if NVISA out the EmitGlobals() in
964  // parent's doFinalization, which is too intrusive.
965  //
966  // Same for the doInitialization.
967  //return Result;
968}
969
970// This function emits appropriate linkage directives for
971// functions and global variables.
972//
973// extern function declaration            -> .extern
974// extern function definition             -> .visible
975// external global variable with init     -> .visible
976// external without init                  -> .extern
977// appending                              -> not allowed, assert.
978// for any linkage other than
979// internal, private, linker_private,
980// linker_private_weak, linker_private_weak_def_auto,
981// we emit                                -> .weak.
982
983void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
984                                           raw_ostream &O) {
985  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
986    if (V->hasExternalLinkage()) {
987      if (isa<GlobalVariable>(V)) {
988        const GlobalVariable *GVar = cast<GlobalVariable>(V);
989        if (GVar) {
990          if (GVar->hasInitializer())
991            O << ".visible ";
992          else
993            O << ".extern ";
994        }
995      } else if (V->isDeclaration())
996        O << ".extern ";
997      else
998        O << ".visible ";
999    } else if (V->hasAppendingLinkage()) {
1000      std::string msg;
1001      msg.append("Error: ");
1002      msg.append("Symbol ");
1003      if (V->hasName())
1004        msg.append(V->getName());
1005      msg.append("has unsupported appending linkage type");
1006      llvm_unreachable(msg.c_str());
1007    } else if (!V->hasInternalLinkage() &&
1008               !V->hasPrivateLinkage()) {
1009      O << ".weak ";
1010    }
1011  }
1012}
1013
1014void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1015                                         raw_ostream &O,
1016                                         bool processDemoted) {
1017
1018  // Skip meta data
1019  if (GVar->hasSection()) {
1020    if (GVar->getSection() == StringRef("llvm.metadata"))
1021      return;
1022  }
1023
1024  // Skip LLVM intrinsic global variables
1025  if (GVar->getName().startswith("llvm.") ||
1026      GVar->getName().startswith("nvvm."))
1027    return;
1028
1029  const DataLayout &DL = getDataLayout();
1030
1031  // GlobalVariables are always constant pointers themselves.
1032  PointerType *PTy = GVar->getType();
1033  Type *ETy = PTy->getElementType();
1034
1035  if (GVar->hasExternalLinkage()) {
1036    if (GVar->hasInitializer())
1037      O << ".visible ";
1038    else
1039      O << ".extern ";
1040  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1041             GVar->hasAvailableExternallyLinkage() ||
1042             GVar->hasCommonLinkage()) {
1043    O << ".weak ";
1044  }
1045
1046  if (llvm::isTexture(*GVar)) {
1047    O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
1048    return;
1049  }
1050
1051  if (llvm::isSurface(*GVar)) {
1052    O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
1053    return;
1054  }
1055
1056  if (GVar->isDeclaration()) {
1057    // (extern) declarations, no definition or initializer
1058    // Currently the only known declaration is for an automatic __local
1059    // (.shared) promoted to global.
1060    emitPTXGlobalVariable(GVar, O);
1061    O << ";\n";
1062    return;
1063  }
1064
1065  if (llvm::isSampler(*GVar)) {
1066    O << ".global .samplerref " << llvm::getSamplerName(*GVar);
1067
1068    const Constant *Initializer = nullptr;
1069    if (GVar->hasInitializer())
1070      Initializer = GVar->getInitializer();
1071    const ConstantInt *CI = nullptr;
1072    if (Initializer)
1073      CI = dyn_cast<ConstantInt>(Initializer);
1074    if (CI) {
1075      unsigned sample = CI->getZExtValue();
1076
1077      O << " = { ";
1078
1079      for (int i = 0,
1080               addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1081           i < 3; i++) {
1082        O << "addr_mode_" << i << " = ";
1083        switch (addr) {
1084        case 0:
1085          O << "wrap";
1086          break;
1087        case 1:
1088          O << "clamp_to_border";
1089          break;
1090        case 2:
1091          O << "clamp_to_edge";
1092          break;
1093        case 3:
1094          O << "wrap";
1095          break;
1096        case 4:
1097          O << "mirror";
1098          break;
1099        }
1100        O << ", ";
1101      }
1102      O << "filter_mode = ";
1103      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1104      case 0:
1105        O << "nearest";
1106        break;
1107      case 1:
1108        O << "linear";
1109        break;
1110      case 2:
1111        llvm_unreachable("Anisotropic filtering is not supported");
1112      default:
1113        O << "nearest";
1114        break;
1115      }
1116      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1117        O << ", force_unnormalized_coords = 1";
1118      }
1119      O << " }";
1120    }
1121
1122    O << ";\n";
1123    return;
1124  }
1125
1126  if (GVar->hasPrivateLinkage()) {
1127
1128    if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
1129      return;
1130
1131    // FIXME - need better way (e.g. Metadata) to avoid generating this global
1132    if (!strncmp(GVar->getName().data(), "filename", 8))
1133      return;
1134    if (GVar->use_empty())
1135      return;
1136  }
1137
1138  const Function *demotedFunc = nullptr;
1139  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1140    O << "// " << GVar->getName() << " has been demoted\n";
1141    if (localDecls.find(demotedFunc) != localDecls.end())
1142      localDecls[demotedFunc].push_back(GVar);
1143    else {
1144      std::vector<const GlobalVariable *> temp;
1145      temp.push_back(GVar);
1146      localDecls[demotedFunc] = temp;
1147    }
1148    return;
1149  }
1150
1151  O << ".";
1152  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1153
1154  if (isManaged(*GVar)) {
1155    O << " .attribute(.managed)";
1156  }
1157
1158  if (GVar->getAlignment() == 0)
1159    O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1160  else
1161    O << " .align " << GVar->getAlignment();
1162
1163  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
1164    O << " .";
1165    // Special case: ABI requires that we use .u8 for predicates
1166    if (ETy->isIntegerTy(1))
1167      O << "u8";
1168    else
1169      O << getPTXFundamentalTypeStr(ETy, false);
1170    O << " ";
1171    getSymbol(GVar)->print(O, MAI);
1172
1173    // Ptx allows variable initilization only for constant and global state
1174    // spaces.
1175    if (GVar->hasInitializer()) {
1176      if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1177          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
1178        const Constant *Initializer = GVar->getInitializer();
1179        // 'undef' is treated as there is no value specified.
1180        if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1181          O << " = ";
1182          printScalarConstant(Initializer, O);
1183        }
1184      } else {
1185        // The frontend adds zero-initializer to device and constant variables
1186        // that don't have an initial value, and UndefValue to shared
1187        // variables, so skip warning for this case.
1188        if (!GVar->getInitializer()->isNullValue() &&
1189            !isa<UndefValue>(GVar->getInitializer())) {
1190          report_fatal_error("initial value of '" + GVar->getName() +
1191                             "' is not allowed in addrspace(" +
1192                             Twine(PTy->getAddressSpace()) + ")");
1193        }
1194      }
1195    }
1196  } else {
1197    unsigned int ElementSize = 0;
1198
1199    // Although PTX has direct support for struct type and array type and
1200    // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1201    // targets that support these high level field accesses. Structs, arrays
1202    // and vectors are lowered into arrays of bytes.
1203    switch (ETy->getTypeID()) {
1204    case Type::StructTyID:
1205    case Type::ArrayTyID:
1206    case Type::VectorTyID:
1207      ElementSize = DL.getTypeStoreSize(ETy);
1208      // Ptx allows variable initilization only for constant and
1209      // global state spaces.
1210      if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1211           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
1212          GVar->hasInitializer()) {
1213        const Constant *Initializer = GVar->getInitializer();
1214        if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1215          AggBuffer aggBuffer(ElementSize, O, *this);
1216          bufferAggregateConstant(Initializer, &aggBuffer);
1217          if (aggBuffer.numSymbols) {
1218            if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1219              O << " .u64 ";
1220              getSymbol(GVar)->print(O, MAI);
1221              O << "[";
1222              O << ElementSize / 8;
1223            } else {
1224              O << " .u32 ";
1225              getSymbol(GVar)->print(O, MAI);
1226              O << "[";
1227              O << ElementSize / 4;
1228            }
1229            O << "]";
1230          } else {
1231            O << " .b8 ";
1232            getSymbol(GVar)->print(O, MAI);
1233            O << "[";
1234            O << ElementSize;
1235            O << "]";
1236          }
1237          O << " = {";
1238          aggBuffer.print();
1239          O << "}";
1240        } else {
1241          O << " .b8 ";
1242          getSymbol(GVar)->print(O, MAI);
1243          if (ElementSize) {
1244            O << "[";
1245            O << ElementSize;
1246            O << "]";
1247          }
1248        }
1249      } else {
1250        O << " .b8 ";
1251        getSymbol(GVar)->print(O, MAI);
1252        if (ElementSize) {
1253          O << "[";
1254          O << ElementSize;
1255          O << "]";
1256        }
1257      }
1258      break;
1259    default:
1260      llvm_unreachable("type not supported yet");
1261    }
1262
1263  }
1264  O << ";\n";
1265}
1266
1267void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1268  if (localDecls.find(f) == localDecls.end())
1269    return;
1270
1271  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1272
1273  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1274    O << "\t// demoted variable\n\t";
1275    printModuleLevelGV(gvars[i], O, true);
1276  }
1277}
1278
1279void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1280                                          raw_ostream &O) const {
1281  switch (AddressSpace) {
1282  case llvm::ADDRESS_SPACE_LOCAL:
1283    O << "local";
1284    break;
1285  case llvm::ADDRESS_SPACE_GLOBAL:
1286    O << "global";
1287    break;
1288  case llvm::ADDRESS_SPACE_CONST:
1289    O << "const";
1290    break;
1291  case llvm::ADDRESS_SPACE_SHARED:
1292    O << "shared";
1293    break;
1294  default:
1295    report_fatal_error("Bad address space found while emitting PTX");
1296    break;
1297  }
1298}
1299
1300std::string
1301NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1302  switch (Ty->getTypeID()) {
1303  default:
1304    llvm_unreachable("unexpected type");
1305    break;
1306  case Type::IntegerTyID: {
1307    unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1308    if (NumBits == 1)
1309      return "pred";
1310    else if (NumBits <= 64) {
1311      std::string name = "u";
1312      return name + utostr(NumBits);
1313    } else {
1314      llvm_unreachable("Integer too large");
1315      break;
1316    }
1317    break;
1318  }
1319  case Type::FloatTyID:
1320    return "f32";
1321  case Type::DoubleTyID:
1322    return "f64";
1323  case Type::PointerTyID:
1324    if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1325      if (useB4PTR)
1326        return "b64";
1327      else
1328        return "u64";
1329    else if (useB4PTR)
1330      return "b32";
1331    else
1332      return "u32";
1333  }
1334  llvm_unreachable("unexpected type");
1335  return nullptr;
1336}
1337
1338void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1339                                            raw_ostream &O) {
1340
1341  const DataLayout &DL = getDataLayout();
1342
1343  // GlobalVariables are always constant pointers themselves.
1344  PointerType *PTy = GVar->getType();
1345  Type *ETy = PTy->getElementType();
1346
1347  O << ".";
1348  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1349  if (GVar->getAlignment() == 0)
1350    O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1351  else
1352    O << " .align " << GVar->getAlignment();
1353
1354  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
1355    O << " .";
1356    O << getPTXFundamentalTypeStr(ETy);
1357    O << " ";
1358    getSymbol(GVar)->print(O, MAI);
1359    return;
1360  }
1361
1362  int64_t ElementSize = 0;
1363
1364  // Although PTX has direct support for struct type and array type and LLVM IR
1365  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1366  // support these high level field accesses. Structs and arrays are lowered
1367  // into arrays of bytes.
1368  switch (ETy->getTypeID()) {
1369  case Type::StructTyID:
1370  case Type::ArrayTyID:
1371  case Type::VectorTyID:
1372    ElementSize = DL.getTypeStoreSize(ETy);
1373    O << " .b8 ";
1374    getSymbol(GVar)->print(O, MAI);
1375    O << "[";
1376    if (ElementSize) {
1377      O << ElementSize;
1378    }
1379    O << "]";
1380    break;
1381  default:
1382    llvm_unreachable("type not supported yet");
1383  }
1384  return;
1385}
1386
1387static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
1388  if (Ty->isSingleValueType())
1389    return DL.getPrefTypeAlignment(Ty);
1390
1391  auto *ATy = dyn_cast<ArrayType>(Ty);
1392  if (ATy)
1393    return getOpenCLAlignment(DL, ATy->getElementType());
1394
1395  auto *STy = dyn_cast<StructType>(Ty);
1396  if (STy) {
1397    unsigned int alignStruct = 1;
1398    // Go through each element of the struct and find the
1399    // largest alignment.
1400    for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1401      Type *ETy = STy->getElementType(i);
1402      unsigned int align = getOpenCLAlignment(DL, ETy);
1403      if (align > alignStruct)
1404        alignStruct = align;
1405    }
1406    return alignStruct;
1407  }
1408
1409  auto *FTy = dyn_cast<FunctionType>(Ty);
1410  if (FTy)
1411    return DL.getPointerPrefAlignment();
1412  return DL.getPrefTypeAlignment(Ty);
1413}
1414
1415void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1416                                     int paramIndex, raw_ostream &O) {
1417  getSymbol(I->getParent())->print(O, MAI);
1418  O << "_param_" << paramIndex;
1419}
1420
1421void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1422  const DataLayout &DL = getDataLayout();
1423  const AttributeSet &PAL = F->getAttributes();
1424  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
1425  Function::const_arg_iterator I, E;
1426  unsigned paramIndex = 0;
1427  bool first = true;
1428  bool isKernelFunc = llvm::isKernelFunction(*F);
1429  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
1430  MVT thePointerTy = TLI->getPointerTy(DL);
1431
1432  O << "(\n";
1433
1434  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1435    Type *Ty = I->getType();
1436
1437    if (!first)
1438      O << ",\n";
1439
1440    first = false;
1441
1442    // Handle image/sampler parameters
1443    if (isKernelFunction(*F)) {
1444      if (isSampler(*I) || isImage(*I)) {
1445        if (isImage(*I)) {
1446          std::string sname = I->getName();
1447          if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1448            if (nvptxSubtarget->hasImageHandles())
1449              O << "\t.param .u64 .ptr .surfref ";
1450            else
1451              O << "\t.param .surfref ";
1452            CurrentFnSym->print(O, MAI);
1453            O << "_param_" << paramIndex;
1454          }
1455          else { // Default image is read_only
1456            if (nvptxSubtarget->hasImageHandles())
1457              O << "\t.param .u64 .ptr .texref ";
1458            else
1459              O << "\t.param .texref ";
1460            CurrentFnSym->print(O, MAI);
1461            O << "_param_" << paramIndex;
1462          }
1463        } else {
1464          if (nvptxSubtarget->hasImageHandles())
1465            O << "\t.param .u64 .ptr .samplerref ";
1466          else
1467            O << "\t.param .samplerref ";
1468          CurrentFnSym->print(O, MAI);
1469          O << "_param_" << paramIndex;
1470        }
1471        continue;
1472      }
1473    }
1474
1475    if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
1476      if (Ty->isAggregateType() || Ty->isVectorTy()) {
1477        // Just print .param .align <a> .b8 .param[size];
1478        // <a> = PAL.getparamalignment
1479        // size = typeallocsize of element type
1480        unsigned align = PAL.getParamAlignment(paramIndex + 1);
1481        if (align == 0)
1482          align = DL.getABITypeAlignment(Ty);
1483
1484        unsigned sz = DL.getTypeAllocSize(Ty);
1485        O << "\t.param .align " << align << " .b8 ";
1486        printParamName(I, paramIndex, O);
1487        O << "[" << sz << "]";
1488
1489        continue;
1490      }
1491      // Just a scalar
1492      auto *PTy = dyn_cast<PointerType>(Ty);
1493      if (isKernelFunc) {
1494        if (PTy) {
1495          // Special handling for pointer arguments to kernel
1496          O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1497
1498          if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1499              NVPTX::CUDA) {
1500            Type *ETy = PTy->getElementType();
1501            int addrSpace = PTy->getAddressSpace();
1502            switch (addrSpace) {
1503            default:
1504              O << ".ptr ";
1505              break;
1506            case llvm::ADDRESS_SPACE_CONST:
1507              O << ".ptr .const ";
1508              break;
1509            case llvm::ADDRESS_SPACE_SHARED:
1510              O << ".ptr .shared ";
1511              break;
1512            case llvm::ADDRESS_SPACE_GLOBAL:
1513              O << ".ptr .global ";
1514              break;
1515            }
1516            O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
1517          }
1518          printParamName(I, paramIndex, O);
1519          continue;
1520        }
1521
1522        // non-pointer scalar to kernel func
1523        O << "\t.param .";
1524        // Special case: predicate operands become .u8 types
1525        if (Ty->isIntegerTy(1))
1526          O << "u8";
1527        else
1528          O << getPTXFundamentalTypeStr(Ty);
1529        O << " ";
1530        printParamName(I, paramIndex, O);
1531        continue;
1532      }
1533      // Non-kernel function, just print .param .b<size> for ABI
1534      // and .reg .b<size> for non-ABI
1535      unsigned sz = 0;
1536      if (isa<IntegerType>(Ty)) {
1537        sz = cast<IntegerType>(Ty)->getBitWidth();
1538        if (sz < 32)
1539          sz = 32;
1540      } else if (isa<PointerType>(Ty))
1541        sz = thePointerTy.getSizeInBits();
1542      else
1543        sz = Ty->getPrimitiveSizeInBits();
1544      if (isABI)
1545        O << "\t.param .b" << sz << " ";
1546      else
1547        O << "\t.reg .b" << sz << " ";
1548      printParamName(I, paramIndex, O);
1549      continue;
1550    }
1551
1552    // param has byVal attribute. So should be a pointer
1553    auto *PTy = dyn_cast<PointerType>(Ty);
1554    assert(PTy && "Param with byval attribute should be a pointer type");
1555    Type *ETy = PTy->getElementType();
1556
1557    if (isABI || isKernelFunc) {
1558      // Just print .param .align <a> .b8 .param[size];
1559      // <a> = PAL.getparamalignment
1560      // size = typeallocsize of element type
1561      unsigned align = PAL.getParamAlignment(paramIndex + 1);
1562      if (align == 0)
1563        align = DL.getABITypeAlignment(ETy);
1564
1565      unsigned sz = DL.getTypeAllocSize(ETy);
1566      O << "\t.param .align " << align << " .b8 ";
1567      printParamName(I, paramIndex, O);
1568      O << "[" << sz << "]";
1569      continue;
1570    } else {
1571      // Split the ETy into constituent parts and
1572      // print .param .b<size> <name> for each part.
1573      // Further, if a part is vector, print the above for
1574      // each vector element.
1575      SmallVector<EVT, 16> vtparts;
1576      ComputeValueVTs(*TLI, DL, ETy, vtparts);
1577      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1578        unsigned elems = 1;
1579        EVT elemtype = vtparts[i];
1580        if (vtparts[i].isVector()) {
1581          elems = vtparts[i].getVectorNumElements();
1582          elemtype = vtparts[i].getVectorElementType();
1583        }
1584
1585        for (unsigned j = 0, je = elems; j != je; ++j) {
1586          unsigned sz = elemtype.getSizeInBits();
1587          if (elemtype.isInteger() && (sz < 32))
1588            sz = 32;
1589          O << "\t.reg .b" << sz << " ";
1590          printParamName(I, paramIndex, O);
1591          if (j < je - 1)
1592            O << ",\n";
1593          ++paramIndex;
1594        }
1595        if (i < e - 1)
1596          O << ",\n";
1597      }
1598      --paramIndex;
1599      continue;
1600    }
1601  }
1602
1603  O << "\n)\n";
1604}
1605
1606void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1607                                            raw_ostream &O) {
1608  const Function *F = MF.getFunction();
1609  emitFunctionParamList(F, O);
1610}
1611
1612void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1613    const MachineFunction &MF) {
1614  SmallString<128> Str;
1615  raw_svector_ostream O(Str);
1616
1617  // Map the global virtual register number to a register class specific
1618  // virtual register number starting from 1 with that class.
1619  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1620  //unsigned numRegClasses = TRI->getNumRegClasses();
1621
1622  // Emit the Fake Stack Object
1623  const MachineFrameInfo *MFI = MF.getFrameInfo();
1624  int NumBytes = (int) MFI->getStackSize();
1625  if (NumBytes) {
1626    O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
1627      << getFunctionNumber() << "[" << NumBytes << "];\n";
1628    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1629      O << "\t.reg .b64 \t%SP;\n";
1630      O << "\t.reg .b64 \t%SPL;\n";
1631    } else {
1632      O << "\t.reg .b32 \t%SP;\n";
1633      O << "\t.reg .b32 \t%SPL;\n";
1634    }
1635  }
1636
1637  // Go through all virtual registers to establish the mapping between the
1638  // global virtual
1639  // register number and the per class virtual register number.
1640  // We use the per class virtual register number in the ptx output.
1641  unsigned int numVRs = MRI->getNumVirtRegs();
1642  for (unsigned i = 0; i < numVRs; i++) {
1643    unsigned int vr = TRI->index2VirtReg(i);
1644    const TargetRegisterClass *RC = MRI->getRegClass(vr);
1645    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1646    int n = regmap.size();
1647    regmap.insert(std::make_pair(vr, n + 1));
1648  }
1649
1650  // Emit register declarations
1651  // @TODO: Extract out the real register usage
1652  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1653  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1654  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1655  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1656  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1657  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1658  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1659
1660  // Emit declaration of the virtual registers or 'physical' registers for
1661  // each register class
1662  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1663    const TargetRegisterClass *RC = TRI->getRegClass(i);
1664    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1665    std::string rcname = getNVPTXRegClassName(RC);
1666    std::string rcStr = getNVPTXRegClassStr(RC);
1667    int n = regmap.size();
1668
1669    // Only declare those registers that may be used.
1670    if (n) {
1671       O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1672         << ">;\n";
1673    }
1674  }
1675
1676  OutStreamer->EmitRawText(O.str());
1677}
1678
1679void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1680  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1681  bool ignored;
1682  unsigned int numHex;
1683  const char *lead;
1684
1685  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1686    numHex = 8;
1687    lead = "0f";
1688    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
1689  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1690    numHex = 16;
1691    lead = "0d";
1692    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
1693  } else
1694    llvm_unreachable("unsupported fp type");
1695
1696  APInt API = APF.bitcastToAPInt();
1697  std::string hexstr(utohexstr(API.getZExtValue()));
1698  O << lead;
1699  if (hexstr.length() < numHex)
1700    O << std::string(numHex - hexstr.length(), '0');
1701  O << utohexstr(API.getZExtValue());
1702}
1703
1704void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1705  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1706    O << CI->getValue();
1707    return;
1708  }
1709  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1710    printFPConstant(CFP, O);
1711    return;
1712  }
1713  if (isa<ConstantPointerNull>(CPV)) {
1714    O << "0";
1715    return;
1716  }
1717  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1718    PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
1719    bool IsNonGenericPointer = false;
1720    if (PTy && PTy->getAddressSpace() != 0) {
1721      IsNonGenericPointer = true;
1722    }
1723    if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1724      O << "generic(";
1725      getSymbol(GVar)->print(O, MAI);
1726      O << ")";
1727    } else {
1728      getSymbol(GVar)->print(O, MAI);
1729    }
1730    return;
1731  }
1732  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1733    const Value *v = Cexpr->stripPointerCasts();
1734    PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1735    bool IsNonGenericPointer = false;
1736    if (PTy && PTy->getAddressSpace() != 0) {
1737      IsNonGenericPointer = true;
1738    }
1739    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1740      if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1741        O << "generic(";
1742        getSymbol(GVar)->print(O, MAI);
1743        O << ")";
1744      } else {
1745        getSymbol(GVar)->print(O, MAI);
1746      }
1747      return;
1748    } else {
1749      lowerConstant(CPV)->print(O, MAI);
1750      return;
1751    }
1752  }
1753  llvm_unreachable("Not scalar type found in printScalarConstant()");
1754}
1755
1756// These utility functions assure we get the right sequence of bytes for a given
1757// type even for big-endian machines
// Stores the sizeof(T) low-order bytes of val into p, least-significant byte
// first. p must have room for sizeof(T) bytes.
template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
  const int64_t Wide = (int64_t)val;
  for (unsigned Byte = 0; Byte != sizeof(T); ++Byte)
    p[Byte] = (unsigned char)(Wide >> (8 * Byte));
}
// Stores the four bytes of val's bit pattern into p, least-significant byte
// first, independent of the host byte order. p must have room for 4 bytes.
static void ConvertFloatToBytes(unsigned char *p, float val) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float is expected to be 32 bits wide");
  // Copy the bits with memcpy; reading the float through an integer pointer
  // would violate strict aliasing. An unsigned holder keeps the shifts below
  // well defined.
  uint32_t Bits;
  std::memcpy(&Bits, &val, sizeof(Bits));
  for (unsigned i = 0; i < sizeof(Bits); ++i) {
    p[i] = (unsigned char)Bits;
    Bits >>= 8;
  }
}
// Stores the eight bytes of val's bit pattern into p, least-significant byte
// first, independent of the host byte order. p must have room for 8 bytes.
static void ConvertDoubleToBytes(unsigned char *p, double val) {
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double is expected to be 64 bits wide");
  // Copy the bits with memcpy; reading the double through an integer pointer
  // would violate strict aliasing. An unsigned holder keeps the shifts below
  // well defined.
  uint64_t Bits;
  std::memcpy(&Bits, &val, sizeof(Bits));
  for (unsigned i = 0; i < sizeof(Bits); ++i) {
    p[i] = (unsigned char)Bits;
    Bits >>= 8;
  }
}
1779
1780void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1781                                   AggBuffer *aggBuffer) {
1782
1783  const DataLayout &DL = getDataLayout();
1784
1785  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1786    int s = DL.getTypeAllocSize(CPV->getType());
1787    if (s < Bytes)
1788      s = Bytes;
1789    aggBuffer->addZeros(s);
1790    return;
1791  }
1792
1793  unsigned char ptr[8];
1794  switch (CPV->getType()->getTypeID()) {
1795
1796  case Type::IntegerTyID: {
1797    Type *ETy = CPV->getType();
1798    if (ETy == Type::getInt8Ty(CPV->getContext())) {
1799      unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
1800      ConvertIntToBytes<>(ptr, c);
1801      aggBuffer->addBytes(ptr, 1, Bytes);
1802    } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
1803      short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
1804      ConvertIntToBytes<>(ptr, int16);
1805      aggBuffer->addBytes(ptr, 2, Bytes);
1806    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
1807      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1808        int int32 = (int)(constInt->getZExtValue());
1809        ConvertIntToBytes<>(ptr, int32);
1810        aggBuffer->addBytes(ptr, 4, Bytes);
1811        break;
1812      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1813        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
1814                ConstantFoldConstantExpression(Cexpr, DL))) {
1815          int int32 = (int)(constInt->getZExtValue());
1816          ConvertIntToBytes<>(ptr, int32);
1817          aggBuffer->addBytes(ptr, 4, Bytes);
1818          break;
1819        }
1820        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1821          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1822          aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1823          aggBuffer->addZeros(4);
1824          break;
1825        }
1826      }
1827      llvm_unreachable("unsupported integer const type");
1828    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
1829      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1830        long long int64 = (long long)(constInt->getZExtValue());
1831        ConvertIntToBytes<>(ptr, int64);
1832        aggBuffer->addBytes(ptr, 8, Bytes);
1833        break;
1834      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1835        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
1836                ConstantFoldConstantExpression(Cexpr, DL))) {
1837          long long int64 = (long long)(constInt->getZExtValue());
1838          ConvertIntToBytes<>(ptr, int64);
1839          aggBuffer->addBytes(ptr, 8, Bytes);
1840          break;
1841        }
1842        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1843          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1844          aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1845          aggBuffer->addZeros(8);
1846          break;
1847        }
1848      }
1849      llvm_unreachable("unsupported integer const type");
1850    } else
1851      llvm_unreachable("unsupported integer const type");
1852    break;
1853  }
1854  case Type::FloatTyID:
1855  case Type::DoubleTyID: {
1856    const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
1857    Type *Ty = CFP->getType();
1858    if (Ty == Type::getFloatTy(CPV->getContext())) {
1859      float float32 = (float) CFP->getValueAPF().convertToFloat();
1860      ConvertFloatToBytes(ptr, float32);
1861      aggBuffer->addBytes(ptr, 4, Bytes);
1862    } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
1863      double float64 = CFP->getValueAPF().convertToDouble();
1864      ConvertDoubleToBytes(ptr, float64);
1865      aggBuffer->addBytes(ptr, 8, Bytes);
1866    } else {
1867      llvm_unreachable("unsupported fp const type");
1868    }
1869    break;
1870  }
1871  case Type::PointerTyID: {
1872    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1873      aggBuffer->addSymbol(GVar, GVar);
1874    } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1875      const Value *v = Cexpr->stripPointerCasts();
1876      aggBuffer->addSymbol(v, Cexpr);
1877    }
1878    unsigned int s = DL.getTypeAllocSize(CPV->getType());
1879    aggBuffer->addZeros(s);
1880    break;
1881  }
1882
1883  case Type::ArrayTyID:
1884  case Type::VectorTyID:
1885  case Type::StructTyID: {
1886    if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
1887        isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
1888      int ElementSize = DL.getTypeAllocSize(CPV->getType());
1889      bufferAggregateConstant(CPV, aggBuffer);
1890      if (Bytes > ElementSize)
1891        aggBuffer->addZeros(Bytes - ElementSize);
1892    } else if (isa<ConstantAggregateZero>(CPV))
1893      aggBuffer->addZeros(Bytes);
1894    else
1895      llvm_unreachable("Unexpected Constant type");
1896    break;
1897  }
1898
1899  default:
1900    llvm_unreachable("unsupported type");
1901  }
1902}
1903
1904void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1905                                              AggBuffer *aggBuffer) {
1906  const DataLayout &DL = getDataLayout();
1907  int Bytes;
1908
1909  // Old constants
1910  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1911    if (CPV->getNumOperands())
1912      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1913        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1914    return;
1915  }
1916
1917  if (const ConstantDataSequential *CDS =
1918          dyn_cast<ConstantDataSequential>(CPV)) {
1919    if (CDS->getNumElements())
1920      for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1921        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1922                     aggBuffer);
1923    return;
1924  }
1925
1926  if (isa<ConstantStruct>(CPV)) {
1927    if (CPV->getNumOperands()) {
1928      StructType *ST = cast<StructType>(CPV->getType());
1929      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1930        if (i == (e - 1))
1931          Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1932                  DL.getTypeAllocSize(ST) -
1933                  DL.getStructLayout(ST)->getElementOffset(i);
1934        else
1935          Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1936                  DL.getStructLayout(ST)->getElementOffset(i);
1937        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1938      }
1939    }
1940    return;
1941  }
1942  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1943}
1944
1945// buildTypeNameMap - Run through symbol table looking for type names.
1946//
1947
1948
1949bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
1950  switch (MI.getOpcode()) {
1951  default:
1952    return false;
1953  case NVPTX::CallArgBeginInst:
1954  case NVPTX::CallArgEndInst0:
1955  case NVPTX::CallArgEndInst1:
1956  case NVPTX::CallArgF32:
1957  case NVPTX::CallArgF64:
1958  case NVPTX::CallArgI16:
1959  case NVPTX::CallArgI32:
1960  case NVPTX::CallArgI32imm:
1961  case NVPTX::CallArgI64:
1962  case NVPTX::CallArgParam:
1963  case NVPTX::CallVoidInst:
1964  case NVPTX::CallVoidInstReg:
1965  case NVPTX::Callseq_End:
1966  case NVPTX::CallVoidInstReg64:
1967  case NVPTX::DeclareParamInst:
1968  case NVPTX::DeclareRetMemInst:
1969  case NVPTX::DeclareRetRegInst:
1970  case NVPTX::DeclareRetScalarInst:
1971  case NVPTX::DeclareScalarParamInst:
1972  case NVPTX::DeclareScalarRegInst:
1973  case NVPTX::StoreParamF32:
1974  case NVPTX::StoreParamF64:
1975  case NVPTX::StoreParamI16:
1976  case NVPTX::StoreParamI32:
1977  case NVPTX::StoreParamI64:
1978  case NVPTX::StoreParamI8:
1979  case NVPTX::StoreRetvalF32:
1980  case NVPTX::StoreRetvalF64:
1981  case NVPTX::StoreRetvalI16:
1982  case NVPTX::StoreRetvalI32:
1983  case NVPTX::StoreRetvalI64:
1984  case NVPTX::StoreRetvalI8:
1985  case NVPTX::LastCallArgF32:
1986  case NVPTX::LastCallArgF64:
1987  case NVPTX::LastCallArgI16:
1988  case NVPTX::LastCallArgI32:
1989  case NVPTX::LastCallArgI32imm:
1990  case NVPTX::LastCallArgI64:
1991  case NVPTX::LastCallArgParam:
1992  case NVPTX::LoadParamMemF32:
1993  case NVPTX::LoadParamMemF64:
1994  case NVPTX::LoadParamMemI16:
1995  case NVPTX::LoadParamMemI32:
1996  case NVPTX::LoadParamMemI64:
1997  case NVPTX::LoadParamMemI8:
1998  case NVPTX::PrototypeInst:
1999  case NVPTX::DBG_VALUE:
2000    return true;
2001  }
2002  return false;
2003}
2004
2005/// lowerConstantForGV - Return an MCExpr for the given Constant.  This is mostly
2006/// a copy from AsmPrinter::lowerConstant, except customized to only handle
2007/// expressions that are representable in PTX and create
2008/// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
2009const MCExpr *
2010NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
2011  MCContext &Ctx = OutContext;
2012
2013  if (CV->isNullValue() || isa<UndefValue>(CV))
2014    return MCConstantExpr::create(0, Ctx);
2015
2016  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
2017    return MCConstantExpr::create(CI->getZExtValue(), Ctx);
2018
2019  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
2020    const MCSymbolRefExpr *Expr =
2021      MCSymbolRefExpr::create(getSymbol(GV), Ctx);
2022    if (ProcessingGeneric) {
2023      return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
2024    } else {
2025      return Expr;
2026    }
2027  }
2028
2029  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
2030  if (!CE) {
2031    llvm_unreachable("Unknown constant value to lower!");
2032  }
2033
2034  switch (CE->getOpcode()) {
2035  default:
2036    // If the code isn't optimized, there may be outstanding folding
2037    // opportunities. Attempt to fold the expression using DataLayout as a
2038    // last resort before giving up.
2039    if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
2040      if (C != CE)
2041        return lowerConstantForGV(C, ProcessingGeneric);
2042
2043    // Otherwise report the problem to the user.
2044    {
2045      std::string S;
2046      raw_string_ostream OS(S);
2047      OS << "Unsupported expression in static initializer: ";
2048      CE->printAsOperand(OS, /*PrintType=*/false,
2049                     !MF ? nullptr : MF->getFunction()->getParent());
2050      report_fatal_error(OS.str());
2051    }
2052
2053  case Instruction::AddrSpaceCast: {
2054    // Strip the addrspacecast and pass along the operand
2055    PointerType *DstTy = cast<PointerType>(CE->getType());
2056    if (DstTy->getAddressSpace() == 0) {
2057      return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2058    }
2059    std::string S;
2060    raw_string_ostream OS(S);
2061    OS << "Unsupported expression in static initializer: ";
2062    CE->printAsOperand(OS, /*PrintType=*/ false,
2063                       !MF ? 0 : MF->getFunction()->getParent());
2064    report_fatal_error(OS.str());
2065  }
2066
2067  case Instruction::GetElementPtr: {
2068    const DataLayout &DL = getDataLayout();
2069
2070    // Generate a symbolic expression for the byte address
2071    APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2072    cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2073
2074    const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2075                                            ProcessingGeneric);
2076    if (!OffsetAI)
2077      return Base;
2078
2079    int64_t Offset = OffsetAI.getSExtValue();
2080    return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
2081                                   Ctx);
2082  }
2083
2084  case Instruction::Trunc:
2085    // We emit the value and depend on the assembler to truncate the generated
2086    // expression properly.  This is important for differences between
2087    // blockaddress labels.  Since the two labels are in the same function, it
2088    // is reasonable to treat their delta as a 32-bit value.
2089    // FALL THROUGH.
2090  case Instruction::BitCast:
2091    return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2092
2093  case Instruction::IntToPtr: {
2094    const DataLayout &DL = getDataLayout();
2095
2096    // Handle casts to pointers by changing them into casts to the appropriate
2097    // integer type.  This promotes constant folding and simplifies this code.
2098    Constant *Op = CE->getOperand(0);
2099    Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
2100                                      false/*ZExt*/);
2101    return lowerConstantForGV(Op, ProcessingGeneric);
2102  }
2103
2104  case Instruction::PtrToInt: {
2105    const DataLayout &DL = getDataLayout();
2106
2107    // Support only foldable casts to/from pointers that can be eliminated by
2108    // changing the pointer to the appropriately sized integer type.
2109    Constant *Op = CE->getOperand(0);
2110    Type *Ty = CE->getType();
2111
2112    const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2113
2114    // We can emit the pointer value into this slot if the slot is an
2115    // integer slot equal to the size of the pointer.
2116    if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2117      return OpExpr;
2118
2119    // Otherwise the pointer is smaller than the resultant integer, mask off
2120    // the high bits so we are sure to get a proper truncation if the input is
2121    // a constant expr.
2122    unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2123    const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2124    return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2125  }
2126
2127  // The MC library also has a right-shift operator, but it isn't consistently
2128  // signed or unsigned between different targets.
2129  case Instruction::Add: {
2130    const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2131    const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2132    switch (CE->getOpcode()) {
2133    default: llvm_unreachable("Unknown binary operator constant cast expr");
2134    case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2135    }
2136  }
2137  }
2138}
2139
2140// Copy of MCExpr::print customized for NVPTX
2141void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2142  switch (Expr.getKind()) {
2143  case MCExpr::Target:
2144    return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2145  case MCExpr::Constant:
2146    OS << cast<MCConstantExpr>(Expr).getValue();
2147    return;
2148
2149  case MCExpr::SymbolRef: {
2150    const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2151    const MCSymbol &Sym = SRE.getSymbol();
2152    Sym.print(OS, MAI);
2153    return;
2154  }
2155
2156  case MCExpr::Unary: {
2157    const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2158    switch (UE.getOpcode()) {
2159    case MCUnaryExpr::LNot:  OS << '!'; break;
2160    case MCUnaryExpr::Minus: OS << '-'; break;
2161    case MCUnaryExpr::Not:   OS << '~'; break;
2162    case MCUnaryExpr::Plus:  OS << '+'; break;
2163    }
2164    printMCExpr(*UE.getSubExpr(), OS);
2165    return;
2166  }
2167
2168  case MCExpr::Binary: {
2169    const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2170
2171    // Only print parens around the LHS if it is non-trivial.
2172    if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2173        isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2174      printMCExpr(*BE.getLHS(), OS);
2175    } else {
2176      OS << '(';
2177      printMCExpr(*BE.getLHS(), OS);
2178      OS<< ')';
2179    }
2180
2181    switch (BE.getOpcode()) {
2182    case MCBinaryExpr::Add:
2183      // Print "X-42" instead of "X+-42".
2184      if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2185        if (RHSC->getValue() < 0) {
2186          OS << RHSC->getValue();
2187          return;
2188        }
2189      }
2190
2191      OS <<  '+';
2192      break;
2193    default: llvm_unreachable("Unhandled binary operator");
2194    }
2195
2196    // Only print parens around the LHS if it is non-trivial.
2197    if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2198      printMCExpr(*BE.getRHS(), OS);
2199    } else {
2200      OS << '(';
2201      printMCExpr(*BE.getRHS(), OS);
2202      OS << ')';
2203    }
2204    return;
2205  }
2206  }
2207
2208  llvm_unreachable("Invalid expression kind!");
2209}
2210
2211/// PrintAsmOperand - Print out an operand for an inline asm expression.
2212///
2213bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2214                                      unsigned AsmVariant,
2215                                      const char *ExtraCode, raw_ostream &O) {
2216  if (ExtraCode && ExtraCode[0]) {
2217    if (ExtraCode[1] != 0)
2218      return true; // Unknown modifier.
2219
2220    switch (ExtraCode[0]) {
2221    default:
2222      // See if this is a generic print operand
2223      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
2224    case 'r':
2225      break;
2226    }
2227  }
2228
2229  printOperand(MI, OpNo, O);
2230
2231  return false;
2232}
2233
2234bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
2235    const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
2236    const char *ExtraCode, raw_ostream &O) {
2237  if (ExtraCode && ExtraCode[0])
2238    return true; // Unknown modifier
2239
2240  O << '[';
2241  printMemOperand(MI, OpNo, O);
2242  O << ']';
2243
2244  return false;
2245}
2246
2247void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2248                                   raw_ostream &O, const char *Modifier) {
2249  const MachineOperand &MO = MI->getOperand(opNum);
2250  switch (MO.getType()) {
2251  case MachineOperand::MO_Register:
2252    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
2253      if (MO.getReg() == NVPTX::VRDepot)
2254        O << DEPOTNAME << getFunctionNumber();
2255      else
2256        O << NVPTXInstPrinter::getRegisterName(MO.getReg());
2257    } else {
2258      emitVirtualRegister(MO.getReg(), O);
2259    }
2260    return;
2261
2262  case MachineOperand::MO_Immediate:
2263    if (!Modifier)
2264      O << MO.getImm();
2265    else if (strstr(Modifier, "vec") == Modifier)
2266      printVecModifiedImmediate(MO, Modifier, O);
2267    else
2268      llvm_unreachable(
2269          "Don't know how to handle modifier on immediate operand");
2270    return;
2271
2272  case MachineOperand::MO_FPImmediate:
2273    printFPConstant(MO.getFPImm(), O);
2274    break;
2275
2276  case MachineOperand::MO_GlobalAddress:
2277    getSymbol(MO.getGlobal())->print(O, MAI);
2278    break;
2279
2280  case MachineOperand::MO_MachineBasicBlock:
2281    MO.getMBB()->getSymbol()->print(O, MAI);
2282    return;
2283
2284  default:
2285    llvm_unreachable("Operand type not supported.");
2286  }
2287}
2288
2289void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2290                                      raw_ostream &O, const char *Modifier) {
2291  printOperand(MI, opNum, O);
2292
2293  if (Modifier && !strcmp(Modifier, "add")) {
2294    O << ", ";
2295    printOperand(MI, opNum + 1, O);
2296  } else {
2297    if (MI->getOperand(opNum + 1).isImm() &&
2298        MI->getOperand(opNum + 1).getImm() == 0)
2299      return; // don't print ',0' or '+0'
2300    O << "+";
2301    printOperand(MI, opNum + 1, O);
2302  }
2303}
2304
2305void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
2306  std::stringstream temp;
2307  LineReader *reader = this->getReader(filename);
2308  temp << "\n//";
2309  temp << filename.str();
2310  temp << ":";
2311  temp << line;
2312  temp << " ";
2313  temp << reader->readLine(line);
2314  temp << "\n";
2315  this->OutStreamer->EmitRawText(temp.str());
2316}
2317
2318LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
2319  if (!reader) {
2320    reader = new LineReader(filename);
2321  }
2322
2323  if (reader->fileName() != filename) {
2324    delete reader;
2325    reader = new LineReader(filename);
2326  }
2327
2328  return reader;
2329}
2330
2331std::string LineReader::readLine(unsigned lineNum) {
2332  if (lineNum < theCurLine) {
2333    theCurLine = 0;
2334    fstr.seekg(0, std::ios::beg);
2335  }
2336  while (theCurLine < lineNum) {
2337    fstr.getline(buff, 500);
2338    theCurLine++;
2339  }
2340  return buff;
2341}
2342
2343// Force static initialization.
2344extern "C" void LLVMInitializeNVPTXAsmPrinter() {
2345  RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
2346  RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
2347}
2348