1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a printer that converts from our internal representation
11// of machine-dependent LLVM code to NVPTX assembly language.
12//
13//===----------------------------------------------------------------------===//
14
15#include "NVPTXAsmPrinter.h"
16#include "InstPrinter/NVPTXInstPrinter.h"
17#include "MCTargetDesc/NVPTXMCAsmInfo.h"
18#include "NVPTX.h"
19#include "NVPTXInstrInfo.h"
20#include "NVPTXMCExpr.h"
21#include "NVPTXMachineFunctionInfo.h"
22#include "NVPTXRegisterInfo.h"
23#include "NVPTXTargetMachine.h"
24#include "NVPTXUtilities.h"
25#include "cl_common_defines.h"
26#include "llvm/ADT/StringExtras.h"
27#include "llvm/Analysis/ConstantFolding.h"
28#include "llvm/CodeGen/Analysis.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineLoopInfo.h"
31#include "llvm/CodeGen/MachineModuleInfo.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/IR/DebugInfo.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/GlobalVariable.h"
37#include "llvm/IR/Mangler.h"
38#include "llvm/IR/Module.h"
39#include "llvm/IR/Operator.h"
40#include "llvm/MC/MCStreamer.h"
41#include "llvm/MC/MCSymbol.h"
42#include "llvm/Support/CommandLine.h"
43#include "llvm/Support/ErrorHandling.h"
44#include "llvm/Support/FormattedStream.h"
45#include "llvm/Support/Path.h"
46#include "llvm/Support/TargetRegistry.h"
47#include "llvm/Support/TimeValue.h"
48#include "llvm/Target/TargetLoweringObjectFile.h"
49#include "llvm/Transforms/Utils/UnrollLoop.h"
50#include <sstream>
51using namespace llvm;
52
53#define DEPOTNAME "__local_depot"
54
// Gates emitLineNumberAsDotLoc(): when this option is false that function
// returns immediately and no ".loc" directive is produced. Defaults to true.
static cl::opt<bool>
EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
                cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
                cl::init(true));

// When true, emitLineNumberAsDotLoc() additionally calls emitSrcInText() for
// the current file and line before printing the ".loc" directive. Defaults to
// false.
static cl::opt<bool>
InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
              cl::desc("NVPTX Specific: Emit source line in ptx file"),
              cl::init(false));
64
65namespace {
66/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
67/// depends.
68void DiscoverDependentGlobals(const Value *V,
69                              DenseSet<const GlobalVariable *> &Globals) {
70  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
71    Globals.insert(GV);
72  else {
73    if (const User *U = dyn_cast<User>(V)) {
74      for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
75        DiscoverDependentGlobals(U->getOperand(i), Globals);
76      }
77    }
78  }
79}
80
81/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
82/// instances to be emitted, but only after any dependents have been added
83/// first.
84void VisitGlobalVariableForEmission(
85    const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
86    DenseSet<const GlobalVariable *> &Visited,
87    DenseSet<const GlobalVariable *> &Visiting) {
88  // Have we already visited this one?
89  if (Visited.count(GV))
90    return;
91
92  // Do we have a circular dependency?
93  if (!Visiting.insert(GV).second)
94    report_fatal_error("Circular dependency found in global variable set");
95
96  // Make sure we visit all dependents first
97  DenseSet<const GlobalVariable *> Others;
98  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
99    DiscoverDependentGlobals(GV->getOperand(i), Others);
100
101  for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
102                                                  E = Others.end();
103       I != E; ++I)
104    VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
105
106  // Now we can visit ourself
107  Order.push_back(GV);
108  Visited.insert(GV);
109  Visiting.erase(GV);
110}
111}
112
113void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
114  if (!EmitLineNumbers)
115    return;
116  if (ignoreLoc(MI))
117    return;
118
119  DebugLoc curLoc = MI.getDebugLoc();
120
121  if (!prevDebugLoc && !curLoc)
122    return;
123
124  if (prevDebugLoc == curLoc)
125    return;
126
127  prevDebugLoc = curLoc;
128
129  if (!curLoc)
130    return;
131
132  auto *Scope = cast_or_null<MDScope>(curLoc.getScope());
133  if (!Scope)
134     return;
135
136  StringRef fileName(Scope->getFilename());
137  StringRef dirName(Scope->getDirectory());
138  SmallString<128> FullPathName = dirName;
139  if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
140    sys::path::append(FullPathName, fileName);
141    fileName = FullPathName;
142  }
143
144  if (filenameMap.find(fileName) == filenameMap.end())
145    return;
146
147  // Emit the line from the source file.
148  if (InterleaveSrc)
149    this->emitSrcInText(fileName, curLoc.getLine());
150
151  std::stringstream temp;
152  temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine()
153       << " " << curLoc.getCol();
154  OutStreamer.EmitRawText(temp.str());
155}
156
157void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
158  SmallString<128> Str;
159  raw_svector_ostream OS(Str);
160  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA)
161    emitLineNumberAsDotLoc(*MI);
162
163  MCInst Inst;
164  lowerToMCInst(MI, Inst);
165  EmitToStreamer(OutStreamer, Inst);
166}
167
168// Handle symbol backtracking for targets that do not support image handles
169bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
170                                           unsigned OpNo, MCOperand &MCOp) {
171  const MachineOperand &MO = MI->getOperand(OpNo);
172  const MCInstrDesc &MCID = MI->getDesc();
173
174  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
175    // This is a texture fetch, so operand 4 is a texref and operand 5 is
176    // a samplerref
177    if (OpNo == 4 && MO.isImm()) {
178      lowerImageHandleSymbol(MO.getImm(), MCOp);
179      return true;
180    }
181    if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
182      lowerImageHandleSymbol(MO.getImm(), MCOp);
183      return true;
184    }
185
186    return false;
187  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
188    unsigned VecSize =
189      1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
190
191    // For a surface load of vector size N, the Nth operand will be the surfref
192    if (OpNo == VecSize && MO.isImm()) {
193      lowerImageHandleSymbol(MO.getImm(), MCOp);
194      return true;
195    }
196
197    return false;
198  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
199    // This is a surface store, so operand 0 is a surfref
200    if (OpNo == 0 && MO.isImm()) {
201      lowerImageHandleSymbol(MO.getImm(), MCOp);
202      return true;
203    }
204
205    return false;
206  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
207    // This is a query, so operand 1 is a surfref/texref
208    if (OpNo == 1 && MO.isImm()) {
209      lowerImageHandleSymbol(MO.getImm(), MCOp);
210      return true;
211    }
212
213    return false;
214  }
215
216  return false;
217}
218
219void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
220  // Ewwww
221  TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
222  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
223  const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
224  const char *Sym = MFI->getImageHandleSymbol(Index);
225  std::string *SymNamePtr =
226    nvTM.getManagedStrPool()->getManagedString(Sym);
227  MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
228    StringRef(SymNamePtr->c_str())));
229}
230
231void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
232  OutMI.setOpcode(MI->getOpcode());
233  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
234  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
235    const MachineOperand &MO = MI->getOperand(0);
236    OutMI.addOperand(GetSymbolRef(
237      OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
238    return;
239  }
240
241  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
242    const MachineOperand &MO = MI->getOperand(i);
243
244    MCOperand MCOp;
245    if (!nvptxSubtarget->hasImageHandles()) {
246      if (lowerImageHandleOperand(MI, i, MCOp)) {
247        OutMI.addOperand(MCOp);
248        continue;
249      }
250    }
251
252    if (lowerOperand(MO, MCOp))
253      OutMI.addOperand(MCOp);
254  }
255}
256
257bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
258                                   MCOperand &MCOp) {
259  switch (MO.getType()) {
260  default: llvm_unreachable("unknown operand type");
261  case MachineOperand::MO_Register:
262    MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
263    break;
264  case MachineOperand::MO_Immediate:
265    MCOp = MCOperand::CreateImm(MO.getImm());
266    break;
267  case MachineOperand::MO_MachineBasicBlock:
268    MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
269        MO.getMBB()->getSymbol(), OutContext));
270    break;
271  case MachineOperand::MO_ExternalSymbol:
272    MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
273    break;
274  case MachineOperand::MO_GlobalAddress:
275    MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
276    break;
277  case MachineOperand::MO_FPImmediate: {
278    const ConstantFP *Cnt = MO.getFPImm();
279    APFloat Val = Cnt->getValueAPF();
280
281    switch (Cnt->getType()->getTypeID()) {
282    default: report_fatal_error("Unsupported FP type"); break;
283    case Type::FloatTyID:
284      MCOp = MCOperand::CreateExpr(
285        NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
286      break;
287    case Type::DoubleTyID:
288      MCOp = MCOperand::CreateExpr(
289        NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
290      break;
291    }
292    break;
293  }
294  }
295  return true;
296}
297
298unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
299  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
300    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
301
302    DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
303    unsigned RegNum = RegMap[Reg];
304
305    // Encode the register class in the upper 4 bits
306    // Must be kept in sync with NVPTXInstPrinter::printRegName
307    unsigned Ret = 0;
308    if (RC == &NVPTX::Int1RegsRegClass) {
309      Ret = (1 << 28);
310    } else if (RC == &NVPTX::Int16RegsRegClass) {
311      Ret = (2 << 28);
312    } else if (RC == &NVPTX::Int32RegsRegClass) {
313      Ret = (3 << 28);
314    } else if (RC == &NVPTX::Int64RegsRegClass) {
315      Ret = (4 << 28);
316    } else if (RC == &NVPTX::Float32RegsRegClass) {
317      Ret = (5 << 28);
318    } else if (RC == &NVPTX::Float64RegsRegClass) {
319      Ret = (6 << 28);
320    } else {
321      report_fatal_error("Bad register class");
322    }
323
324    // Insert the vreg number
325    Ret |= (RegNum & 0x0FFFFFFF);
326    return Ret;
327  } else {
328    // Some special-use registers are actually physical registers.
329    // Encode this as the register class ID of 0 and the real register ID.
330    return Reg & 0x0FFFFFFF;
331  }
332}
333
334MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
335  const MCExpr *Expr;
336  Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
337                                 OutContext);
338  return MCOperand::CreateExpr(Expr);
339}
340
341void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
342  const DataLayout *TD = TM.getDataLayout();
343  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
344
345  Type *Ty = F->getReturnType();
346
347  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
348
349  if (Ty->getTypeID() == Type::VoidTyID)
350    return;
351
352  O << " (";
353
354  if (isABI) {
355    if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
356      unsigned size = 0;
357      if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
358        size = ITy->getBitWidth();
359        if (size < 32)
360          size = 32;
361      } else {
362        assert(Ty->isFloatingPointTy() && "Floating point type expected here");
363        size = Ty->getPrimitiveSizeInBits();
364      }
365
366      O << ".param .b" << size << " func_retval0";
367    } else if (isa<PointerType>(Ty)) {
368      O << ".param .b" << TLI->getPointerTy().getSizeInBits()
369        << " func_retval0";
370    } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
371       unsigned totalsz = TD->getTypeAllocSize(Ty);
372       unsigned retAlignment = 0;
373       if (!llvm::getAlign(*F, 0, retAlignment))
374         retAlignment = TD->getABITypeAlignment(Ty);
375       O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
376         << "]";
377    } else
378      llvm_unreachable("Unknown return type");
379  } else {
380    SmallVector<EVT, 16> vtparts;
381    ComputeValueVTs(*TLI, Ty, vtparts);
382    unsigned idx = 0;
383    for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
384      unsigned elems = 1;
385      EVT elemtype = vtparts[i];
386      if (vtparts[i].isVector()) {
387        elems = vtparts[i].getVectorNumElements();
388        elemtype = vtparts[i].getVectorElementType();
389      }
390
391      for (unsigned j = 0, je = elems; j != je; ++j) {
392        unsigned sz = elemtype.getSizeInBits();
393        if (elemtype.isInteger() && (sz < 32))
394          sz = 32;
395        O << ".reg .b" << sz << " func_retval" << idx;
396        if (j < je - 1)
397          O << ", ";
398        ++idx;
399      }
400      if (i < e - 1)
401        O << ", ";
402    }
403  }
404  O << ") ";
405  return;
406}
407
408void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
409                                        raw_ostream &O) {
410  const Function *F = MF.getFunction();
411  printReturnValStr(F, O);
412}
413
414// Return true if MBB is the header of a loop marked with
415// llvm.loop.unroll.disable.
416// TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
417bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
418    const MachineBasicBlock &MBB) const {
419  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
420  // TODO: isLoopHeader() should take "const MachineBasicBlock *".
421  // We insert .pragma "nounroll" only to the loop header.
422  if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB)))
423    return false;
424
425  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
426  // we iterate through each back edge of the loop with header MBB, and check
427  // whether its metadata contains llvm.loop.unroll.disable.
428  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
429    const MachineBasicBlock *PMBB = *I;
430    if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
431      // Edges from other loops to MBB are not back edges.
432      continue;
433    }
434    if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
435      if (MDNode *LoopID = PBB->getTerminator()->getMetadata("llvm.loop")) {
436        if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
437          return true;
438      }
439    }
440  }
441  return false;
442}
443
444void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
445  AsmPrinter::EmitBasicBlockStart(MBB);
446  if (isLoopHeaderOfNoUnroll(MBB))
447    OutStreamer.EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
448}
449
450void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
451  SmallString<128> Str;
452  raw_svector_ostream O(Str);
453
454  if (!GlobalsEmitted) {
455    emitGlobals(*MF->getFunction()->getParent());
456    GlobalsEmitted = true;
457  }
458
459  // Set up
460  MRI = &MF->getRegInfo();
461  F = MF->getFunction();
462  emitLinkageDirective(F, O);
463  if (llvm::isKernelFunction(*F))
464    O << ".entry ";
465  else {
466    O << ".func ";
467    printReturnValStr(*MF, O);
468  }
469
470  O << *CurrentFnSym;
471
472  emitFunctionParamList(*MF, O);
473
474  if (llvm::isKernelFunction(*F))
475    emitKernelFunctionDirectives(*F, O);
476
477  OutStreamer.EmitRawText(O.str());
478
479  prevDebugLoc = DebugLoc();
480}
481
482void NVPTXAsmPrinter::EmitFunctionBodyStart() {
483  VRegMapping.clear();
484  OutStreamer.EmitRawText(StringRef("{\n"));
485  setAndEmitFunctionVirtualRegisters(*MF);
486
487  SmallString<128> Str;
488  raw_svector_ostream O(Str);
489  emitDemotedVars(MF->getFunction(), O);
490  OutStreamer.EmitRawText(O.str());
491}
492
493void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
494  OutStreamer.EmitRawText(StringRef("}\n"));
495  VRegMapping.clear();
496}
497
498void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
499  unsigned RegNo = MI->getOperand(0).getReg();
500  if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
501    OutStreamer.AddComment(Twine("implicit-def: ") +
502                           getVirtualRegisterName(RegNo));
503  } else {
504    OutStreamer.AddComment(Twine("implicit-def: ") +
505                           nvptxSubtarget->getRegisterInfo()->getName(RegNo));
506  }
507  OutStreamer.AddBlankLine();
508}
509
510void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
511                                                   raw_ostream &O) const {
512  // If the NVVM IR has some of reqntid* specified, then output
513  // the reqntid directive, and set the unspecified ones to 1.
514  // If none of reqntid* is specified, don't output reqntid directive.
515  unsigned reqntidx, reqntidy, reqntidz;
516  bool specified = false;
517  if (!llvm::getReqNTIDx(F, reqntidx))
518    reqntidx = 1;
519  else
520    specified = true;
521  if (!llvm::getReqNTIDy(F, reqntidy))
522    reqntidy = 1;
523  else
524    specified = true;
525  if (!llvm::getReqNTIDz(F, reqntidz))
526    reqntidz = 1;
527  else
528    specified = true;
529
530  if (specified)
531    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
532      << "\n";
533
534  // If the NVVM IR has some of maxntid* specified, then output
535  // the maxntid directive, and set the unspecified ones to 1.
536  // If none of maxntid* is specified, don't output maxntid directive.
537  unsigned maxntidx, maxntidy, maxntidz;
538  specified = false;
539  if (!llvm::getMaxNTIDx(F, maxntidx))
540    maxntidx = 1;
541  else
542    specified = true;
543  if (!llvm::getMaxNTIDy(F, maxntidy))
544    maxntidy = 1;
545  else
546    specified = true;
547  if (!llvm::getMaxNTIDz(F, maxntidz))
548    maxntidz = 1;
549  else
550    specified = true;
551
552  if (specified)
553    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
554      << "\n";
555
556  unsigned mincta;
557  if (llvm::getMinCTASm(F, mincta))
558    O << ".minnctapersm " << mincta << "\n";
559}
560
561std::string
562NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
563  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
564
565  std::string Name;
566  raw_string_ostream NameStr(Name);
567
568  VRegRCMap::const_iterator I = VRegMapping.find(RC);
569  assert(I != VRegMapping.end() && "Bad register class");
570  const DenseMap<unsigned, unsigned> &RegMap = I->second;
571
572  VRegMap::const_iterator VI = RegMap.find(Reg);
573  assert(VI != RegMap.end() && "Bad virtual register");
574  unsigned MappedVR = VI->second;
575
576  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
577
578  NameStr.flush();
579  return Name;
580}
581
582void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
583                                          raw_ostream &O) {
584  O << getVirtualRegisterName(vr);
585}
586
587void NVPTXAsmPrinter::printVecModifiedImmediate(
588    const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
589  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
590  int Imm = (int) MO.getImm();
591  if (0 == strcmp(Modifier, "vecelem"))
592    O << "_" << vecelem[Imm];
593  else if (0 == strcmp(Modifier, "vecv4comm1")) {
594    if ((Imm < 0) || (Imm > 3))
595      O << "//";
596  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
597    if ((Imm < 4) || (Imm > 7))
598      O << "//";
599  } else if (0 == strcmp(Modifier, "vecv4pos")) {
600    if (Imm < 0)
601      Imm = 0;
602    O << "_" << vecelem[Imm % 4];
603  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
604    if ((Imm < 0) || (Imm > 1))
605      O << "//";
606  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
607    if ((Imm < 2) || (Imm > 3))
608      O << "//";
609  } else if (0 == strcmp(Modifier, "vecv2pos")) {
610    if (Imm < 0)
611      Imm = 0;
612    O << "_" << vecelem[Imm % 2];
613  } else
614    llvm_unreachable("Unknown Modifier on immediate operand");
615}
616
617
618
619void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
620
621  emitLinkageDirective(F, O);
622  if (llvm::isKernelFunction(*F))
623    O << ".entry ";
624  else
625    O << ".func ";
626  printReturnValStr(F, O);
627  O << *getSymbol(F) << "\n";
628  emitFunctionParamList(F, O);
629  O << ";\n";
630}
631
632static bool usedInGlobalVarDef(const Constant *C) {
633  if (!C)
634    return false;
635
636  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
637    if (GV->getName() == "llvm.used")
638      return false;
639    return true;
640  }
641
642  for (const User *U : C->users())
643    if (const Constant *C = dyn_cast<Constant>(U))
644      if (usedInGlobalVarDef(C))
645        return true;
646
647  return false;
648}
649
650static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
651  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
652    if (othergv->getName() == "llvm.used")
653      return true;
654  }
655
656  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
657    if (instr->getParent() && instr->getParent()->getParent()) {
658      const Function *curFunc = instr->getParent()->getParent();
659      if (oneFunc && (curFunc != oneFunc))
660        return false;
661      oneFunc = curFunc;
662      return true;
663    } else
664      return false;
665  }
666
667  for (const User *UU : U->users())
668    if (!usedInOneFunc(UU, oneFunc))
669      return false;
670
671  return true;
672}
673
674/* Find out if a global variable can be demoted to local scope.
675 * Currently, this is valid for CUDA shared variables, which have local
676 * scope and global lifetime. So the conditions to check are :
677 * 1. Is the global variable in shared address space?
678 * 2. Does it have internal linkage?
679 * 3. Is the global variable referenced only in one function?
680 */
681static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
682  if (!gv->hasInternalLinkage())
683    return false;
684  const PointerType *Pty = gv->getType();
685  if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
686    return false;
687
688  const Function *oneFunc = nullptr;
689
690  bool flag = usedInOneFunc(gv, oneFunc);
691  if (!flag)
692    return false;
693  if (!oneFunc)
694    return false;
695  f = oneFunc;
696  return true;
697}
698
699static bool useFuncSeen(const Constant *C,
700                        llvm::DenseMap<const Function *, bool> &seenMap) {
701  for (const User *U : C->users()) {
702    if (const Constant *cu = dyn_cast<Constant>(U)) {
703      if (useFuncSeen(cu, seenMap))
704        return true;
705    } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
706      const BasicBlock *bb = I->getParent();
707      if (!bb)
708        continue;
709      const Function *caller = bb->getParent();
710      if (!caller)
711        continue;
712      if (seenMap.find(caller) != seenMap.end())
713        return true;
714    }
715  }
716  return false;
717}
718
719void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
720  llvm::DenseMap<const Function *, bool> seenMap;
721  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
722    const Function *F = FI;
723
724    if (F->isDeclaration()) {
725      if (F->use_empty())
726        continue;
727      if (F->getIntrinsicID())
728        continue;
729      emitDeclaration(F, O);
730      continue;
731    }
732    for (const User *U : F->users()) {
733      if (const Constant *C = dyn_cast<Constant>(U)) {
734        if (usedInGlobalVarDef(C)) {
735          // The use is in the initialization of a global variable
736          // that is a function pointer, so print a declaration
737          // for the original function
738          emitDeclaration(F, O);
739          break;
740        }
741        // Emit a declaration of this function if the function that
742        // uses this constant expr has already been seen.
743        if (useFuncSeen(C, seenMap)) {
744          emitDeclaration(F, O);
745          break;
746        }
747      }
748
749      if (!isa<Instruction>(U))
750        continue;
751      const Instruction *instr = cast<Instruction>(U);
752      const BasicBlock *bb = instr->getParent();
753      if (!bb)
754        continue;
755      const Function *caller = bb->getParent();
756      if (!caller)
757        continue;
758
759      // If a caller has already been seen, then the caller is
760      // appearing in the module before the callee. so print out
761      // a declaration for the callee.
762      if (seenMap.find(caller) != seenMap.end()) {
763        emitDeclaration(F, O);
764        break;
765      }
766    }
767    seenMap[F] = true;
768  }
769}
770
771void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
772  DebugInfoFinder DbgFinder;
773  DbgFinder.processModule(M);
774
775  unsigned i = 1;
776  for (const MDCompileUnit *DIUnit : DbgFinder.compile_units()) {
777    StringRef Filename = DIUnit->getFilename();
778    StringRef Dirname = DIUnit->getDirectory();
779    SmallString<128> FullPathName = Dirname;
780    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
781      sys::path::append(FullPathName, Filename);
782      Filename = FullPathName;
783    }
784    if (filenameMap.find(Filename) != filenameMap.end())
785      continue;
786    filenameMap[Filename] = i;
787    OutStreamer.EmitDwarfFileDirective(i, "", Filename);
788    ++i;
789  }
790
791  for (MDSubprogram *SP : DbgFinder.subprograms()) {
792    StringRef Filename = SP->getFilename();
793    StringRef Dirname = SP->getDirectory();
794    SmallString<128> FullPathName = Dirname;
795    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
796      sys::path::append(FullPathName, Filename);
797      Filename = FullPathName;
798    }
799    if (filenameMap.find(Filename) != filenameMap.end())
800      continue;
801    filenameMap[Filename] = i;
802    ++i;
803  }
804}
805
806bool NVPTXAsmPrinter::doInitialization(Module &M) {
807  // Construct a default subtarget off of the TargetMachine defaults. The
808  // rest of NVPTX isn't friendly to change subtargets per function and
809  // so the default TargetMachine will have all of the options.
810  StringRef TT = TM.getTargetTriple();
811  StringRef CPU = TM.getTargetCPU();
812  StringRef FS = TM.getTargetFeatureString();
813  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
814  const NVPTXSubtarget STI(TT, CPU, FS, NTM);
815
816  SmallString<128> Str1;
817  raw_svector_ostream OS1(Str1);
818
819  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
820  MMI->AnalyzeModule(M);
821
822  // We need to call the parent's one explicitly.
823  //bool Result = AsmPrinter::doInitialization(M);
824
825  // Initialize TargetLoweringObjectFile.
826  const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
827      .Initialize(OutContext, TM);
828
829  Mang = new Mangler(TM.getDataLayout());
830
831  // Emit header before any dwarf directives are emitted below.
832  emitHeader(M, OS1, STI);
833  OutStreamer.EmitRawText(OS1.str());
834
835  // Already commented out
836  //bool Result = AsmPrinter::doInitialization(M);
837
838  // Emit module-level inline asm if it exists.
839  if (!M.getModuleInlineAsm().empty()) {
840    OutStreamer.AddComment("Start of file scope inline assembly");
841    OutStreamer.AddBlankLine();
842    OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
843    OutStreamer.AddBlankLine();
844    OutStreamer.AddComment("End of file scope inline assembly");
845    OutStreamer.AddBlankLine();
846  }
847
848  // If we're not NVCL we're CUDA, go ahead and emit filenames.
849  if (Triple(TM.getTargetTriple()).getOS() != Triple::NVCL)
850    recordAndEmitFilenames(M);
851
852  GlobalsEmitted = false;
853
854  return false; // success
855}
856
857void NVPTXAsmPrinter::emitGlobals(const Module &M) {
858  SmallString<128> Str2;
859  raw_svector_ostream OS2(Str2);
860
861  emitDeclarations(M, OS2);
862
863  // As ptxas does not support forward references of globals, we need to first
864  // sort the list of module-level globals in def-use order. We visit each
865  // global variable in order, and ensure that we emit it *after* its dependent
866  // globals. We use a little extra memory maintaining both a set and a list to
867  // have fast searches while maintaining a strict ordering.
868  SmallVector<const GlobalVariable *, 8> Globals;
869  DenseSet<const GlobalVariable *> GVVisited;
870  DenseSet<const GlobalVariable *> GVVisiting;
871
872  // Visit each global variable, in order
873  for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
874       I != E; ++I)
875    VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
876
877  assert(GVVisited.size() == M.getGlobalList().size() &&
878         "Missed a global variable");
879  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
880
881  // Print out module-level global variables in proper order
882  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
883    printModuleLevelGV(Globals[i], OS2);
884
885  OS2 << '\n';
886
887  OutStreamer.EmitRawText(OS2.str());
888}
889
890void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
891                                 const NVPTXSubtarget &STI) {
892  O << "//\n";
893  O << "// Generated by LLVM NVPTX Back-End\n";
894  O << "//\n";
895  O << "\n";
896
897  unsigned PTXVersion = STI.getPTXVersion();
898  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
899
900  O << ".target ";
901  O << STI.getTargetName();
902
903  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
904  if (NTM.getDrvInterface() == NVPTX::NVCL)
905    O << ", texmode_independent";
906  else {
907    if (!STI.hasDouble())
908      O << ", map_f64_to_f32";
909  }
910
911  if (MAI->doesSupportDebugInformation())
912    O << ", debug";
913
914  O << "\n";
915
916  O << ".address_size ";
917  if (NTM.is64Bit())
918    O << "64";
919  else
920    O << "32";
921  O << "\n";
922
923  O << "\n";
924}
925
926bool NVPTXAsmPrinter::doFinalization(Module &M) {
927  // If we did not emit any functions, then the global declarations have not
928  // yet been emitted.
929  if (!GlobalsEmitted) {
930    emitGlobals(M);
931    GlobalsEmitted = true;
932  }
933
934  // XXX Temproarily remove global variables so that doFinalization() will not
935  // emit them again (global variables are emitted at beginning).
936
937  Module::GlobalListType &global_list = M.getGlobalList();
938  int i, n = global_list.size();
939  GlobalVariable **gv_array = new GlobalVariable *[n];
940
941  // first, back-up GlobalVariable in gv_array
942  i = 0;
943  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
944       I != E; ++I)
945    gv_array[i++] = &*I;
946
947  // second, empty global_list
948  while (!global_list.empty())
949    global_list.remove(global_list.begin());
950
951  // call doFinalization
952  bool ret = AsmPrinter::doFinalization(M);
953
954  // now we restore global variables
955  for (i = 0; i < n; i++)
956    global_list.insert(global_list.end(), gv_array[i]);
957
958  clearAnnotationCache(&M);
959
960  delete[] gv_array;
961  return ret;
962
963  //bool Result = AsmPrinter::doFinalization(M);
964  // Instead of calling the parents doFinalization, we may
965  // clone parents doFinalization and customize here.
966  // Currently, we if NVISA out the EmitGlobals() in
967  // parent's doFinalization, which is too intrusive.
968  //
969  // Same for the doInitialization.
970  //return Result;
971}
972
973// This function emits appropriate linkage directives for
974// functions and global variables.
975//
976// extern function declaration            -> .extern
977// extern function definition             -> .visible
978// external global variable with init     -> .visible
979// external without init                  -> .extern
980// appending                              -> not allowed, assert.
981// for any linkage other than
982// internal, private, linker_private,
983// linker_private_weak, linker_private_weak_def_auto,
984// we emit                                -> .weak.
985
986void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
987                                           raw_ostream &O) {
988  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
989    if (V->hasExternalLinkage()) {
990      if (isa<GlobalVariable>(V)) {
991        const GlobalVariable *GVar = cast<GlobalVariable>(V);
992        if (GVar) {
993          if (GVar->hasInitializer())
994            O << ".visible ";
995          else
996            O << ".extern ";
997        }
998      } else if (V->isDeclaration())
999        O << ".extern ";
1000      else
1001        O << ".visible ";
1002    } else if (V->hasAppendingLinkage()) {
1003      std::string msg;
1004      msg.append("Error: ");
1005      msg.append("Symbol ");
1006      if (V->hasName())
1007        msg.append(V->getName());
1008      msg.append("has unsupported appending linkage type");
1009      llvm_unreachable(msg.c_str());
1010    } else if (!V->hasInternalLinkage() &&
1011               !V->hasPrivateLinkage()) {
1012      O << ".weak ";
1013    }
1014  }
1015}
1016
1017void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1018                                         raw_ostream &O,
1019                                         bool processDemoted) {
1020
1021  // Skip meta data
1022  if (GVar->hasSection()) {
1023    if (GVar->getSection() == StringRef("llvm.metadata"))
1024      return;
1025  }
1026
1027  // Skip LLVM intrinsic global variables
1028  if (GVar->getName().startswith("llvm.") ||
1029      GVar->getName().startswith("nvvm."))
1030    return;
1031
1032  const DataLayout *TD = TM.getDataLayout();
1033
1034  // GlobalVariables are always constant pointers themselves.
1035  const PointerType *PTy = GVar->getType();
1036  Type *ETy = PTy->getElementType();
1037
1038  if (GVar->hasExternalLinkage()) {
1039    if (GVar->hasInitializer())
1040      O << ".visible ";
1041    else
1042      O << ".extern ";
1043  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1044             GVar->hasAvailableExternallyLinkage() ||
1045             GVar->hasCommonLinkage()) {
1046    O << ".weak ";
1047  }
1048
1049  if (llvm::isTexture(*GVar)) {
1050    O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
1051    return;
1052  }
1053
1054  if (llvm::isSurface(*GVar)) {
1055    O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
1056    return;
1057  }
1058
1059  if (GVar->isDeclaration()) {
1060    // (extern) declarations, no definition or initializer
1061    // Currently the only known declaration is for an automatic __local
1062    // (.shared) promoted to global.
1063    emitPTXGlobalVariable(GVar, O);
1064    O << ";\n";
1065    return;
1066  }
1067
1068  if (llvm::isSampler(*GVar)) {
1069    O << ".global .samplerref " << llvm::getSamplerName(*GVar);
1070
1071    const Constant *Initializer = nullptr;
1072    if (GVar->hasInitializer())
1073      Initializer = GVar->getInitializer();
1074    const ConstantInt *CI = nullptr;
1075    if (Initializer)
1076      CI = dyn_cast<ConstantInt>(Initializer);
1077    if (CI) {
1078      unsigned sample = CI->getZExtValue();
1079
1080      O << " = { ";
1081
1082      for (int i = 0,
1083               addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1084           i < 3; i++) {
1085        O << "addr_mode_" << i << " = ";
1086        switch (addr) {
1087        case 0:
1088          O << "wrap";
1089          break;
1090        case 1:
1091          O << "clamp_to_border";
1092          break;
1093        case 2:
1094          O << "clamp_to_edge";
1095          break;
1096        case 3:
1097          O << "wrap";
1098          break;
1099        case 4:
1100          O << "mirror";
1101          break;
1102        }
1103        O << ", ";
1104      }
1105      O << "filter_mode = ";
1106      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1107      case 0:
1108        O << "nearest";
1109        break;
1110      case 1:
1111        O << "linear";
1112        break;
1113      case 2:
1114        llvm_unreachable("Anisotropic filtering is not supported");
1115      default:
1116        O << "nearest";
1117        break;
1118      }
1119      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1120        O << ", force_unnormalized_coords = 1";
1121      }
1122      O << " }";
1123    }
1124
1125    O << ";\n";
1126    return;
1127  }
1128
1129  if (GVar->hasPrivateLinkage()) {
1130
1131    if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
1132      return;
1133
1134    // FIXME - need better way (e.g. Metadata) to avoid generating this global
1135    if (!strncmp(GVar->getName().data(), "filename", 8))
1136      return;
1137    if (GVar->use_empty())
1138      return;
1139  }
1140
1141  const Function *demotedFunc = nullptr;
1142  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1143    O << "// " << GVar->getName() << " has been demoted\n";
1144    if (localDecls.find(demotedFunc) != localDecls.end())
1145      localDecls[demotedFunc].push_back(GVar);
1146    else {
1147      std::vector<const GlobalVariable *> temp;
1148      temp.push_back(GVar);
1149      localDecls[demotedFunc] = temp;
1150    }
1151    return;
1152  }
1153
1154  O << ".";
1155  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1156
1157  if (isManaged(*GVar)) {
1158    O << " .attribute(.managed)";
1159  }
1160
1161  if (GVar->getAlignment() == 0)
1162    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
1163  else
1164    O << " .align " << GVar->getAlignment();
1165
1166  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
1167    O << " .";
1168    // Special case: ABI requires that we use .u8 for predicates
1169    if (ETy->isIntegerTy(1))
1170      O << "u8";
1171    else
1172      O << getPTXFundamentalTypeStr(ETy, false);
1173    O << " ";
1174    O << *getSymbol(GVar);
1175
1176    // Ptx allows variable initilization only for constant and global state
1177    // spaces.
1178    if (GVar->hasInitializer()) {
1179      if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1180          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
1181        const Constant *Initializer = GVar->getInitializer();
1182        // 'undef' is treated as there is no value spefied.
1183        if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1184          O << " = ";
1185          printScalarConstant(Initializer, O);
1186        }
1187      } else {
1188        // The frontend adds zero-initializer to variables that don't have an
1189        // initial value, so skip warning for this case.
1190        if (!GVar->getInitializer()->isNullValue()) {
1191          std::string warnMsg =
1192              ("initial value of '" + GVar->getName() +
1193               "' is not allowed in addrspace(" +
1194               Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str();
1195          report_fatal_error(warnMsg.c_str());
1196        }
1197      }
1198    }
1199  } else {
1200    unsigned int ElementSize = 0;
1201
1202    // Although PTX has direct support for struct type and array type and
1203    // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1204    // targets that support these high level field accesses. Structs, arrays
1205    // and vectors are lowered into arrays of bytes.
1206    switch (ETy->getTypeID()) {
1207    case Type::StructTyID:
1208    case Type::ArrayTyID:
1209    case Type::VectorTyID:
1210      ElementSize = TD->getTypeStoreSize(ETy);
1211      // Ptx allows variable initilization only for constant and
1212      // global state spaces.
1213      if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1214           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
1215          GVar->hasInitializer()) {
1216        const Constant *Initializer = GVar->getInitializer();
1217        if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1218          AggBuffer aggBuffer(ElementSize, O, *this);
1219          bufferAggregateConstant(Initializer, &aggBuffer);
1220          if (aggBuffer.numSymbols) {
1221            if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1222              O << " .u64 " << *getSymbol(GVar) << "[";
1223              O << ElementSize / 8;
1224            } else {
1225              O << " .u32 " << *getSymbol(GVar) << "[";
1226              O << ElementSize / 4;
1227            }
1228            O << "]";
1229          } else {
1230            O << " .b8 " << *getSymbol(GVar) << "[";
1231            O << ElementSize;
1232            O << "]";
1233          }
1234          O << " = {";
1235          aggBuffer.print();
1236          O << "}";
1237        } else {
1238          O << " .b8 " << *getSymbol(GVar);
1239          if (ElementSize) {
1240            O << "[";
1241            O << ElementSize;
1242            O << "]";
1243          }
1244        }
1245      } else {
1246        O << " .b8 " << *getSymbol(GVar);
1247        if (ElementSize) {
1248          O << "[";
1249          O << ElementSize;
1250          O << "]";
1251        }
1252      }
1253      break;
1254    default:
1255      llvm_unreachable("type not supported yet");
1256    }
1257
1258  }
1259  O << ";\n";
1260}
1261
1262void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1263  if (localDecls.find(f) == localDecls.end())
1264    return;
1265
1266  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1267
1268  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1269    O << "\t// demoted variable\n\t";
1270    printModuleLevelGV(gvars[i], O, true);
1271  }
1272}
1273
1274void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1275                                          raw_ostream &O) const {
1276  switch (AddressSpace) {
1277  case llvm::ADDRESS_SPACE_LOCAL:
1278    O << "local";
1279    break;
1280  case llvm::ADDRESS_SPACE_GLOBAL:
1281    O << "global";
1282    break;
1283  case llvm::ADDRESS_SPACE_CONST:
1284    O << "const";
1285    break;
1286  case llvm::ADDRESS_SPACE_SHARED:
1287    O << "shared";
1288    break;
1289  default:
1290    report_fatal_error("Bad address space found while emitting PTX");
1291    break;
1292  }
1293}
1294
1295std::string
1296NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
1297  switch (Ty->getTypeID()) {
1298  default:
1299    llvm_unreachable("unexpected type");
1300    break;
1301  case Type::IntegerTyID: {
1302    unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1303    if (NumBits == 1)
1304      return "pred";
1305    else if (NumBits <= 64) {
1306      std::string name = "u";
1307      return name + utostr(NumBits);
1308    } else {
1309      llvm_unreachable("Integer too large");
1310      break;
1311    }
1312    break;
1313  }
1314  case Type::FloatTyID:
1315    return "f32";
1316  case Type::DoubleTyID:
1317    return "f64";
1318  case Type::PointerTyID:
1319    if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1320      if (useB4PTR)
1321        return "b64";
1322      else
1323        return "u64";
1324    else if (useB4PTR)
1325      return "b32";
1326    else
1327      return "u32";
1328  }
1329  llvm_unreachable("unexpected type");
1330  return nullptr;
1331}
1332
1333void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1334                                            raw_ostream &O) {
1335
1336  const DataLayout *TD = TM.getDataLayout();
1337
1338  // GlobalVariables are always constant pointers themselves.
1339  const PointerType *PTy = GVar->getType();
1340  Type *ETy = PTy->getElementType();
1341
1342  O << ".";
1343  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1344  if (GVar->getAlignment() == 0)
1345    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
1346  else
1347    O << " .align " << GVar->getAlignment();
1348
1349  if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
1350    O << " .";
1351    O << getPTXFundamentalTypeStr(ETy);
1352    O << " ";
1353    O << *getSymbol(GVar);
1354    return;
1355  }
1356
1357  int64_t ElementSize = 0;
1358
1359  // Although PTX has direct support for struct type and array type and LLVM IR
1360  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1361  // support these high level field accesses. Structs and arrays are lowered
1362  // into arrays of bytes.
1363  switch (ETy->getTypeID()) {
1364  case Type::StructTyID:
1365  case Type::ArrayTyID:
1366  case Type::VectorTyID:
1367    ElementSize = TD->getTypeStoreSize(ETy);
1368    O << " .b8 " << *getSymbol(GVar) << "[";
1369    if (ElementSize) {
1370      O << itostr(ElementSize);
1371    }
1372    O << "]";
1373    break;
1374  default:
1375    llvm_unreachable("type not supported yet");
1376  }
1377  return;
1378}
1379
1380static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
1381  if (Ty->isSingleValueType())
1382    return TD->getPrefTypeAlignment(Ty);
1383
1384  const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
1385  if (ATy)
1386    return getOpenCLAlignment(TD, ATy->getElementType());
1387
1388  const StructType *STy = dyn_cast<StructType>(Ty);
1389  if (STy) {
1390    unsigned int alignStruct = 1;
1391    // Go through each element of the struct and find the
1392    // largest alignment.
1393    for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1394      Type *ETy = STy->getElementType(i);
1395      unsigned int align = getOpenCLAlignment(TD, ETy);
1396      if (align > alignStruct)
1397        alignStruct = align;
1398    }
1399    return alignStruct;
1400  }
1401
1402  const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
1403  if (FTy)
1404    return TD->getPointerPrefAlignment();
1405  return TD->getPrefTypeAlignment(Ty);
1406}
1407
1408void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1409                                     int paramIndex, raw_ostream &O) {
1410  O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
1411}
1412
1413void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
1414  O << *CurrentFnSym << "_param_" << paramIndex;
1415}
1416
1417void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1418  const DataLayout *TD = TM.getDataLayout();
1419  const AttributeSet &PAL = F->getAttributes();
1420  const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
1421  Function::const_arg_iterator I, E;
1422  unsigned paramIndex = 0;
1423  bool first = true;
1424  bool isKernelFunc = llvm::isKernelFunction(*F);
1425  bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
1426  MVT thePointerTy = TLI->getPointerTy();
1427
1428  O << "(\n";
1429
1430  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1431    Type *Ty = I->getType();
1432
1433    if (!first)
1434      O << ",\n";
1435
1436    first = false;
1437
1438    // Handle image/sampler parameters
1439    if (isKernelFunction(*F)) {
1440      if (isSampler(*I) || isImage(*I)) {
1441        if (isImage(*I)) {
1442          std::string sname = I->getName();
1443          if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1444            if (nvptxSubtarget->hasImageHandles())
1445              O << "\t.param .u64 .ptr .surfref ";
1446            else
1447              O << "\t.param .surfref ";
1448            O << *CurrentFnSym << "_param_" << paramIndex;
1449          }
1450          else { // Default image is read_only
1451            if (nvptxSubtarget->hasImageHandles())
1452              O << "\t.param .u64 .ptr .texref ";
1453            else
1454              O << "\t.param .texref ";
1455            O << *CurrentFnSym << "_param_" << paramIndex;
1456          }
1457        } else {
1458          if (nvptxSubtarget->hasImageHandles())
1459            O << "\t.param .u64 .ptr .samplerref ";
1460          else
1461            O << "\t.param .samplerref ";
1462          O << *CurrentFnSym << "_param_" << paramIndex;
1463        }
1464        continue;
1465      }
1466    }
1467
1468    if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
1469      if (Ty->isAggregateType() || Ty->isVectorTy()) {
1470        // Just print .param .align <a> .b8 .param[size];
1471        // <a> = PAL.getparamalignment
1472        // size = typeallocsize of element type
1473        unsigned align = PAL.getParamAlignment(paramIndex + 1);
1474        if (align == 0)
1475          align = TD->getABITypeAlignment(Ty);
1476
1477        unsigned sz = TD->getTypeAllocSize(Ty);
1478        O << "\t.param .align " << align << " .b8 ";
1479        printParamName(I, paramIndex, O);
1480        O << "[" << sz << "]";
1481
1482        continue;
1483      }
1484      // Just a scalar
1485      const PointerType *PTy = dyn_cast<PointerType>(Ty);
1486      if (isKernelFunc) {
1487        if (PTy) {
1488          // Special handling for pointer arguments to kernel
1489          O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1490
1491          if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1492              NVPTX::CUDA) {
1493            Type *ETy = PTy->getElementType();
1494            int addrSpace = PTy->getAddressSpace();
1495            switch (addrSpace) {
1496            default:
1497              O << ".ptr ";
1498              break;
1499            case llvm::ADDRESS_SPACE_CONST:
1500              O << ".ptr .const ";
1501              break;
1502            case llvm::ADDRESS_SPACE_SHARED:
1503              O << ".ptr .shared ";
1504              break;
1505            case llvm::ADDRESS_SPACE_GLOBAL:
1506              O << ".ptr .global ";
1507              break;
1508            }
1509            O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
1510          }
1511          printParamName(I, paramIndex, O);
1512          continue;
1513        }
1514
1515        // non-pointer scalar to kernel func
1516        O << "\t.param .";
1517        // Special case: predicate operands become .u8 types
1518        if (Ty->isIntegerTy(1))
1519          O << "u8";
1520        else
1521          O << getPTXFundamentalTypeStr(Ty);
1522        O << " ";
1523        printParamName(I, paramIndex, O);
1524        continue;
1525      }
1526      // Non-kernel function, just print .param .b<size> for ABI
1527      // and .reg .b<size> for non-ABI
1528      unsigned sz = 0;
1529      if (isa<IntegerType>(Ty)) {
1530        sz = cast<IntegerType>(Ty)->getBitWidth();
1531        if (sz < 32)
1532          sz = 32;
1533      } else if (isa<PointerType>(Ty))
1534        sz = thePointerTy.getSizeInBits();
1535      else
1536        sz = Ty->getPrimitiveSizeInBits();
1537      if (isABI)
1538        O << "\t.param .b" << sz << " ";
1539      else
1540        O << "\t.reg .b" << sz << " ";
1541      printParamName(I, paramIndex, O);
1542      continue;
1543    }
1544
1545    // param has byVal attribute. So should be a pointer
1546    const PointerType *PTy = dyn_cast<PointerType>(Ty);
1547    assert(PTy && "Param with byval attribute should be a pointer type");
1548    Type *ETy = PTy->getElementType();
1549
1550    if (isABI || isKernelFunc) {
1551      // Just print .param .align <a> .b8 .param[size];
1552      // <a> = PAL.getparamalignment
1553      // size = typeallocsize of element type
1554      unsigned align = PAL.getParamAlignment(paramIndex + 1);
1555      if (align == 0)
1556        align = TD->getABITypeAlignment(ETy);
1557
1558      unsigned sz = TD->getTypeAllocSize(ETy);
1559      O << "\t.param .align " << align << " .b8 ";
1560      printParamName(I, paramIndex, O);
1561      O << "[" << sz << "]";
1562      continue;
1563    } else {
1564      // Split the ETy into constituent parts and
1565      // print .param .b<size> <name> for each part.
1566      // Further, if a part is vector, print the above for
1567      // each vector element.
1568      SmallVector<EVT, 16> vtparts;
1569      ComputeValueVTs(*TLI, ETy, vtparts);
1570      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1571        unsigned elems = 1;
1572        EVT elemtype = vtparts[i];
1573        if (vtparts[i].isVector()) {
1574          elems = vtparts[i].getVectorNumElements();
1575          elemtype = vtparts[i].getVectorElementType();
1576        }
1577
1578        for (unsigned j = 0, je = elems; j != je; ++j) {
1579          unsigned sz = elemtype.getSizeInBits();
1580          if (elemtype.isInteger() && (sz < 32))
1581            sz = 32;
1582          O << "\t.reg .b" << sz << " ";
1583          printParamName(I, paramIndex, O);
1584          if (j < je - 1)
1585            O << ",\n";
1586          ++paramIndex;
1587        }
1588        if (i < e - 1)
1589          O << ",\n";
1590      }
1591      --paramIndex;
1592      continue;
1593    }
1594  }
1595
1596  O << "\n)\n";
1597}
1598
1599void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1600                                            raw_ostream &O) {
1601  const Function *F = MF.getFunction();
1602  emitFunctionParamList(F, O);
1603}
1604
1605void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1606    const MachineFunction &MF) {
1607  SmallString<128> Str;
1608  raw_svector_ostream O(Str);
1609
1610  // Map the global virtual register number to a register class specific
1611  // virtual register number starting from 1 with that class.
1612  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1613  //unsigned numRegClasses = TRI->getNumRegClasses();
1614
1615  // Emit the Fake Stack Object
1616  const MachineFrameInfo *MFI = MF.getFrameInfo();
1617  int NumBytes = (int) MFI->getStackSize();
1618  if (NumBytes) {
1619    O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
1620      << getFunctionNumber() << "[" << NumBytes << "];\n";
1621    if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1622      O << "\t.reg .b64 \t%SP;\n";
1623      O << "\t.reg .b64 \t%SPL;\n";
1624    } else {
1625      O << "\t.reg .b32 \t%SP;\n";
1626      O << "\t.reg .b32 \t%SPL;\n";
1627    }
1628  }
1629
1630  // Go through all virtual registers to establish the mapping between the
1631  // global virtual
1632  // register number and the per class virtual register number.
1633  // We use the per class virtual register number in the ptx output.
1634  unsigned int numVRs = MRI->getNumVirtRegs();
1635  for (unsigned i = 0; i < numVRs; i++) {
1636    unsigned int vr = TRI->index2VirtReg(i);
1637    const TargetRegisterClass *RC = MRI->getRegClass(vr);
1638    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1639    int n = regmap.size();
1640    regmap.insert(std::make_pair(vr, n + 1));
1641  }
1642
1643  // Emit register declarations
1644  // @TODO: Extract out the real register usage
1645  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1646  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1647  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1648  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1649  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1650  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1651  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1652
1653  // Emit declaration of the virtual registers or 'physical' registers for
1654  // each register class
1655  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1656    const TargetRegisterClass *RC = TRI->getRegClass(i);
1657    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1658    std::string rcname = getNVPTXRegClassName(RC);
1659    std::string rcStr = getNVPTXRegClassStr(RC);
1660    int n = regmap.size();
1661
1662    // Only declare those registers that may be used.
1663    if (n) {
1664       O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1665         << ">;\n";
1666    }
1667  }
1668
1669  OutStreamer.EmitRawText(O.str());
1670}
1671
1672void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1673  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1674  bool ignored;
1675  unsigned int numHex;
1676  const char *lead;
1677
1678  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1679    numHex = 8;
1680    lead = "0f";
1681    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
1682  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1683    numHex = 16;
1684    lead = "0d";
1685    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
1686  } else
1687    llvm_unreachable("unsupported fp type");
1688
1689  APInt API = APF.bitcastToAPInt();
1690  std::string hexstr(utohexstr(API.getZExtValue()));
1691  O << lead;
1692  if (hexstr.length() < numHex)
1693    O << std::string(numHex - hexstr.length(), '0');
1694  O << utohexstr(API.getZExtValue());
1695}
1696
1697void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1698  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1699    O << CI->getValue();
1700    return;
1701  }
1702  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1703    printFPConstant(CFP, O);
1704    return;
1705  }
1706  if (isa<ConstantPointerNull>(CPV)) {
1707    O << "0";
1708    return;
1709  }
1710  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1711    PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
1712    bool IsNonGenericPointer = false;
1713    if (PTy && PTy->getAddressSpace() != 0) {
1714      IsNonGenericPointer = true;
1715    }
1716    if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1717      O << "generic(";
1718      O << *getSymbol(GVar);
1719      O << ")";
1720    } else {
1721      O << *getSymbol(GVar);
1722    }
1723    return;
1724  }
1725  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1726    const Value *v = Cexpr->stripPointerCasts();
1727    PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1728    bool IsNonGenericPointer = false;
1729    if (PTy && PTy->getAddressSpace() != 0) {
1730      IsNonGenericPointer = true;
1731    }
1732    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1733      if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1734        O << "generic(";
1735        O << *getSymbol(GVar);
1736        O << ")";
1737      } else {
1738        O << *getSymbol(GVar);
1739      }
1740      return;
1741    } else {
1742      O << *lowerConstant(CPV);
1743      return;
1744    }
1745  }
1746  llvm_unreachable("Not scalar type found in printScalarConstant()");
1747}
1748
1749void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1750                                   AggBuffer *aggBuffer) {
1751
1752  const DataLayout *TD = TM.getDataLayout();
1753
1754  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1755    int s = TD->getTypeAllocSize(CPV->getType());
1756    if (s < Bytes)
1757      s = Bytes;
1758    aggBuffer->addZeros(s);
1759    return;
1760  }
1761
1762  unsigned char *ptr;
1763  switch (CPV->getType()->getTypeID()) {
1764
1765  case Type::IntegerTyID: {
1766    const Type *ETy = CPV->getType();
1767    if (ETy == Type::getInt8Ty(CPV->getContext())) {
1768      unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
1769      ptr = &c;
1770      aggBuffer->addBytes(ptr, 1, Bytes);
1771    } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
1772      short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
1773      ptr = (unsigned char *)&int16;
1774      aggBuffer->addBytes(ptr, 2, Bytes);
1775    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
1776      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1777        int int32 = (int)(constInt->getZExtValue());
1778        ptr = (unsigned char *)&int32;
1779        aggBuffer->addBytes(ptr, 4, Bytes);
1780        break;
1781      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1782        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
1783                ConstantFoldConstantExpression(Cexpr, *TD))) {
1784          int int32 = (int)(constInt->getZExtValue());
1785          ptr = (unsigned char *)&int32;
1786          aggBuffer->addBytes(ptr, 4, Bytes);
1787          break;
1788        }
1789        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1790          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1791          aggBuffer->addSymbol(v);
1792          aggBuffer->addZeros(4);
1793          break;
1794        }
1795      }
1796      llvm_unreachable("unsupported integer const type");
1797    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
1798      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1799        long long int64 = (long long)(constInt->getZExtValue());
1800        ptr = (unsigned char *)&int64;
1801        aggBuffer->addBytes(ptr, 8, Bytes);
1802        break;
1803      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1804        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
1805                ConstantFoldConstantExpression(Cexpr, *TD))) {
1806          long long int64 = (long long)(constInt->getZExtValue());
1807          ptr = (unsigned char *)&int64;
1808          aggBuffer->addBytes(ptr, 8, Bytes);
1809          break;
1810        }
1811        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1812          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1813          aggBuffer->addSymbol(v);
1814          aggBuffer->addZeros(8);
1815          break;
1816        }
1817      }
1818      llvm_unreachable("unsupported integer const type");
1819    } else
1820      llvm_unreachable("unsupported integer const type");
1821    break;
1822  }
1823  case Type::FloatTyID:
1824  case Type::DoubleTyID: {
1825    const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
1826    const Type *Ty = CFP->getType();
1827    if (Ty == Type::getFloatTy(CPV->getContext())) {
1828      float float32 = (float) CFP->getValueAPF().convertToFloat();
1829      ptr = (unsigned char *)&float32;
1830      aggBuffer->addBytes(ptr, 4, Bytes);
1831    } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
1832      double float64 = CFP->getValueAPF().convertToDouble();
1833      ptr = (unsigned char *)&float64;
1834      aggBuffer->addBytes(ptr, 8, Bytes);
1835    } else {
1836      llvm_unreachable("unsupported fp const type");
1837    }
1838    break;
1839  }
1840  case Type::PointerTyID: {
1841    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1842      aggBuffer->addSymbol(GVar);
1843    } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1844      const Value *v = Cexpr->stripPointerCasts();
1845      aggBuffer->addSymbol(v);
1846    }
1847    unsigned int s = TD->getTypeAllocSize(CPV->getType());
1848    aggBuffer->addZeros(s);
1849    break;
1850  }
1851
1852  case Type::ArrayTyID:
1853  case Type::VectorTyID:
1854  case Type::StructTyID: {
1855    if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
1856        isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
1857      int ElementSize = TD->getTypeAllocSize(CPV->getType());
1858      bufferAggregateConstant(CPV, aggBuffer);
1859      if (Bytes > ElementSize)
1860        aggBuffer->addZeros(Bytes - ElementSize);
1861    } else if (isa<ConstantAggregateZero>(CPV))
1862      aggBuffer->addZeros(Bytes);
1863    else
1864      llvm_unreachable("Unexpected Constant type");
1865    break;
1866  }
1867
1868  default:
1869    llvm_unreachable("unsupported type");
1870  }
1871}
1872
1873void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1874                                              AggBuffer *aggBuffer) {
1875  const DataLayout *TD = TM.getDataLayout();
1876  int Bytes;
1877
1878  // Old constants
1879  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1880    if (CPV->getNumOperands())
1881      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1882        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1883    return;
1884  }
1885
1886  if (const ConstantDataSequential *CDS =
1887          dyn_cast<ConstantDataSequential>(CPV)) {
1888    if (CDS->getNumElements())
1889      for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1890        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1891                     aggBuffer);
1892    return;
1893  }
1894
1895  if (isa<ConstantStruct>(CPV)) {
1896    if (CPV->getNumOperands()) {
1897      StructType *ST = cast<StructType>(CPV->getType());
1898      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1899        if (i == (e - 1))
1900          Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
1901                  TD->getTypeAllocSize(ST) -
1902                  TD->getStructLayout(ST)->getElementOffset(i);
1903        else
1904          Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
1905                  TD->getStructLayout(ST)->getElementOffset(i);
1906        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1907      }
1908    }
1909    return;
1910  }
1911  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1912}
1913
1914// buildTypeNameMap - Run through symbol table looking for type names.
1915//
1916
1917bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
1918
1919  std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
1920
1921  if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
1922                                  !PI->second.compare("struct._image2d_t") ||
1923                                  !PI->second.compare("struct._image3d_t")))
1924    return true;
1925
1926  return false;
1927}
1928
1929
1930bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
1931  switch (MI.getOpcode()) {
1932  default:
1933    return false;
1934  case NVPTX::CallArgBeginInst:
1935  case NVPTX::CallArgEndInst0:
1936  case NVPTX::CallArgEndInst1:
1937  case NVPTX::CallArgF32:
1938  case NVPTX::CallArgF64:
1939  case NVPTX::CallArgI16:
1940  case NVPTX::CallArgI32:
1941  case NVPTX::CallArgI32imm:
1942  case NVPTX::CallArgI64:
1943  case NVPTX::CallArgParam:
1944  case NVPTX::CallVoidInst:
1945  case NVPTX::CallVoidInstReg:
1946  case NVPTX::Callseq_End:
1947  case NVPTX::CallVoidInstReg64:
1948  case NVPTX::DeclareParamInst:
1949  case NVPTX::DeclareRetMemInst:
1950  case NVPTX::DeclareRetRegInst:
1951  case NVPTX::DeclareRetScalarInst:
1952  case NVPTX::DeclareScalarParamInst:
1953  case NVPTX::DeclareScalarRegInst:
1954  case NVPTX::StoreParamF32:
1955  case NVPTX::StoreParamF64:
1956  case NVPTX::StoreParamI16:
1957  case NVPTX::StoreParamI32:
1958  case NVPTX::StoreParamI64:
1959  case NVPTX::StoreParamI8:
1960  case NVPTX::StoreRetvalF32:
1961  case NVPTX::StoreRetvalF64:
1962  case NVPTX::StoreRetvalI16:
1963  case NVPTX::StoreRetvalI32:
1964  case NVPTX::StoreRetvalI64:
1965  case NVPTX::StoreRetvalI8:
1966  case NVPTX::LastCallArgF32:
1967  case NVPTX::LastCallArgF64:
1968  case NVPTX::LastCallArgI16:
1969  case NVPTX::LastCallArgI32:
1970  case NVPTX::LastCallArgI32imm:
1971  case NVPTX::LastCallArgI64:
1972  case NVPTX::LastCallArgParam:
1973  case NVPTX::LoadParamMemF32:
1974  case NVPTX::LoadParamMemF64:
1975  case NVPTX::LoadParamMemI16:
1976  case NVPTX::LoadParamMemI32:
1977  case NVPTX::LoadParamMemI64:
1978  case NVPTX::LoadParamMemI8:
1979  case NVPTX::PrototypeInst:
1980  case NVPTX::DBG_VALUE:
1981    return true;
1982  }
1983  return false;
1984}
1985
1986/// PrintAsmOperand - Print out an operand for an inline asm expression.
1987///
1988bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
1989                                      unsigned AsmVariant,
1990                                      const char *ExtraCode, raw_ostream &O) {
1991  if (ExtraCode && ExtraCode[0]) {
1992    if (ExtraCode[1] != 0)
1993      return true; // Unknown modifier.
1994
1995    switch (ExtraCode[0]) {
1996    default:
1997      // See if this is a generic print operand
1998      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
1999    case 'r':
2000      break;
2001    }
2002  }
2003
2004  printOperand(MI, OpNo, O);
2005
2006  return false;
2007}
2008
2009bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
2010    const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
2011    const char *ExtraCode, raw_ostream &O) {
2012  if (ExtraCode && ExtraCode[0])
2013    return true; // Unknown modifier
2014
2015  O << '[';
2016  printMemOperand(MI, OpNo, O);
2017  O << ']';
2018
2019  return false;
2020}
2021
2022void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2023                                   raw_ostream &O, const char *Modifier) {
2024  const MachineOperand &MO = MI->getOperand(opNum);
2025  switch (MO.getType()) {
2026  case MachineOperand::MO_Register:
2027    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
2028      if (MO.getReg() == NVPTX::VRDepot)
2029        O << DEPOTNAME << getFunctionNumber();
2030      else
2031        O << NVPTXInstPrinter::getRegisterName(MO.getReg());
2032    } else {
2033      emitVirtualRegister(MO.getReg(), O);
2034    }
2035    return;
2036
2037  case MachineOperand::MO_Immediate:
2038    if (!Modifier)
2039      O << MO.getImm();
2040    else if (strstr(Modifier, "vec") == Modifier)
2041      printVecModifiedImmediate(MO, Modifier, O);
2042    else
2043      llvm_unreachable(
2044          "Don't know how to handle modifier on immediate operand");
2045    return;
2046
2047  case MachineOperand::MO_FPImmediate:
2048    printFPConstant(MO.getFPImm(), O);
2049    break;
2050
2051  case MachineOperand::MO_GlobalAddress:
2052    O << *getSymbol(MO.getGlobal());
2053    break;
2054
2055  case MachineOperand::MO_MachineBasicBlock:
2056    O << *MO.getMBB()->getSymbol();
2057    return;
2058
2059  default:
2060    llvm_unreachable("Operand type not supported.");
2061  }
2062}
2063
2064void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2065                                      raw_ostream &O, const char *Modifier) {
2066  printOperand(MI, opNum, O);
2067
2068  if (Modifier && !strcmp(Modifier, "add")) {
2069    O << ", ";
2070    printOperand(MI, opNum + 1, O);
2071  } else {
2072    if (MI->getOperand(opNum + 1).isImm() &&
2073        MI->getOperand(opNum + 1).getImm() == 0)
2074      return; // don't print ',0' or '+0'
2075    O << "+";
2076    printOperand(MI, opNum + 1, O);
2077  }
2078}
2079
2080void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
2081  std::stringstream temp;
2082  LineReader *reader = this->getReader(filename);
2083  temp << "\n//";
2084  temp << filename.str();
2085  temp << ":";
2086  temp << line;
2087  temp << " ";
2088  temp << reader->readLine(line);
2089  temp << "\n";
2090  this->OutStreamer.EmitRawText(temp.str());
2091}
2092
2093LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
2094  if (!reader) {
2095    reader = new LineReader(filename);
2096  }
2097
2098  if (reader->fileName() != filename) {
2099    delete reader;
2100    reader = new LineReader(filename);
2101  }
2102
2103  return reader;
2104}
2105
2106std::string LineReader::readLine(unsigned lineNum) {
2107  if (lineNum < theCurLine) {
2108    theCurLine = 0;
2109    fstr.seekg(0, std::ios::beg);
2110  }
2111  while (theCurLine < lineNum) {
2112    fstr.getline(buff, 500);
2113    theCurLine++;
2114  }
2115  return buff;
2116}
2117
2118// Force static initialization.
2119extern "C" void LLVMInitializeNVPTXAsmPrinter() {
2120  RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
2121  RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
2122}
2123