NVPTXAsmPrinter.cpp revision 93cf0939f95b3d580d9c05375a7c84164e1ba72e
1//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a printer that converts from our internal representation
11// of machine-dependent LLVM code to NVPTX assembly language.
12//
13//===----------------------------------------------------------------------===//
14
15#include "NVPTXAsmPrinter.h"
16#include "MCTargetDesc/NVPTXMCAsmInfo.h"
17#include "NVPTX.h"
18#include "NVPTXInstrInfo.h"
19#include "NVPTXMCExpr.h"
20#include "NVPTXRegisterInfo.h"
21#include "NVPTXTargetMachine.h"
22#include "NVPTXUtilities.h"
23#include "InstPrinter/NVPTXInstPrinter.h"
24#include "cl_common_defines.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/Analysis/ConstantFolding.h"
27#include "llvm/Assembly/Writer.h"
28#include "llvm/CodeGen/Analysis.h"
29#include "llvm/CodeGen/MachineFrameInfo.h"
30#include "llvm/CodeGen/MachineModuleInfo.h"
31#include "llvm/CodeGen/MachineRegisterInfo.h"
32#include "llvm/DebugInfo.h"
33#include "llvm/IR/DerivedTypes.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/GlobalVariable.h"
36#include "llvm/IR/Module.h"
37#include "llvm/IR/Operator.h"
38#include "llvm/MC/MCStreamer.h"
39#include "llvm/MC/MCSymbol.h"
40#include "llvm/Support/CommandLine.h"
41#include "llvm/Support/ErrorHandling.h"
42#include "llvm/Support/FormattedStream.h"
43#include "llvm/Support/Path.h"
44#include "llvm/Support/TargetRegistry.h"
45#include "llvm/Support/TimeValue.h"
46#include "llvm/Target/Mangler.h"
47#include "llvm/Target/TargetLoweringObjectFile.h"
48#include <sstream>
49using namespace llvm;
50
51#define DEPOTNAME "__local_depot"
52
53static cl::opt<bool>
54EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
55                cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
56                cl::init(true));
57
58static cl::opt<bool>
59InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
60              cl::desc("NVPTX Specific: Emit source line in ptx file"),
61              cl::init(false));
62
63namespace {
64/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
65/// depends.
66void DiscoverDependentGlobals(const Value *V,
67                              DenseSet<const GlobalVariable *> &Globals) {
68  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
69    Globals.insert(GV);
70  else {
71    if (const User *U = dyn_cast<User>(V)) {
72      for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
73        DiscoverDependentGlobals(U->getOperand(i), Globals);
74      }
75    }
76  }
77}
78
79/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
80/// instances to be emitted, but only after any dependents have been added
81/// first.
82void VisitGlobalVariableForEmission(
83    const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
84    DenseSet<const GlobalVariable *> &Visited,
85    DenseSet<const GlobalVariable *> &Visiting) {
86  // Have we already visited this one?
87  if (Visited.count(GV))
88    return;
89
90  // Do we have a circular dependency?
91  if (Visiting.count(GV))
92    report_fatal_error("Circular dependency found in global variable set");
93
94  // Start visiting this global
95  Visiting.insert(GV);
96
97  // Make sure we visit all dependents first
98  DenseSet<const GlobalVariable *> Others;
99  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
100    DiscoverDependentGlobals(GV->getOperand(i), Others);
101
102  for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
103                                                  E = Others.end();
104       I != E; ++I)
105    VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
106
107  // Now we can visit ourself
108  Order.push_back(GV);
109  Visited.insert(GV);
110  Visiting.erase(GV);
111}
112}
113
114// @TODO: This is a copy from AsmPrinter.cpp.  The function is static, so we
115// cannot just link to the existing version.
116/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
117///
118using namespace nvptx;
119const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
120  MCContext &Ctx = AP.OutContext;
121
122  if (CV->isNullValue() || isa<UndefValue>(CV))
123    return MCConstantExpr::Create(0, Ctx);
124
125  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
126    return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
127
128  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
129    return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
130
131  if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
132    return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
133
134  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
135  if (CE == 0)
136    llvm_unreachable("Unknown constant value to lower!");
137
138  switch (CE->getOpcode()) {
139  default:
140    // If the code isn't optimized, there may be outstanding folding
141    // opportunities. Attempt to fold the expression using DataLayout as a
142    // last resort before giving up.
143    if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
144      if (C != CE)
145        return LowerConstant(C, AP);
146
147    // Otherwise report the problem to the user.
148    {
149      std::string S;
150      raw_string_ostream OS(S);
151      OS << "Unsupported expression in static initializer: ";
152      WriteAsOperand(OS, CE, /*PrintType=*/ false,
153                     !AP.MF ? 0 : AP.MF->getFunction()->getParent());
154      report_fatal_error(OS.str());
155    }
156  case Instruction::GetElementPtr: {
157    const DataLayout &TD = *AP.TM.getDataLayout();
158    // Generate a symbolic expression for the byte address
159    APInt OffsetAI(TD.getPointerSizeInBits(), 0);
160    cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
161
162    const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
163    if (!OffsetAI)
164      return Base;
165
166    int64_t Offset = OffsetAI.getSExtValue();
167    return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
168                                   Ctx);
169  }
170
171  case Instruction::Trunc:
172    // We emit the value and depend on the assembler to truncate the generated
173    // expression properly.  This is important for differences between
174    // blockaddress labels.  Since the two labels are in the same function, it
175    // is reasonable to treat their delta as a 32-bit value.
176  // FALL THROUGH.
177  case Instruction::BitCast:
178    return LowerConstant(CE->getOperand(0), AP);
179
180  case Instruction::IntToPtr: {
181    const DataLayout &TD = *AP.TM.getDataLayout();
182    // Handle casts to pointers by changing them into casts to the appropriate
183    // integer type.  This promotes constant folding and simplifies this code.
184    Constant *Op = CE->getOperand(0);
185    Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
186                                      false /*ZExt*/);
187    return LowerConstant(Op, AP);
188  }
189
190  case Instruction::PtrToInt: {
191    const DataLayout &TD = *AP.TM.getDataLayout();
192    // Support only foldable casts to/from pointers that can be eliminated by
193    // changing the pointer to the appropriately sized integer type.
194    Constant *Op = CE->getOperand(0);
195    Type *Ty = CE->getType();
196
197    const MCExpr *OpExpr = LowerConstant(Op, AP);
198
199    // We can emit the pointer value into this slot if the slot is an
200    // integer slot equal to the size of the pointer.
201    if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
202      return OpExpr;
203
204    // Otherwise the pointer is smaller than the resultant integer, mask off
205    // the high bits so we are sure to get a proper truncation if the input is
206    // a constant expr.
207    unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
208    const MCExpr *MaskExpr =
209        MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
210    return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
211  }
212
213    // The MC library also has a right-shift operator, but it isn't consistently
214  // signed or unsigned between different targets.
215  case Instruction::Add:
216  case Instruction::Sub:
217  case Instruction::Mul:
218  case Instruction::SDiv:
219  case Instruction::SRem:
220  case Instruction::Shl:
221  case Instruction::And:
222  case Instruction::Or:
223  case Instruction::Xor: {
224    const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
225    const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
226    switch (CE->getOpcode()) {
227    default:
228      llvm_unreachable("Unknown binary operator constant cast expr");
229    case Instruction::Add:
230      return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
231    case Instruction::Sub:
232      return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
233    case Instruction::Mul:
234      return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
235    case Instruction::SDiv:
236      return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
237    case Instruction::SRem:
238      return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
239    case Instruction::Shl:
240      return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
241    case Instruction::And:
242      return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
243    case Instruction::Or:
244      return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
245    case Instruction::Xor:
246      return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
247    }
248  }
249  }
250}
251
252void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
253  if (!EmitLineNumbers)
254    return;
255  if (ignoreLoc(MI))
256    return;
257
258  DebugLoc curLoc = MI.getDebugLoc();
259
260  if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
261    return;
262
263  if (prevDebugLoc == curLoc)
264    return;
265
266  prevDebugLoc = curLoc;
267
268  if (curLoc.isUnknown())
269    return;
270
271  const MachineFunction *MF = MI.getParent()->getParent();
272  //const TargetMachine &TM = MF->getTarget();
273
274  const LLVMContext &ctx = MF->getFunction()->getContext();
275  DIScope Scope(curLoc.getScope(ctx));
276
277  assert((!Scope || Scope.isScope()) &&
278    "Scope of a DebugLoc should be null or a DIScope.");
279  if (!Scope)
280     return;
281
282  StringRef fileName(Scope.getFilename());
283  StringRef dirName(Scope.getDirectory());
284  SmallString<128> FullPathName = dirName;
285  if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
286    sys::path::append(FullPathName, fileName);
287    fileName = FullPathName.str();
288  }
289
290  if (filenameMap.find(fileName.str()) == filenameMap.end())
291    return;
292
293  // Emit the line from the source file.
294  if (InterleaveSrc)
295    this->emitSrcInText(fileName.str(), curLoc.getLine());
296
297  std::stringstream temp;
298  temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
299       << " " << curLoc.getCol();
300  OutStreamer.EmitRawText(Twine(temp.str().c_str()));
301}
302
303void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
304  SmallString<128> Str;
305  raw_svector_ostream OS(Str);
306  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
307    emitLineNumberAsDotLoc(*MI);
308
309  MCInst Inst;
310  lowerToMCInst(MI, Inst);
311  OutStreamer.EmitInstruction(Inst);
312}
313
314void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
315  OutMI.setOpcode(MI->getOpcode());
316
317  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
318    const MachineOperand &MO = MI->getOperand(i);
319
320    MCOperand MCOp;
321    if (lowerOperand(MO, MCOp))
322      OutMI.addOperand(MCOp);
323  }
324}
325
326bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
327                                   MCOperand &MCOp) {
328  switch (MO.getType()) {
329  default: llvm_unreachable("unknown operand type");
330  case MachineOperand::MO_Register:
331    MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
332    break;
333  case MachineOperand::MO_Immediate:
334    MCOp = MCOperand::CreateImm(MO.getImm());
335    break;
336  case MachineOperand::MO_MachineBasicBlock:
337    MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
338        MO.getMBB()->getSymbol(), OutContext));
339    break;
340  case MachineOperand::MO_ExternalSymbol:
341    MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
342    break;
343  case MachineOperand::MO_GlobalAddress:
344    MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal()));
345    break;
346  case MachineOperand::MO_FPImmediate: {
347    const ConstantFP *Cnt = MO.getFPImm();
348    APFloat Val = Cnt->getValueAPF();
349
350    switch (Cnt->getType()->getTypeID()) {
351    default: report_fatal_error("Unsupported FP type"); break;
352    case Type::FloatTyID:
353      MCOp = MCOperand::CreateExpr(
354        NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
355      break;
356    case Type::DoubleTyID:
357      MCOp = MCOperand::CreateExpr(
358        NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
359      break;
360    }
361    break;
362  }
363  }
364  return true;
365}
366
367unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
368  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
369    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
370
371    DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
372    unsigned RegNum = RegMap[Reg];
373
374    // Encode the register class in the upper 4 bits
375    // Must be kept in sync with NVPTXInstPrinter::printRegName
376    unsigned Ret = 0;
377    if (RC == &NVPTX::Int1RegsRegClass) {
378      Ret = (1 << 28);
379    } else if (RC == &NVPTX::Int16RegsRegClass) {
380      Ret = (2 << 28);
381    } else if (RC == &NVPTX::Int32RegsRegClass) {
382      Ret = (3 << 28);
383    } else if (RC == &NVPTX::Int64RegsRegClass) {
384      Ret = (4 << 28);
385    } else if (RC == &NVPTX::Float32RegsRegClass) {
386      Ret = (5 << 28);
387    } else if (RC == &NVPTX::Float64RegsRegClass) {
388      Ret = (6 << 28);
389    } else {
390      report_fatal_error("Bad register class");
391    }
392
393    // Insert the vreg number
394    Ret |= (RegNum & 0x0FFFFFFF);
395    return Ret;
396  } else {
397    // Some special-use registers are actually physical registers.
398    // Encode this as the register class ID of 0 and the real register ID.
399    return Reg & 0x0FFFFFFF;
400  }
401}
402
403MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
404                                        const MCSymbol *Symbol) {
405  const MCExpr *Expr;
406  Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
407                                 OutContext);
408  return MCOperand::CreateExpr(Expr);
409}
410
411void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
412  const DataLayout *TD = TM.getDataLayout();
413  const TargetLowering *TLI = TM.getTargetLowering();
414
415  Type *Ty = F->getReturnType();
416
417  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
418
419  if (Ty->getTypeID() == Type::VoidTyID)
420    return;
421
422  O << " (";
423
424  if (isABI) {
425    if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
426      unsigned size = 0;
427      if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
428        size = ITy->getBitWidth();
429        if (size < 32)
430          size = 32;
431      } else {
432        assert(Ty->isFloatingPointTy() && "Floating point type expected here");
433        size = Ty->getPrimitiveSizeInBits();
434      }
435
436      O << ".param .b" << size << " func_retval0";
437    } else if (isa<PointerType>(Ty)) {
438      O << ".param .b" << TLI->getPointerTy().getSizeInBits()
439        << " func_retval0";
440    } else {
441      if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
442        SmallVector<EVT, 16> vtparts;
443        ComputeValueVTs(*TLI, Ty, vtparts);
444        unsigned totalsz = 0;
445        for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
446          unsigned elems = 1;
447          EVT elemtype = vtparts[i];
448          if (vtparts[i].isVector()) {
449            elems = vtparts[i].getVectorNumElements();
450            elemtype = vtparts[i].getVectorElementType();
451          }
452          for (unsigned j = 0, je = elems; j != je; ++j) {
453            unsigned sz = elemtype.getSizeInBits();
454            if (elemtype.isInteger() && (sz < 8))
455              sz = 8;
456            totalsz += sz / 8;
457          }
458        }
459        unsigned retAlignment = 0;
460        if (!llvm::getAlign(*F, 0, retAlignment))
461          retAlignment = TD->getABITypeAlignment(Ty);
462        O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
463          << "]";
464      } else
465        assert(false && "Unknown return type");
466    }
467  } else {
468    SmallVector<EVT, 16> vtparts;
469    ComputeValueVTs(*TLI, Ty, vtparts);
470    unsigned idx = 0;
471    for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
472      unsigned elems = 1;
473      EVT elemtype = vtparts[i];
474      if (vtparts[i].isVector()) {
475        elems = vtparts[i].getVectorNumElements();
476        elemtype = vtparts[i].getVectorElementType();
477      }
478
479      for (unsigned j = 0, je = elems; j != je; ++j) {
480        unsigned sz = elemtype.getSizeInBits();
481        if (elemtype.isInteger() && (sz < 32))
482          sz = 32;
483        O << ".reg .b" << sz << " func_retval" << idx;
484        if (j < je - 1)
485          O << ", ";
486        ++idx;
487      }
488      if (i < e - 1)
489        O << ", ";
490    }
491  }
492  O << ") ";
493  return;
494}
495
496void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
497                                        raw_ostream &O) {
498  const Function *F = MF.getFunction();
499  printReturnValStr(F, O);
500}
501
502void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
503  SmallString<128> Str;
504  raw_svector_ostream O(Str);
505
506  if (!GlobalsEmitted) {
507    emitGlobals(*MF->getFunction()->getParent());
508    GlobalsEmitted = true;
509  }
510
511  // Set up
512  MRI = &MF->getRegInfo();
513  F = MF->getFunction();
514  emitLinkageDirective(F, O);
515  if (llvm::isKernelFunction(*F))
516    O << ".entry ";
517  else {
518    O << ".func ";
519    printReturnValStr(*MF, O);
520  }
521
522  O << *CurrentFnSym;
523
524  emitFunctionParamList(*MF, O);
525
526  if (llvm::isKernelFunction(*F))
527    emitKernelFunctionDirectives(*F, O);
528
529  OutStreamer.EmitRawText(O.str());
530
531  prevDebugLoc = DebugLoc();
532}
533
534void NVPTXAsmPrinter::EmitFunctionBodyStart() {
535  VRegMapping.clear();
536  OutStreamer.EmitRawText(StringRef("{\n"));
537  setAndEmitFunctionVirtualRegisters(*MF);
538
539  SmallString<128> Str;
540  raw_svector_ostream O(Str);
541  emitDemotedVars(MF->getFunction(), O);
542  OutStreamer.EmitRawText(O.str());
543}
544
545void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
546  OutStreamer.EmitRawText(StringRef("}\n"));
547  VRegMapping.clear();
548}
549
550void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
551  unsigned RegNo = MI->getOperand(0).getReg();
552  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
553  if (TRI->isVirtualRegister(RegNo)) {
554    OutStreamer.AddComment(Twine("implicit-def: ") +
555                           getVirtualRegisterName(RegNo));
556  } else {
557    OutStreamer.AddComment(Twine("implicit-def: ") +
558                           TM.getRegisterInfo()->getName(RegNo));
559  }
560  OutStreamer.AddBlankLine();
561}
562
563void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
564                                                   raw_ostream &O) const {
565  // If the NVVM IR has some of reqntid* specified, then output
566  // the reqntid directive, and set the unspecified ones to 1.
567  // If none of reqntid* is specified, don't output reqntid directive.
568  unsigned reqntidx, reqntidy, reqntidz;
569  bool specified = false;
570  if (llvm::getReqNTIDx(F, reqntidx) == false)
571    reqntidx = 1;
572  else
573    specified = true;
574  if (llvm::getReqNTIDy(F, reqntidy) == false)
575    reqntidy = 1;
576  else
577    specified = true;
578  if (llvm::getReqNTIDz(F, reqntidz) == false)
579    reqntidz = 1;
580  else
581    specified = true;
582
583  if (specified)
584    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
585      << "\n";
586
587  // If the NVVM IR has some of maxntid* specified, then output
588  // the maxntid directive, and set the unspecified ones to 1.
589  // If none of maxntid* is specified, don't output maxntid directive.
590  unsigned maxntidx, maxntidy, maxntidz;
591  specified = false;
592  if (llvm::getMaxNTIDx(F, maxntidx) == false)
593    maxntidx = 1;
594  else
595    specified = true;
596  if (llvm::getMaxNTIDy(F, maxntidy) == false)
597    maxntidy = 1;
598  else
599    specified = true;
600  if (llvm::getMaxNTIDz(F, maxntidz) == false)
601    maxntidz = 1;
602  else
603    specified = true;
604
605  if (specified)
606    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
607      << "\n";
608
609  unsigned mincta;
610  if (llvm::getMinCTASm(F, mincta))
611    O << ".minnctapersm " << mincta << "\n";
612}
613
614std::string
615NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
616  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
617
618  std::string Name;
619  raw_string_ostream NameStr(Name);
620
621  VRegRCMap::const_iterator I = VRegMapping.find(RC);
622  assert(I != VRegMapping.end() && "Bad register class");
623  const DenseMap<unsigned, unsigned> &RegMap = I->second;
624
625  VRegMap::const_iterator VI = RegMap.find(Reg);
626  assert(VI != RegMap.end() && "Bad virtual register");
627  unsigned MappedVR = VI->second;
628
629  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
630
631  NameStr.flush();
632  return Name;
633}
634
635void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
636                                          raw_ostream &O) {
637  O << getVirtualRegisterName(vr);
638}
639
640void NVPTXAsmPrinter::printVecModifiedImmediate(
641    const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
642  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
643  int Imm = (int) MO.getImm();
644  if (0 == strcmp(Modifier, "vecelem"))
645    O << "_" << vecelem[Imm];
646  else if (0 == strcmp(Modifier, "vecv4comm1")) {
647    if ((Imm < 0) || (Imm > 3))
648      O << "//";
649  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
650    if ((Imm < 4) || (Imm > 7))
651      O << "//";
652  } else if (0 == strcmp(Modifier, "vecv4pos")) {
653    if (Imm < 0)
654      Imm = 0;
655    O << "_" << vecelem[Imm % 4];
656  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
657    if ((Imm < 0) || (Imm > 1))
658      O << "//";
659  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
660    if ((Imm < 2) || (Imm > 3))
661      O << "//";
662  } else if (0 == strcmp(Modifier, "vecv2pos")) {
663    if (Imm < 0)
664      Imm = 0;
665    O << "_" << vecelem[Imm % 2];
666  } else
667    llvm_unreachable("Unknown Modifier on immediate operand");
668}
669
670
671
672void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
673
674  emitLinkageDirective(F, O);
675  if (llvm::isKernelFunction(*F))
676    O << ".entry ";
677  else
678    O << ".func ";
679  printReturnValStr(F, O);
680  O << *getSymbol(F) << "\n";
681  emitFunctionParamList(F, O);
682  O << ";\n";
683}
684
685static bool usedInGlobalVarDef(const Constant *C) {
686  if (!C)
687    return false;
688
689  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
690    if (GV->getName().str() == "llvm.used")
691      return false;
692    return true;
693  }
694
695  for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
696       ui != ue; ++ui) {
697    const Constant *C = dyn_cast<Constant>(*ui);
698    if (usedInGlobalVarDef(C))
699      return true;
700  }
701  return false;
702}
703
704static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
705  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
706    if (othergv->getName().str() == "llvm.used")
707      return true;
708  }
709
710  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
711    if (instr->getParent() && instr->getParent()->getParent()) {
712      const Function *curFunc = instr->getParent()->getParent();
713      if (oneFunc && (curFunc != oneFunc))
714        return false;
715      oneFunc = curFunc;
716      return true;
717    } else
718      return false;
719  }
720
721  if (const MDNode *md = dyn_cast<MDNode>(U))
722    if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
723                          (md->getName().str() == "llvm.dbg.sp")))
724      return true;
725
726  for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end();
727       ui != ue; ++ui) {
728    if (usedInOneFunc(*ui, oneFunc) == false)
729      return false;
730  }
731  return true;
732}
733
734/* Find out if a global variable can be demoted to local scope.
735 * Currently, this is valid for CUDA shared variables, which have local
736 * scope and global lifetime. So the conditions to check are :
737 * 1. Is the global variable in shared address space?
738 * 2. Does it have internal linkage?
739 * 3. Is the global variable referenced only in one function?
740 */
741static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
742  if (gv->hasInternalLinkage() == false)
743    return false;
744  const PointerType *Pty = gv->getType();
745  if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
746    return false;
747
748  const Function *oneFunc = 0;
749
750  bool flag = usedInOneFunc(gv, oneFunc);
751  if (flag == false)
752    return false;
753  if (!oneFunc)
754    return false;
755  f = oneFunc;
756  return true;
757}
758
759static bool useFuncSeen(const Constant *C,
760                        llvm::DenseMap<const Function *, bool> &seenMap) {
761  for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
762       ui != ue; ++ui) {
763    if (const Constant *cu = dyn_cast<Constant>(*ui)) {
764      if (useFuncSeen(cu, seenMap))
765        return true;
766    } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
767      const BasicBlock *bb = I->getParent();
768      if (!bb)
769        continue;
770      const Function *caller = bb->getParent();
771      if (!caller)
772        continue;
773      if (seenMap.find(caller) != seenMap.end())
774        return true;
775    }
776  }
777  return false;
778}
779
780void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
781  llvm::DenseMap<const Function *, bool> seenMap;
782  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
783    const Function *F = FI;
784
785    if (F->isDeclaration()) {
786      if (F->use_empty())
787        continue;
788      if (F->getIntrinsicID())
789        continue;
790      emitDeclaration(F, O);
791      continue;
792    }
793    for (Value::const_use_iterator iter = F->use_begin(),
794                                   iterEnd = F->use_end();
795         iter != iterEnd; ++iter) {
796      if (const Constant *C = dyn_cast<Constant>(*iter)) {
797        if (usedInGlobalVarDef(C)) {
798          // The use is in the initialization of a global variable
799          // that is a function pointer, so print a declaration
800          // for the original function
801          emitDeclaration(F, O);
802          break;
803        }
804        // Emit a declaration of this function if the function that
805        // uses this constant expr has already been seen.
806        if (useFuncSeen(C, seenMap)) {
807          emitDeclaration(F, O);
808          break;
809        }
810      }
811
812      if (!isa<Instruction>(*iter))
813        continue;
814      const Instruction *instr = cast<Instruction>(*iter);
815      const BasicBlock *bb = instr->getParent();
816      if (!bb)
817        continue;
818      const Function *caller = bb->getParent();
819      if (!caller)
820        continue;
821
822      // If a caller has already been seen, then the caller is
823      // appearing in the module before the callee. so print out
824      // a declaration for the callee.
825      if (seenMap.find(caller) != seenMap.end()) {
826        emitDeclaration(F, O);
827        break;
828      }
829    }
830    seenMap[F] = true;
831  }
832}
833
834void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
835  DebugInfoFinder DbgFinder;
836  DbgFinder.processModule(M);
837
838  unsigned i = 1;
839  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
840                                 E = DbgFinder.compile_unit_end();
841       I != E; ++I) {
842    DICompileUnit DIUnit(*I);
843    StringRef Filename(DIUnit.getFilename());
844    StringRef Dirname(DIUnit.getDirectory());
845    SmallString<128> FullPathName = Dirname;
846    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
847      sys::path::append(FullPathName, Filename);
848      Filename = FullPathName.str();
849    }
850    if (filenameMap.find(Filename.str()) != filenameMap.end())
851      continue;
852    filenameMap[Filename.str()] = i;
853    OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
854    ++i;
855  }
856
857  for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
858                                 E = DbgFinder.subprogram_end();
859       I != E; ++I) {
860    DISubprogram SP(*I);
861    StringRef Filename(SP.getFilename());
862    StringRef Dirname(SP.getDirectory());
863    SmallString<128> FullPathName = Dirname;
864    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
865      sys::path::append(FullPathName, Filename);
866      Filename = FullPathName.str();
867    }
868    if (filenameMap.find(Filename.str()) != filenameMap.end())
869      continue;
870    filenameMap[Filename.str()] = i;
871    ++i;
872  }
873}
874
875bool NVPTXAsmPrinter::doInitialization(Module &M) {
876
877  SmallString<128> Str1;
878  raw_svector_ostream OS1(Str1);
879
880  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
881  MMI->AnalyzeModule(M);
882
883  // We need to call the parent's one explicitly.
884  //bool Result = AsmPrinter::doInitialization(M);
885
886  // Initialize TargetLoweringObjectFile.
887  const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
888      .Initialize(OutContext, TM);
889
890  Mang = new Mangler(&TM);
891
892  // Emit header before any dwarf directives are emitted below.
893  emitHeader(M, OS1);
894  OutStreamer.EmitRawText(OS1.str());
895
896  // Already commented out
897  //bool Result = AsmPrinter::doInitialization(M);
898
899  // Emit module-level inline asm if it exists.
900  if (!M.getModuleInlineAsm().empty()) {
901    OutStreamer.AddComment("Start of file scope inline assembly");
902    OutStreamer.AddBlankLine();
903    OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
904    OutStreamer.AddBlankLine();
905    OutStreamer.AddComment("End of file scope inline assembly");
906    OutStreamer.AddBlankLine();
907  }
908
909  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
910    recordAndEmitFilenames(M);
911
912  GlobalsEmitted = false;
913
914  return false; // success
915}
916
917void NVPTXAsmPrinter::emitGlobals(const Module &M) {
918  SmallString<128> Str2;
919  raw_svector_ostream OS2(Str2);
920
921  emitDeclarations(M, OS2);
922
923  // As ptxas does not support forward references of globals, we need to first
924  // sort the list of module-level globals in def-use order. We visit each
925  // global variable in order, and ensure that we emit it *after* its dependent
926  // globals. We use a little extra memory maintaining both a set and a list to
927  // have fast searches while maintaining a strict ordering.
928  SmallVector<const GlobalVariable *, 8> Globals;
929  DenseSet<const GlobalVariable *> GVVisited;
930  DenseSet<const GlobalVariable *> GVVisiting;
931
932  // Visit each global variable, in order
933  for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
934       I != E; ++I)
935    VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
936
937  assert(GVVisited.size() == M.getGlobalList().size() &&
938         "Missed a global variable");
939  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
940
941  // Print out module-level global variables in proper order
942  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
943    printModuleLevelGV(Globals[i], OS2);
944
945  OS2 << '\n';
946
947  OutStreamer.EmitRawText(OS2.str());
948}
949
950void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
951  O << "//\n";
952  O << "// Generated by LLVM NVPTX Back-End\n";
953  O << "//\n";
954  O << "\n";
955
956  unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
957  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
958
959  O << ".target ";
960  O << nvptxSubtarget.getTargetName();
961
962  if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
963    O << ", texmode_independent";
964  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
965    if (!nvptxSubtarget.hasDouble())
966      O << ", map_f64_to_f32";
967  }
968
969  if (MAI->doesSupportDebugInformation())
970    O << ", debug";
971
972  O << "\n";
973
974  O << ".address_size ";
975  if (nvptxSubtarget.is64Bit())
976    O << "64";
977  else
978    O << "32";
979  O << "\n";
980
981  O << "\n";
982}
983
984bool NVPTXAsmPrinter::doFinalization(Module &M) {
985
986  // If we did not emit any functions, then the global declarations have not
987  // yet been emitted.
988  if (!GlobalsEmitted) {
989    emitGlobals(M);
990    GlobalsEmitted = true;
991  }
992
993  // XXX Temproarily remove global variables so that doFinalization() will not
994  // emit them again (global variables are emitted at beginning).
995
996  Module::GlobalListType &global_list = M.getGlobalList();
997  int i, n = global_list.size();
998  GlobalVariable **gv_array = new GlobalVariable *[n];
999
1000  // first, back-up GlobalVariable in gv_array
1001  i = 0;
1002  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
1003       I != E; ++I)
1004    gv_array[i++] = &*I;
1005
1006  // second, empty global_list
1007  while (!global_list.empty())
1008    global_list.remove(global_list.begin());
1009
1010  // call doFinalization
1011  bool ret = AsmPrinter::doFinalization(M);
1012
1013  // now we restore global variables
1014  for (i = 0; i < n; i++)
1015    global_list.insert(global_list.end(), gv_array[i]);
1016
1017  delete[] gv_array;
1018  return ret;
1019
1020  //bool Result = AsmPrinter::doFinalization(M);
1021  // Instead of calling the parents doFinalization, we may
1022  // clone parents doFinalization and customize here.
1023  // Currently, we if NVISA out the EmitGlobals() in
1024  // parent's doFinalization, which is too intrusive.
1025  //
1026  // Same for the doInitialization.
1027  //return Result;
1028}
1029
1030// This function emits appropriate linkage directives for
1031// functions and global variables.
1032//
1033// extern function declaration            -> .extern
1034// extern function definition             -> .visible
1035// external global variable with init     -> .visible
1036// external without init                  -> .extern
1037// appending                              -> not allowed, assert.
1038
1039void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
1040                                           raw_ostream &O) {
1041  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
1042    if (V->hasExternalLinkage()) {
1043      if (isa<GlobalVariable>(V)) {
1044        const GlobalVariable *GVar = cast<GlobalVariable>(V);
1045        if (GVar) {
1046          if (GVar->hasInitializer())
1047            O << ".visible ";
1048          else
1049            O << ".extern ";
1050        }
1051      } else if (V->isDeclaration())
1052        O << ".extern ";
1053      else
1054        O << ".visible ";
1055    } else if (V->hasAppendingLinkage()) {
1056      std::string msg;
1057      msg.append("Error: ");
1058      msg.append("Symbol ");
1059      if (V->hasName())
1060        msg.append(V->getName().str());
1061      msg.append("has unsupported appending linkage type");
1062      llvm_unreachable(msg.c_str());
1063    }
1064  }
1065}
1066
1067void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1068                                         raw_ostream &O,
1069                                         bool processDemoted) {
1070
1071  // Skip meta data
1072  if (GVar->hasSection()) {
1073    if (GVar->getSection() == "llvm.metadata")
1074      return;
1075  }
1076
1077  const DataLayout *TD = TM.getDataLayout();
1078
1079  // GlobalVariables are always constant pointers themselves.
1080  const PointerType *PTy = GVar->getType();
1081  Type *ETy = PTy->getElementType();
1082
1083  if (GVar->hasExternalLinkage()) {
1084    if (GVar->hasInitializer())
1085      O << ".visible ";
1086    else
1087      O << ".extern ";
1088  }
1089
1090  if (llvm::isTexture(*GVar)) {
1091    O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
1092    return;
1093  }
1094
1095  if (llvm::isSurface(*GVar)) {
1096    O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
1097    return;
1098  }
1099
1100  if (GVar->isDeclaration()) {
1101    // (extern) declarations, no definition or initializer
1102    // Currently the only known declaration is for an automatic __local
1103    // (.shared) promoted to global.
1104    emitPTXGlobalVariable(GVar, O);
1105    O << ";\n";
1106    return;
1107  }
1108
1109  if (llvm::isSampler(*GVar)) {
1110    O << ".global .samplerref " << llvm::getSamplerName(*GVar);
1111
1112    const Constant *Initializer = NULL;
1113    if (GVar->hasInitializer())
1114      Initializer = GVar->getInitializer();
1115    const ConstantInt *CI = NULL;
1116    if (Initializer)
1117      CI = dyn_cast<ConstantInt>(Initializer);
1118    if (CI) {
1119      unsigned sample = CI->getZExtValue();
1120
1121      O << " = { ";
1122
1123      for (int i = 0,
1124               addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1125           i < 3; i++) {
1126        O << "addr_mode_" << i << " = ";
1127        switch (addr) {
1128        case 0:
1129          O << "wrap";
1130          break;
1131        case 1:
1132          O << "clamp_to_border";
1133          break;
1134        case 2:
1135          O << "clamp_to_edge";
1136          break;
1137        case 3:
1138          O << "wrap";
1139          break;
1140        case 4:
1141          O << "mirror";
1142          break;
1143        }
1144        O << ", ";
1145      }
1146      O << "filter_mode = ";
1147      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1148      case 0:
1149        O << "nearest";
1150        break;
1151      case 1:
1152        O << "linear";
1153        break;
1154      case 2:
1155        assert(0 && "Anisotropic filtering is not supported");
1156      default:
1157        O << "nearest";
1158        break;
1159      }
1160      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1161        O << ", force_unnormalized_coords = 1";
1162      }
1163      O << " }";
1164    }
1165
1166    O << ";\n";
1167    return;
1168  }
1169
1170  if (GVar->hasPrivateLinkage()) {
1171
1172    if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
1173      return;
1174
1175    // FIXME - need better way (e.g. Metadata) to avoid generating this global
1176    if (!strncmp(GVar->getName().data(), "filename", 8))
1177      return;
1178    if (GVar->use_empty())
1179      return;
1180  }
1181
1182  const Function *demotedFunc = 0;
1183  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1184    O << "// " << GVar->getName().str() << " has been demoted\n";
1185    if (localDecls.find(demotedFunc) != localDecls.end())
1186      localDecls[demotedFunc].push_back(GVar);
1187    else {
1188      std::vector<const GlobalVariable *> temp;
1189      temp.push_back(GVar);
1190      localDecls[demotedFunc] = temp;
1191    }
1192    return;
1193  }
1194
1195  O << ".";
1196  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1197  if (GVar->getAlignment() == 0)
1198    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
1199  else
1200    O << " .align " << GVar->getAlignment();
1201
1202  if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
1203    O << " .";
1204    // Special case: ABI requires that we use .u8 for predicates
1205    if (ETy->isIntegerTy(1))
1206      O << "u8";
1207    else
1208      O << getPTXFundamentalTypeStr(ETy, false);
1209    O << " ";
1210    O << *getSymbol(GVar);
1211
1212    // Ptx allows variable initilization only for constant and global state
1213    // spaces.
1214    if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1215         (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
1216        GVar->hasInitializer()) {
1217      const Constant *Initializer = GVar->getInitializer();
1218      if (!Initializer->isNullValue()) {
1219        O << " = ";
1220        printScalarConstant(Initializer, O);
1221      }
1222    }
1223  } else {
1224    unsigned int ElementSize = 0;
1225
1226    // Although PTX has direct support for struct type and array type and
1227    // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1228    // targets that support these high level field accesses. Structs, arrays
1229    // and vectors are lowered into arrays of bytes.
1230    switch (ETy->getTypeID()) {
1231    case Type::StructTyID:
1232    case Type::ArrayTyID:
1233    case Type::VectorTyID:
1234      ElementSize = TD->getTypeStoreSize(ETy);
1235      // Ptx allows variable initilization only for constant and
1236      // global state spaces.
1237      if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1238           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
1239          GVar->hasInitializer()) {
1240        const Constant *Initializer = GVar->getInitializer();
1241        if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1242          AggBuffer aggBuffer(ElementSize, O, *this);
1243          bufferAggregateConstant(Initializer, &aggBuffer);
1244          if (aggBuffer.numSymbols) {
1245            if (nvptxSubtarget.is64Bit()) {
1246              O << " .u64 " << *getSymbol(GVar) << "[";
1247              O << ElementSize / 8;
1248            } else {
1249              O << " .u32 " << *getSymbol(GVar) << "[";
1250              O << ElementSize / 4;
1251            }
1252            O << "]";
1253          } else {
1254            O << " .b8 " << *getSymbol(GVar) << "[";
1255            O << ElementSize;
1256            O << "]";
1257          }
1258          O << " = {";
1259          aggBuffer.print();
1260          O << "}";
1261        } else {
1262          O << " .b8 " << *getSymbol(GVar);
1263          if (ElementSize) {
1264            O << "[";
1265            O << ElementSize;
1266            O << "]";
1267          }
1268        }
1269      } else {
1270        O << " .b8 " << *getSymbol(GVar);
1271        if (ElementSize) {
1272          O << "[";
1273          O << ElementSize;
1274          O << "]";
1275        }
1276      }
1277      break;
1278    default:
1279      assert(0 && "type not supported yet");
1280    }
1281
1282  }
1283  O << ";\n";
1284}
1285
1286void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1287  if (localDecls.find(f) == localDecls.end())
1288    return;
1289
1290  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1291
1292  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1293    O << "\t// demoted variable\n\t";
1294    printModuleLevelGV(gvars[i], O, true);
1295  }
1296}
1297
1298void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1299                                          raw_ostream &O) const {
1300  switch (AddressSpace) {
1301  case llvm::ADDRESS_SPACE_LOCAL:
1302    O << "local";
1303    break;
1304  case llvm::ADDRESS_SPACE_GLOBAL:
1305    O << "global";
1306    break;
1307  case llvm::ADDRESS_SPACE_CONST:
1308    O << "const";
1309    break;
1310  case llvm::ADDRESS_SPACE_SHARED:
1311    O << "shared";
1312    break;
1313  default:
1314    report_fatal_error("Bad address space found while emitting PTX");
1315    break;
1316  }
1317}
1318
1319std::string
1320NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
1321  switch (Ty->getTypeID()) {
1322  default:
1323    llvm_unreachable("unexpected type");
1324    break;
1325  case Type::IntegerTyID: {
1326    unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1327    if (NumBits == 1)
1328      return "pred";
1329    else if (NumBits <= 64) {
1330      std::string name = "u";
1331      return name + utostr(NumBits);
1332    } else {
1333      llvm_unreachable("Integer too large");
1334      break;
1335    }
1336    break;
1337  }
1338  case Type::FloatTyID:
1339    return "f32";
1340  case Type::DoubleTyID:
1341    return "f64";
1342  case Type::PointerTyID:
1343    if (nvptxSubtarget.is64Bit())
1344      if (useB4PTR)
1345        return "b64";
1346      else
1347        return "u64";
1348    else if (useB4PTR)
1349      return "b32";
1350    else
1351      return "u32";
1352  }
1353  llvm_unreachable("unexpected type");
1354  return NULL;
1355}
1356
1357void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1358                                            raw_ostream &O) {
1359
1360  const DataLayout *TD = TM.getDataLayout();
1361
1362  // GlobalVariables are always constant pointers themselves.
1363  const PointerType *PTy = GVar->getType();
1364  Type *ETy = PTy->getElementType();
1365
1366  O << ".";
1367  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1368  if (GVar->getAlignment() == 0)
1369    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
1370  else
1371    O << " .align " << GVar->getAlignment();
1372
1373  if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
1374    O << " .";
1375    O << getPTXFundamentalTypeStr(ETy);
1376    O << " ";
1377    O << *getSymbol(GVar);
1378    return;
1379  }
1380
1381  int64_t ElementSize = 0;
1382
1383  // Although PTX has direct support for struct type and array type and LLVM IR
1384  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1385  // support these high level field accesses. Structs and arrays are lowered
1386  // into arrays of bytes.
1387  switch (ETy->getTypeID()) {
1388  case Type::StructTyID:
1389  case Type::ArrayTyID:
1390  case Type::VectorTyID:
1391    ElementSize = TD->getTypeStoreSize(ETy);
1392    O << " .b8 " << *getSymbol(GVar) << "[";
1393    if (ElementSize) {
1394      O << itostr(ElementSize);
1395    }
1396    O << "]";
1397    break;
1398  default:
1399    assert(0 && "type not supported yet");
1400  }
1401  return;
1402}
1403
1404static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
1405  if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
1406    return TD->getPrefTypeAlignment(Ty);
1407
1408  const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
1409  if (ATy)
1410    return getOpenCLAlignment(TD, ATy->getElementType());
1411
1412  const VectorType *VTy = dyn_cast<VectorType>(Ty);
1413  if (VTy) {
1414    Type *ETy = VTy->getElementType();
1415    unsigned int numE = VTy->getNumElements();
1416    unsigned int alignE = TD->getPrefTypeAlignment(ETy);
1417    if (numE == 3)
1418      return 4 * alignE;
1419    else
1420      return numE * alignE;
1421  }
1422
1423  const StructType *STy = dyn_cast<StructType>(Ty);
1424  if (STy) {
1425    unsigned int alignStruct = 1;
1426    // Go through each element of the struct and find the
1427    // largest alignment.
1428    for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1429      Type *ETy = STy->getElementType(i);
1430      unsigned int align = getOpenCLAlignment(TD, ETy);
1431      if (align > alignStruct)
1432        alignStruct = align;
1433    }
1434    return alignStruct;
1435  }
1436
1437  const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
1438  if (FTy)
1439    return TD->getPointerPrefAlignment();
1440  return TD->getPrefTypeAlignment(Ty);
1441}
1442
1443void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1444                                     int paramIndex, raw_ostream &O) {
1445  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
1446      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
1447    O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
1448  else {
1449    std::string argName = I->getName();
1450    const char *p = argName.c_str();
1451    while (*p) {
1452      if (*p == '.')
1453        O << "_";
1454      else
1455        O << *p;
1456      p++;
1457    }
1458  }
1459}
1460
1461void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
1462  Function::const_arg_iterator I, E;
1463  int i = 0;
1464
1465  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
1466      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
1467    O << *CurrentFnSym << "_param_" << paramIndex;
1468    return;
1469  }
1470
1471  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
1472    if (i == paramIndex) {
1473      printParamName(I, paramIndex, O);
1474      return;
1475    }
1476  }
1477  llvm_unreachable("paramIndex out of bound");
1478}
1479
1480void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1481  const DataLayout *TD = TM.getDataLayout();
1482  const AttributeSet &PAL = F->getAttributes();
1483  const TargetLowering *TLI = TM.getTargetLowering();
1484  Function::const_arg_iterator I, E;
1485  unsigned paramIndex = 0;
1486  bool first = true;
1487  bool isKernelFunc = llvm::isKernelFunction(*F);
1488  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
1489  MVT thePointerTy = TLI->getPointerTy();
1490
1491  O << "(\n";
1492
1493  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1494    Type *Ty = I->getType();
1495
1496    if (!first)
1497      O << ",\n";
1498
1499    first = false;
1500
1501    // Handle image/sampler parameters
1502    if (llvm::isSampler(*I) || llvm::isImage(*I)) {
1503      if (llvm::isImage(*I)) {
1504        std::string sname = I->getName();
1505        if (llvm::isImageWriteOnly(*I))
1506          O << "\t.param .surfref " << *getSymbol(F) << "_param_"
1507            << paramIndex;
1508        else // Default image is read_only
1509          O << "\t.param .texref " << *getSymbol(F) << "_param_"
1510            << paramIndex;
1511      } else // Should be llvm::isSampler(*I)
1512        O << "\t.param .samplerref " << *getSymbol(F) << "_param_"
1513          << paramIndex;
1514      continue;
1515    }
1516
1517    if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
1518      if (Ty->isVectorTy()) {
1519        // Just print .param .b8 .align <a> .param[size];
1520        // <a> = PAL.getparamalignment
1521        // size = typeallocsize of element type
1522        unsigned align = PAL.getParamAlignment(paramIndex + 1);
1523        if (align == 0)
1524          align = TD->getABITypeAlignment(Ty);
1525
1526        unsigned sz = TD->getTypeAllocSize(Ty);
1527        O << "\t.param .align " << align << " .b8 ";
1528        printParamName(I, paramIndex, O);
1529        O << "[" << sz << "]";
1530
1531        continue;
1532      }
1533      // Just a scalar
1534      const PointerType *PTy = dyn_cast<PointerType>(Ty);
1535      if (isKernelFunc) {
1536        if (PTy) {
1537          // Special handling for pointer arguments to kernel
1538          O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1539
1540          if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
1541            Type *ETy = PTy->getElementType();
1542            int addrSpace = PTy->getAddressSpace();
1543            switch (addrSpace) {
1544            default:
1545              O << ".ptr ";
1546              break;
1547            case llvm::ADDRESS_SPACE_CONST:
1548              O << ".ptr .const ";
1549              break;
1550            case llvm::ADDRESS_SPACE_SHARED:
1551              O << ".ptr .shared ";
1552              break;
1553            case llvm::ADDRESS_SPACE_GLOBAL:
1554              O << ".ptr .global ";
1555              break;
1556            }
1557            O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
1558          }
1559          printParamName(I, paramIndex, O);
1560          continue;
1561        }
1562
1563        // non-pointer scalar to kernel func
1564        O << "\t.param .";
1565        // Special case: predicate operands become .u8 types
1566        if (Ty->isIntegerTy(1))
1567          O << "u8";
1568        else
1569          O << getPTXFundamentalTypeStr(Ty);
1570        O << " ";
1571        printParamName(I, paramIndex, O);
1572        continue;
1573      }
1574      // Non-kernel function, just print .param .b<size> for ABI
1575      // and .reg .b<size> for non ABY
1576      unsigned sz = 0;
1577      if (isa<IntegerType>(Ty)) {
1578        sz = cast<IntegerType>(Ty)->getBitWidth();
1579        if (sz < 32)
1580          sz = 32;
1581      } else if (isa<PointerType>(Ty))
1582        sz = thePointerTy.getSizeInBits();
1583      else
1584        sz = Ty->getPrimitiveSizeInBits();
1585      if (isABI)
1586        O << "\t.param .b" << sz << " ";
1587      else
1588        O << "\t.reg .b" << sz << " ";
1589      printParamName(I, paramIndex, O);
1590      continue;
1591    }
1592
1593    // param has byVal attribute. So should be a pointer
1594    const PointerType *PTy = dyn_cast<PointerType>(Ty);
1595    assert(PTy && "Param with byval attribute should be a pointer type");
1596    Type *ETy = PTy->getElementType();
1597
1598    if (isABI || isKernelFunc) {
1599      // Just print .param .b8 .align <a> .param[size];
1600      // <a> = PAL.getparamalignment
1601      // size = typeallocsize of element type
1602      unsigned align = PAL.getParamAlignment(paramIndex + 1);
1603      if (align == 0)
1604        align = TD->getABITypeAlignment(ETy);
1605
1606      unsigned sz = TD->getTypeAllocSize(ETy);
1607      O << "\t.param .align " << align << " .b8 ";
1608      printParamName(I, paramIndex, O);
1609      O << "[" << sz << "]";
1610      continue;
1611    } else {
1612      // Split the ETy into constituent parts and
1613      // print .param .b<size> <name> for each part.
1614      // Further, if a part is vector, print the above for
1615      // each vector element.
1616      SmallVector<EVT, 16> vtparts;
1617      ComputeValueVTs(*TLI, ETy, vtparts);
1618      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1619        unsigned elems = 1;
1620        EVT elemtype = vtparts[i];
1621        if (vtparts[i].isVector()) {
1622          elems = vtparts[i].getVectorNumElements();
1623          elemtype = vtparts[i].getVectorElementType();
1624        }
1625
1626        for (unsigned j = 0, je = elems; j != je; ++j) {
1627          unsigned sz = elemtype.getSizeInBits();
1628          if (elemtype.isInteger() && (sz < 32))
1629            sz = 32;
1630          O << "\t.reg .b" << sz << " ";
1631          printParamName(I, paramIndex, O);
1632          if (j < je - 1)
1633            O << ",\n";
1634          ++paramIndex;
1635        }
1636        if (i < e - 1)
1637          O << ",\n";
1638      }
1639      --paramIndex;
1640      continue;
1641    }
1642  }
1643
1644  O << "\n)\n";
1645}
1646
1647void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1648                                            raw_ostream &O) {
1649  const Function *F = MF.getFunction();
1650  emitFunctionParamList(F, O);
1651}
1652
1653void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1654    const MachineFunction &MF) {
1655  SmallString<128> Str;
1656  raw_svector_ostream O(Str);
1657
1658  // Map the global virtual register number to a register class specific
1659  // virtual register number starting from 1 with that class.
1660  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
1661  //unsigned numRegClasses = TRI->getNumRegClasses();
1662
1663  // Emit the Fake Stack Object
1664  const MachineFrameInfo *MFI = MF.getFrameInfo();
1665  int NumBytes = (int) MFI->getStackSize();
1666  if (NumBytes) {
1667    O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
1668      << getFunctionNumber() << "[" << NumBytes << "];\n";
1669    if (nvptxSubtarget.is64Bit()) {
1670      O << "\t.reg .b64 \t%SP;\n";
1671      O << "\t.reg .b64 \t%SPL;\n";
1672    } else {
1673      O << "\t.reg .b32 \t%SP;\n";
1674      O << "\t.reg .b32 \t%SPL;\n";
1675    }
1676  }
1677
1678  // Go through all virtual registers to establish the mapping between the
1679  // global virtual
1680  // register number and the per class virtual register number.
1681  // We use the per class virtual register number in the ptx output.
1682  unsigned int numVRs = MRI->getNumVirtRegs();
1683  for (unsigned i = 0; i < numVRs; i++) {
1684    unsigned int vr = TRI->index2VirtReg(i);
1685    const TargetRegisterClass *RC = MRI->getRegClass(vr);
1686    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1687    int n = regmap.size();
1688    regmap.insert(std::make_pair(vr, n + 1));
1689  }
1690
1691  // Emit register declarations
1692  // @TODO: Extract out the real register usage
1693  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1694  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1695  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1696  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1697  // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
1698  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1699  // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
1700
1701  // Emit declaration of the virtual registers or 'physical' registers for
1702  // each register class
1703  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1704    const TargetRegisterClass *RC = TRI->getRegClass(i);
1705    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1706    std::string rcname = getNVPTXRegClassName(RC);
1707    std::string rcStr = getNVPTXRegClassStr(RC);
1708    int n = regmap.size();
1709
1710    // Only declare those registers that may be used.
1711    if (n) {
1712       O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1713         << ">;\n";
1714    }
1715  }
1716
1717  OutStreamer.EmitRawText(O.str());
1718}
1719
1720void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1721  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1722  bool ignored;
1723  unsigned int numHex;
1724  const char *lead;
1725
1726  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1727    numHex = 8;
1728    lead = "0f";
1729    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
1730  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1731    numHex = 16;
1732    lead = "0d";
1733    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
1734  } else
1735    llvm_unreachable("unsupported fp type");
1736
1737  APInt API = APF.bitcastToAPInt();
1738  std::string hexstr(utohexstr(API.getZExtValue()));
1739  O << lead;
1740  if (hexstr.length() < numHex)
1741    O << std::string(numHex - hexstr.length(), '0');
1742  O << utohexstr(API.getZExtValue());
1743}
1744
1745void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1746  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1747    O << CI->getValue();
1748    return;
1749  }
1750  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1751    printFPConstant(CFP, O);
1752    return;
1753  }
1754  if (isa<ConstantPointerNull>(CPV)) {
1755    O << "0";
1756    return;
1757  }
1758  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1759    O << *getSymbol(GVar);
1760    return;
1761  }
1762  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1763    const Value *v = Cexpr->stripPointerCasts();
1764    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1765      O << *getSymbol(GVar);
1766      return;
1767    } else {
1768      O << *LowerConstant(CPV, *this);
1769      return;
1770    }
1771  }
1772  llvm_unreachable("Not scalar type found in printScalarConstant()");
1773}
1774
1775void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1776                                   AggBuffer *aggBuffer) {
1777
1778  const DataLayout *TD = TM.getDataLayout();
1779
1780  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1781    int s = TD->getTypeAllocSize(CPV->getType());
1782    if (s < Bytes)
1783      s = Bytes;
1784    aggBuffer->addZeros(s);
1785    return;
1786  }
1787
1788  unsigned char *ptr;
1789  switch (CPV->getType()->getTypeID()) {
1790
1791  case Type::IntegerTyID: {
1792    const Type *ETy = CPV->getType();
1793    if (ETy == Type::getInt8Ty(CPV->getContext())) {
1794      unsigned char c =
1795          (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
1796      ptr = &c;
1797      aggBuffer->addBytes(ptr, 1, Bytes);
1798    } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
1799      short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
1800      ptr = (unsigned char *)&int16;
1801      aggBuffer->addBytes(ptr, 2, Bytes);
1802    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
1803      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1804        int int32 = (int)(constInt->getZExtValue());
1805        ptr = (unsigned char *)&int32;
1806        aggBuffer->addBytes(ptr, 4, Bytes);
1807        break;
1808      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1809        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
1810                ConstantFoldConstantExpression(Cexpr, TD))) {
1811          int int32 = (int)(constInt->getZExtValue());
1812          ptr = (unsigned char *)&int32;
1813          aggBuffer->addBytes(ptr, 4, Bytes);
1814          break;
1815        }
1816        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1817          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1818          aggBuffer->addSymbol(v);
1819          aggBuffer->addZeros(4);
1820          break;
1821        }
1822      }
1823      llvm_unreachable("unsupported integer const type");
1824    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
1825      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1826        long long int64 = (long long)(constInt->getZExtValue());
1827        ptr = (unsigned char *)&int64;
1828        aggBuffer->addBytes(ptr, 8, Bytes);
1829        break;
1830      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1831        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
1832                ConstantFoldConstantExpression(Cexpr, TD))) {
1833          long long int64 = (long long)(constInt->getZExtValue());
1834          ptr = (unsigned char *)&int64;
1835          aggBuffer->addBytes(ptr, 8, Bytes);
1836          break;
1837        }
1838        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1839          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1840          aggBuffer->addSymbol(v);
1841          aggBuffer->addZeros(8);
1842          break;
1843        }
1844      }
1845      llvm_unreachable("unsupported integer const type");
1846    } else
1847      llvm_unreachable("unsupported integer const type");
1848    break;
1849  }
1850  case Type::FloatTyID:
1851  case Type::DoubleTyID: {
1852    const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
1853    const Type *Ty = CFP->getType();
1854    if (Ty == Type::getFloatTy(CPV->getContext())) {
1855      float float32 = (float) CFP->getValueAPF().convertToFloat();
1856      ptr = (unsigned char *)&float32;
1857      aggBuffer->addBytes(ptr, 4, Bytes);
1858    } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
1859      double float64 = CFP->getValueAPF().convertToDouble();
1860      ptr = (unsigned char *)&float64;
1861      aggBuffer->addBytes(ptr, 8, Bytes);
1862    } else {
1863      llvm_unreachable("unsupported fp const type");
1864    }
1865    break;
1866  }
1867  case Type::PointerTyID: {
1868    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1869      aggBuffer->addSymbol(GVar);
1870    } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1871      const Value *v = Cexpr->stripPointerCasts();
1872      aggBuffer->addSymbol(v);
1873    }
1874    unsigned int s = TD->getTypeAllocSize(CPV->getType());
1875    aggBuffer->addZeros(s);
1876    break;
1877  }
1878
1879  case Type::ArrayTyID:
1880  case Type::VectorTyID:
1881  case Type::StructTyID: {
1882    if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
1883        isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
1884      int ElementSize = TD->getTypeAllocSize(CPV->getType());
1885      bufferAggregateConstant(CPV, aggBuffer);
1886      if (Bytes > ElementSize)
1887        aggBuffer->addZeros(Bytes - ElementSize);
1888    } else if (isa<ConstantAggregateZero>(CPV))
1889      aggBuffer->addZeros(Bytes);
1890    else
1891      llvm_unreachable("Unexpected Constant type");
1892    break;
1893  }
1894
1895  default:
1896    llvm_unreachable("unsupported type");
1897  }
1898}
1899
1900void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1901                                              AggBuffer *aggBuffer) {
1902  const DataLayout *TD = TM.getDataLayout();
1903  int Bytes;
1904
1905  // Old constants
1906  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1907    if (CPV->getNumOperands())
1908      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1909        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1910    return;
1911  }
1912
1913  if (const ConstantDataSequential *CDS =
1914          dyn_cast<ConstantDataSequential>(CPV)) {
1915    if (CDS->getNumElements())
1916      for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1917        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1918                     aggBuffer);
1919    return;
1920  }
1921
1922  if (isa<ConstantStruct>(CPV)) {
1923    if (CPV->getNumOperands()) {
1924      StructType *ST = cast<StructType>(CPV->getType());
1925      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1926        if (i == (e - 1))
1927          Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
1928                  TD->getTypeAllocSize(ST) -
1929                  TD->getStructLayout(ST)->getElementOffset(i);
1930        else
1931          Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
1932                  TD->getStructLayout(ST)->getElementOffset(i);
1933        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1934      }
1935    }
1936    return;
1937  }
1938  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1939}
1940
1941// buildTypeNameMap - Run through symbol table looking for type names.
1942//
1943
1944bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
1945
1946  std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
1947
1948  if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
1949                                  !PI->second.compare("struct._image2d_t") ||
1950                                  !PI->second.compare("struct._image3d_t")))
1951    return true;
1952
1953  return false;
1954}
1955
1956
1957bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
1958  switch (MI.getOpcode()) {
1959  default:
1960    return false;
1961  case NVPTX::CallArgBeginInst:
1962  case NVPTX::CallArgEndInst0:
1963  case NVPTX::CallArgEndInst1:
1964  case NVPTX::CallArgF32:
1965  case NVPTX::CallArgF64:
1966  case NVPTX::CallArgI16:
1967  case NVPTX::CallArgI32:
1968  case NVPTX::CallArgI32imm:
1969  case NVPTX::CallArgI64:
1970  case NVPTX::CallArgParam:
1971  case NVPTX::CallVoidInst:
1972  case NVPTX::CallVoidInstReg:
1973  case NVPTX::Callseq_End:
1974  case NVPTX::CallVoidInstReg64:
1975  case NVPTX::DeclareParamInst:
1976  case NVPTX::DeclareRetMemInst:
1977  case NVPTX::DeclareRetRegInst:
1978  case NVPTX::DeclareRetScalarInst:
1979  case NVPTX::DeclareScalarParamInst:
1980  case NVPTX::DeclareScalarRegInst:
1981  case NVPTX::StoreParamF32:
1982  case NVPTX::StoreParamF64:
1983  case NVPTX::StoreParamI16:
1984  case NVPTX::StoreParamI32:
1985  case NVPTX::StoreParamI64:
1986  case NVPTX::StoreParamI8:
1987  case NVPTX::StoreRetvalF32:
1988  case NVPTX::StoreRetvalF64:
1989  case NVPTX::StoreRetvalI16:
1990  case NVPTX::StoreRetvalI32:
1991  case NVPTX::StoreRetvalI64:
1992  case NVPTX::StoreRetvalI8:
1993  case NVPTX::LastCallArgF32:
1994  case NVPTX::LastCallArgF64:
1995  case NVPTX::LastCallArgI16:
1996  case NVPTX::LastCallArgI32:
1997  case NVPTX::LastCallArgI32imm:
1998  case NVPTX::LastCallArgI64:
1999  case NVPTX::LastCallArgParam:
2000  case NVPTX::LoadParamMemF32:
2001  case NVPTX::LoadParamMemF64:
2002  case NVPTX::LoadParamMemI16:
2003  case NVPTX::LoadParamMemI32:
2004  case NVPTX::LoadParamMemI64:
2005  case NVPTX::LoadParamMemI8:
2006  case NVPTX::PrototypeInst:
2007  case NVPTX::DBG_VALUE:
2008    return true;
2009  }
2010  return false;
2011}
2012
2013/// PrintAsmOperand - Print out an operand for an inline asm expression.
2014///
2015bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2016                                      unsigned AsmVariant,
2017                                      const char *ExtraCode, raw_ostream &O) {
2018  if (ExtraCode && ExtraCode[0]) {
2019    if (ExtraCode[1] != 0)
2020      return true; // Unknown modifier.
2021
2022    switch (ExtraCode[0]) {
2023    default:
2024      // See if this is a generic print operand
2025      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
2026    case 'r':
2027      break;
2028    }
2029  }
2030
2031  printOperand(MI, OpNo, O);
2032
2033  return false;
2034}
2035
2036bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
2037    const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
2038    const char *ExtraCode, raw_ostream &O) {
2039  if (ExtraCode && ExtraCode[0])
2040    return true; // Unknown modifier
2041
2042  O << '[';
2043  printMemOperand(MI, OpNo, O);
2044  O << ']';
2045
2046  return false;
2047}
2048
2049void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2050                                   raw_ostream &O, const char *Modifier) {
2051  const MachineOperand &MO = MI->getOperand(opNum);
2052  switch (MO.getType()) {
2053  case MachineOperand::MO_Register:
2054    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
2055      if (MO.getReg() == NVPTX::VRDepot)
2056        O << DEPOTNAME << getFunctionNumber();
2057      else
2058        O << NVPTXInstPrinter::getRegisterName(MO.getReg());
2059    } else {
2060      emitVirtualRegister(MO.getReg(), O);
2061    }
2062    return;
2063
2064  case MachineOperand::MO_Immediate:
2065    if (!Modifier)
2066      O << MO.getImm();
2067    else if (strstr(Modifier, "vec") == Modifier)
2068      printVecModifiedImmediate(MO, Modifier, O);
2069    else
2070      llvm_unreachable(
2071          "Don't know how to handle modifier on immediate operand");
2072    return;
2073
2074  case MachineOperand::MO_FPImmediate:
2075    printFPConstant(MO.getFPImm(), O);
2076    break;
2077
2078  case MachineOperand::MO_GlobalAddress:
2079    O << *getSymbol(MO.getGlobal());
2080    break;
2081
2082  case MachineOperand::MO_ExternalSymbol: {
2083    const char *symbname = MO.getSymbolName();
2084    if (strstr(symbname, ".PARAM") == symbname) {
2085      unsigned index;
2086      sscanf(symbname + 6, "%u[];", &index);
2087      printParamName(index, O);
2088    } else if (strstr(symbname, ".HLPPARAM") == symbname) {
2089      unsigned index;
2090      sscanf(symbname + 9, "%u[];", &index);
2091      O << *CurrentFnSym << "_param_" << index << "_offset";
2092    } else
2093      O << symbname;
2094    break;
2095  }
2096
2097  case MachineOperand::MO_MachineBasicBlock:
2098    O << *MO.getMBB()->getSymbol();
2099    return;
2100
2101  default:
2102    llvm_unreachable("Operand type not supported.");
2103  }
2104}
2105
2106void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2107                                      raw_ostream &O, const char *Modifier) {
2108  printOperand(MI, opNum, O);
2109
2110  if (Modifier && !strcmp(Modifier, "add")) {
2111    O << ", ";
2112    printOperand(MI, opNum + 1, O);
2113  } else {
2114    if (MI->getOperand(opNum + 1).isImm() &&
2115        MI->getOperand(opNum + 1).getImm() == 0)
2116      return; // don't print ',0' or '+0'
2117    O << "+";
2118    printOperand(MI, opNum + 1, O);
2119  }
2120}
2121
2122
2123// Force static initialization.
2124extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
2125  RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
2126  RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
2127}
2128
2129void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
2130  std::stringstream temp;
2131  LineReader *reader = this->getReader(filename.str());
2132  temp << "\n//";
2133  temp << filename.str();
2134  temp << ":";
2135  temp << line;
2136  temp << " ";
2137  temp << reader->readLine(line);
2138  temp << "\n";
2139  this->OutStreamer.EmitRawText(Twine(temp.str()));
2140}
2141
2142LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
2143  if (reader == NULL) {
2144    reader = new LineReader(filename);
2145  }
2146
2147  if (reader->fileName() != filename) {
2148    delete reader;
2149    reader = new LineReader(filename);
2150  }
2151
2152  return reader;
2153}
2154
2155std::string LineReader::readLine(unsigned lineNum) {
2156  if (lineNum < theCurLine) {
2157    theCurLine = 0;
2158    fstr.seekg(0, std::ios::beg);
2159  }
2160  while (theCurLine < lineNum) {
2161    fstr.getline(buff, 500);
2162    theCurLine++;
2163  }
2164  return buff;
2165}
2166
2167// Force static initialization.
2168extern "C" void LLVMInitializeNVPTXAsmPrinter() {
2169  RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
2170  RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
2171}
2172