NVPTXAsmPrinter.cpp revision cd81d94322a39503e4a3e87b6ee03d4fcb3465fb
//===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a printer that converts from our internal representation
// of machine-dependent LLVM code to NVPTX assembly language.
//
//===----------------------------------------------------------------------===//
1446d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)
1546d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "NVPTXAsmPrinter.h"
1646d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "InstPrinter/NVPTXInstPrinter.h"
1746d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "MCTargetDesc/NVPTXMCAsmInfo.h"
1846d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "NVPTX.h"
1946d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "NVPTXInstrInfo.h"
2046d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "NVPTXMachineFunctionInfo.h"
2146d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "NVPTXMCExpr.h"
2246d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "NVPTXRegisterInfo.h"
2346d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "NVPTXTargetMachine.h"
2446d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "NVPTXUtilities.h"
2546d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "cl_common_defines.h"
2646d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/ADT/StringExtras.h"
2746d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/Analysis/ConstantFolding.h"
2846d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/CodeGen/Analysis.h"
2946d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/CodeGen/MachineFrameInfo.h"
3046d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/CodeGen/MachineModuleInfo.h"
3146d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/CodeGen/MachineRegisterInfo.h"
3246d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/IR/DebugInfo.h"
3346d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/IR/DerivedTypes.h"
3446d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/IR/Function.h"
3546d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/IR/GlobalVariable.h"
3646d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/IR/Mangler.h"
3746d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/IR/Module.h"
3846d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/IR/Operator.h"
3946d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/MC/MCStreamer.h"
4046d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/MC/MCSymbol.h"
4146d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/Support/CommandLine.h"
4246d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/Support/ErrorHandling.h"
4346d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/Support/FormattedStream.h"
4446d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/Support/Path.h"
4546d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/Support/TargetRegistry.h"
4646d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/Support/TimeValue.h"
4746d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include "llvm/Target/TargetLoweringObjectFile.h"
4846d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#include <sstream>
4946d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)using namespace llvm;
5046d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)
5146d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)#define DEPOTNAME "__local_depot"
5246d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)
5346d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)static cl::opt<bool>
5446d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
5546d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)                cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
5646d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)                cl::init(true));
5746d4c2bc3267f3f028f39e7e311b0f89aba2e4fdTorne (Richard Coles)
58static cl::opt<bool>
59InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
60              cl::desc("NVPTX Specific: Emit source line in ptx file"),
61              cl::init(false));
62
63namespace {
64/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
65/// depends.
66void DiscoverDependentGlobals(const Value *V,
67                              DenseSet<const GlobalVariable *> &Globals) {
68  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
69    Globals.insert(GV);
70  else {
71    if (const User *U = dyn_cast<User>(V)) {
72      for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
73        DiscoverDependentGlobals(U->getOperand(i), Globals);
74      }
75    }
76  }
77}
78
79/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
80/// instances to be emitted, but only after any dependents have been added
81/// first.
82void VisitGlobalVariableForEmission(
83    const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
84    DenseSet<const GlobalVariable *> &Visited,
85    DenseSet<const GlobalVariable *> &Visiting) {
86  // Have we already visited this one?
87  if (Visited.count(GV))
88    return;
89
90  // Do we have a circular dependency?
91  if (Visiting.count(GV))
92    report_fatal_error("Circular dependency found in global variable set");
93
94  // Start visiting this global
95  Visiting.insert(GV);
96
97  // Make sure we visit all dependents first
98  DenseSet<const GlobalVariable *> Others;
99  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
100    DiscoverDependentGlobals(GV->getOperand(i), Others);
101
102  for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
103                                                  E = Others.end();
104       I != E; ++I)
105    VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
106
107  // Now we can visit ourself
108  Order.push_back(GV);
109  Visited.insert(GV);
110  Visiting.erase(GV);
111}
112}
113
114// @TODO: This is a copy from AsmPrinter.cpp.  The function is static, so we
115// cannot just link to the existing version.
116/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
117///
118using namespace nvptx;
119const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
120  MCContext &Ctx = AP.OutContext;
121
122  if (CV->isNullValue() || isa<UndefValue>(CV))
123    return MCConstantExpr::Create(0, Ctx);
124
125  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
126    return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
127
128  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
129    return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
130
131  if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
132    return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
133
134  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
135  if (!CE)
136    llvm_unreachable("Unknown constant value to lower!");
137
138  switch (CE->getOpcode()) {
139  default:
140    // If the code isn't optimized, there may be outstanding folding
141    // opportunities. Attempt to fold the expression using DataLayout as a
142    // last resort before giving up.
143    if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
144      if (C != CE)
145        return LowerConstant(C, AP);
146
147    // Otherwise report the problem to the user.
148    {
149      std::string S;
150      raw_string_ostream OS(S);
151      OS << "Unsupported expression in static initializer: ";
152      CE->printAsOperand(OS, /*PrintType=*/ false,
153                         !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
154      report_fatal_error(OS.str());
155    }
156  case Instruction::AddrSpaceCast: {
157    // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be
158    // handled by the generic() logic in the MCExpr printer
159    PointerType *DstTy            = cast<PointerType>(CE->getType());
160    PointerType *SrcTy            = cast<PointerType>(CE->getOperand(0)->getType());
161    if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) {
162      return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP);
163    }
164    std::string S;
165    raw_string_ostream OS(S);
166    OS << "Unsupported expression in static initializer: ";
167    CE->printAsOperand(OS, /*PrintType=*/ false,
168                       !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
169    report_fatal_error(OS.str());
170  }
171  case Instruction::GetElementPtr: {
172    const DataLayout &TD = *AP.TM.getDataLayout();
173    // Generate a symbolic expression for the byte address
174    APInt OffsetAI(TD.getPointerSizeInBits(), 0);
175    cast<GEPOperator>(CE)->accumulateConstantOffset(TD, OffsetAI);
176
177    const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
178    if (!OffsetAI)
179      return Base;
180
181    int64_t Offset = OffsetAI.getSExtValue();
182    return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
183                                   Ctx);
184  }
185
186  case Instruction::Trunc:
187    // We emit the value and depend on the assembler to truncate the generated
188    // expression properly.  This is important for differences between
189    // blockaddress labels.  Since the two labels are in the same function, it
190    // is reasonable to treat their delta as a 32-bit value.
191  // FALL THROUGH.
192  case Instruction::BitCast:
193    return LowerConstant(CE->getOperand(0), AP);
194
195  case Instruction::IntToPtr: {
196    const DataLayout &TD = *AP.TM.getDataLayout();
197    // Handle casts to pointers by changing them into casts to the appropriate
198    // integer type.  This promotes constant folding and simplifies this code.
199    Constant *Op = CE->getOperand(0);
200    Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
201                                      false /*ZExt*/);
202    return LowerConstant(Op, AP);
203  }
204
205  case Instruction::PtrToInt: {
206    const DataLayout &TD = *AP.TM.getDataLayout();
207    // Support only foldable casts to/from pointers that can be eliminated by
208    // changing the pointer to the appropriately sized integer type.
209    Constant *Op = CE->getOperand(0);
210    Type *Ty = CE->getType();
211
212    const MCExpr *OpExpr = LowerConstant(Op, AP);
213
214    // We can emit the pointer value into this slot if the slot is an
215    // integer slot equal to the size of the pointer.
216    if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
217      return OpExpr;
218
219    // Otherwise the pointer is smaller than the resultant integer, mask off
220    // the high bits so we are sure to get a proper truncation if the input is
221    // a constant expr.
222    unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
223    const MCExpr *MaskExpr =
224        MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
225    return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
226  }
227
228    // The MC library also has a right-shift operator, but it isn't consistently
229  // signed or unsigned between different targets.
230  case Instruction::Add:
231  case Instruction::Sub:
232  case Instruction::Mul:
233  case Instruction::SDiv:
234  case Instruction::SRem:
235  case Instruction::Shl:
236  case Instruction::And:
237  case Instruction::Or:
238  case Instruction::Xor: {
239    const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
240    const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
241    switch (CE->getOpcode()) {
242    default:
243      llvm_unreachable("Unknown binary operator constant cast expr");
244    case Instruction::Add:
245      return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
246    case Instruction::Sub:
247      return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
248    case Instruction::Mul:
249      return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
250    case Instruction::SDiv:
251      return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
252    case Instruction::SRem:
253      return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
254    case Instruction::Shl:
255      return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
256    case Instruction::And:
257      return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
258    case Instruction::Or:
259      return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
260    case Instruction::Xor:
261      return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
262    }
263  }
264  }
265}
266
267void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
268  if (!EmitLineNumbers)
269    return;
270  if (ignoreLoc(MI))
271    return;
272
273  DebugLoc curLoc = MI.getDebugLoc();
274
275  if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
276    return;
277
278  if (prevDebugLoc == curLoc)
279    return;
280
281  prevDebugLoc = curLoc;
282
283  if (curLoc.isUnknown())
284    return;
285
286  const MachineFunction *MF = MI.getParent()->getParent();
287  //const TargetMachine &TM = MF->getTarget();
288
289  const LLVMContext &ctx = MF->getFunction()->getContext();
290  DIScope Scope(curLoc.getScope(ctx));
291
292  assert((!Scope || Scope.isScope()) &&
293    "Scope of a DebugLoc should be null or a DIScope.");
294  if (!Scope)
295     return;
296
297  StringRef fileName(Scope.getFilename());
298  StringRef dirName(Scope.getDirectory());
299  SmallString<128> FullPathName = dirName;
300  if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
301    sys::path::append(FullPathName, fileName);
302    fileName = FullPathName.str();
303  }
304
305  if (filenameMap.find(fileName.str()) == filenameMap.end())
306    return;
307
308  // Emit the line from the source file.
309  if (InterleaveSrc)
310    this->emitSrcInText(fileName.str(), curLoc.getLine());
311
312  std::stringstream temp;
313  temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
314       << " " << curLoc.getCol();
315  OutStreamer.EmitRawText(Twine(temp.str().c_str()));
316}
317
318void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
319  SmallString<128> Str;
320  raw_svector_ostream OS(Str);
321  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
322    emitLineNumberAsDotLoc(*MI);
323
324  MCInst Inst;
325  lowerToMCInst(MI, Inst);
326  EmitToStreamer(OutStreamer, Inst);
327}
328
329// Handle symbol backtracking for targets that do not support image handles
330bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
331                                           unsigned OpNo, MCOperand &MCOp) {
332  const MachineOperand &MO = MI->getOperand(OpNo);
333
334  switch (MI->getOpcode()) {
335  default: return false;
336  case NVPTX::TEX_1D_F32_I32:
337  case NVPTX::TEX_1D_F32_F32:
338  case NVPTX::TEX_1D_F32_F32_LEVEL:
339  case NVPTX::TEX_1D_F32_F32_GRAD:
340  case NVPTX::TEX_1D_I32_I32:
341  case NVPTX::TEX_1D_I32_F32:
342  case NVPTX::TEX_1D_I32_F32_LEVEL:
343  case NVPTX::TEX_1D_I32_F32_GRAD:
344  case NVPTX::TEX_1D_ARRAY_F32_I32:
345  case NVPTX::TEX_1D_ARRAY_F32_F32:
346  case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
347  case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
348  case NVPTX::TEX_1D_ARRAY_I32_I32:
349  case NVPTX::TEX_1D_ARRAY_I32_F32:
350  case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
351  case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
352  case NVPTX::TEX_2D_F32_I32:
353  case NVPTX::TEX_2D_F32_F32:
354  case NVPTX::TEX_2D_F32_F32_LEVEL:
355  case NVPTX::TEX_2D_F32_F32_GRAD:
356  case NVPTX::TEX_2D_I32_I32:
357  case NVPTX::TEX_2D_I32_F32:
358  case NVPTX::TEX_2D_I32_F32_LEVEL:
359  case NVPTX::TEX_2D_I32_F32_GRAD:
360  case NVPTX::TEX_2D_ARRAY_F32_I32:
361  case NVPTX::TEX_2D_ARRAY_F32_F32:
362  case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
363  case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
364  case NVPTX::TEX_2D_ARRAY_I32_I32:
365  case NVPTX::TEX_2D_ARRAY_I32_F32:
366  case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
367  case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
368  case NVPTX::TEX_3D_F32_I32:
369  case NVPTX::TEX_3D_F32_F32:
370  case NVPTX::TEX_3D_F32_F32_LEVEL:
371  case NVPTX::TEX_3D_F32_F32_GRAD:
372  case NVPTX::TEX_3D_I32_I32:
373  case NVPTX::TEX_3D_I32_F32:
374  case NVPTX::TEX_3D_I32_F32_LEVEL:
375  case NVPTX::TEX_3D_I32_F32_GRAD:
376   {
377    // This is a texture fetch, so operand 4 is a texref and operand 5 is
378    // a samplerref
379    if (OpNo == 4) {
380      lowerImageHandleSymbol(MO.getImm(), MCOp);
381      return true;
382    }
383    if (OpNo == 5) {
384      lowerImageHandleSymbol(MO.getImm(), MCOp);
385      return true;
386    }
387
388    return false;
389  }
390  case NVPTX::SULD_1D_I8_TRAP:
391  case NVPTX::SULD_1D_I16_TRAP:
392  case NVPTX::SULD_1D_I32_TRAP:
393  case NVPTX::SULD_1D_ARRAY_I8_TRAP:
394  case NVPTX::SULD_1D_ARRAY_I16_TRAP:
395  case NVPTX::SULD_1D_ARRAY_I32_TRAP:
396  case NVPTX::SULD_2D_I8_TRAP:
397  case NVPTX::SULD_2D_I16_TRAP:
398  case NVPTX::SULD_2D_I32_TRAP:
399  case NVPTX::SULD_2D_ARRAY_I8_TRAP:
400  case NVPTX::SULD_2D_ARRAY_I16_TRAP:
401  case NVPTX::SULD_2D_ARRAY_I32_TRAP:
402  case NVPTX::SULD_3D_I8_TRAP:
403  case NVPTX::SULD_3D_I16_TRAP:
404  case NVPTX::SULD_3D_I32_TRAP: {
405    // This is a V1 surface load, so operand 1 is a surfref
406    if (OpNo == 1) {
407      lowerImageHandleSymbol(MO.getImm(), MCOp);
408      return true;
409    }
410
411    return false;
412  }
413  case NVPTX::SULD_1D_V2I8_TRAP:
414  case NVPTX::SULD_1D_V2I16_TRAP:
415  case NVPTX::SULD_1D_V2I32_TRAP:
416  case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
417  case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
418  case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
419  case NVPTX::SULD_2D_V2I8_TRAP:
420  case NVPTX::SULD_2D_V2I16_TRAP:
421  case NVPTX::SULD_2D_V2I32_TRAP:
422  case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
423  case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
424  case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
425  case NVPTX::SULD_3D_V2I8_TRAP:
426  case NVPTX::SULD_3D_V2I16_TRAP:
427  case NVPTX::SULD_3D_V2I32_TRAP: {
428    // This is a V2 surface load, so operand 2 is a surfref
429    if (OpNo == 2) {
430      lowerImageHandleSymbol(MO.getImm(), MCOp);
431      return true;
432    }
433
434    return false;
435  }
436  case NVPTX::SULD_1D_V4I8_TRAP:
437  case NVPTX::SULD_1D_V4I16_TRAP:
438  case NVPTX::SULD_1D_V4I32_TRAP:
439  case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
440  case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
441  case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
442  case NVPTX::SULD_2D_V4I8_TRAP:
443  case NVPTX::SULD_2D_V4I16_TRAP:
444  case NVPTX::SULD_2D_V4I32_TRAP:
445  case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
446  case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
447  case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
448  case NVPTX::SULD_3D_V4I8_TRAP:
449  case NVPTX::SULD_3D_V4I16_TRAP:
450  case NVPTX::SULD_3D_V4I32_TRAP: {
451    // This is a V4 surface load, so operand 4 is a surfref
452    if (OpNo == 4) {
453      lowerImageHandleSymbol(MO.getImm(), MCOp);
454      return true;
455    }
456
457    return false;
458  }
459  case NVPTX::SUST_B_1D_B8_TRAP:
460  case NVPTX::SUST_B_1D_B16_TRAP:
461  case NVPTX::SUST_B_1D_B32_TRAP:
462  case NVPTX::SUST_B_1D_V2B8_TRAP:
463  case NVPTX::SUST_B_1D_V2B16_TRAP:
464  case NVPTX::SUST_B_1D_V2B32_TRAP:
465  case NVPTX::SUST_B_1D_V4B8_TRAP:
466  case NVPTX::SUST_B_1D_V4B16_TRAP:
467  case NVPTX::SUST_B_1D_V4B32_TRAP:
468  case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
469  case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
470  case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
471  case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
472  case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
473  case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
474  case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
475  case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
476  case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
477  case NVPTX::SUST_B_2D_B8_TRAP:
478  case NVPTX::SUST_B_2D_B16_TRAP:
479  case NVPTX::SUST_B_2D_B32_TRAP:
480  case NVPTX::SUST_B_2D_V2B8_TRAP:
481  case NVPTX::SUST_B_2D_V2B16_TRAP:
482  case NVPTX::SUST_B_2D_V2B32_TRAP:
483  case NVPTX::SUST_B_2D_V4B8_TRAP:
484  case NVPTX::SUST_B_2D_V4B16_TRAP:
485  case NVPTX::SUST_B_2D_V4B32_TRAP:
486  case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
487  case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
488  case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
489  case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
490  case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
491  case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
492  case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
493  case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
494  case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
495  case NVPTX::SUST_B_3D_B8_TRAP:
496  case NVPTX::SUST_B_3D_B16_TRAP:
497  case NVPTX::SUST_B_3D_B32_TRAP:
498  case NVPTX::SUST_B_3D_V2B8_TRAP:
499  case NVPTX::SUST_B_3D_V2B16_TRAP:
500  case NVPTX::SUST_B_3D_V2B32_TRAP:
501  case NVPTX::SUST_B_3D_V4B8_TRAP:
502  case NVPTX::SUST_B_3D_V4B16_TRAP:
503  case NVPTX::SUST_B_3D_V4B32_TRAP:
504  case NVPTX::SUST_P_1D_B8_TRAP:
505  case NVPTX::SUST_P_1D_B16_TRAP:
506  case NVPTX::SUST_P_1D_B32_TRAP:
507  case NVPTX::SUST_P_1D_V2B8_TRAP:
508  case NVPTX::SUST_P_1D_V2B16_TRAP:
509  case NVPTX::SUST_P_1D_V2B32_TRAP:
510  case NVPTX::SUST_P_1D_V4B8_TRAP:
511  case NVPTX::SUST_P_1D_V4B16_TRAP:
512  case NVPTX::SUST_P_1D_V4B32_TRAP:
513  case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
514  case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
515  case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
516  case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
517  case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
518  case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
519  case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
520  case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
521  case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
522  case NVPTX::SUST_P_2D_B8_TRAP:
523  case NVPTX::SUST_P_2D_B16_TRAP:
524  case NVPTX::SUST_P_2D_B32_TRAP:
525  case NVPTX::SUST_P_2D_V2B8_TRAP:
526  case NVPTX::SUST_P_2D_V2B16_TRAP:
527  case NVPTX::SUST_P_2D_V2B32_TRAP:
528  case NVPTX::SUST_P_2D_V4B8_TRAP:
529  case NVPTX::SUST_P_2D_V4B16_TRAP:
530  case NVPTX::SUST_P_2D_V4B32_TRAP:
531  case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
532  case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
533  case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
534  case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
535  case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
536  case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
537  case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
538  case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
539  case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
540  case NVPTX::SUST_P_3D_B8_TRAP:
541  case NVPTX::SUST_P_3D_B16_TRAP:
542  case NVPTX::SUST_P_3D_B32_TRAP:
543  case NVPTX::SUST_P_3D_V2B8_TRAP:
544  case NVPTX::SUST_P_3D_V2B16_TRAP:
545  case NVPTX::SUST_P_3D_V2B32_TRAP:
546  case NVPTX::SUST_P_3D_V4B8_TRAP:
547  case NVPTX::SUST_P_3D_V4B16_TRAP:
548  case NVPTX::SUST_P_3D_V4B32_TRAP: {
549    // This is a surface store, so operand 0 is a surfref
550    if (OpNo == 0) {
551      lowerImageHandleSymbol(MO.getImm(), MCOp);
552      return true;
553    }
554
555    return false;
556  }
557  case NVPTX::TXQ_CHANNEL_ORDER:
558  case NVPTX::TXQ_CHANNEL_DATA_TYPE:
559  case NVPTX::TXQ_WIDTH:
560  case NVPTX::TXQ_HEIGHT:
561  case NVPTX::TXQ_DEPTH:
562  case NVPTX::TXQ_ARRAY_SIZE:
563  case NVPTX::TXQ_NUM_SAMPLES:
564  case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
565  case NVPTX::SUQ_CHANNEL_ORDER:
566  case NVPTX::SUQ_CHANNEL_DATA_TYPE:
567  case NVPTX::SUQ_WIDTH:
568  case NVPTX::SUQ_HEIGHT:
569  case NVPTX::SUQ_DEPTH:
570  case NVPTX::SUQ_ARRAY_SIZE: {
571    // This is a query, so operand 1 is a surfref/texref
572    if (OpNo == 1) {
573      lowerImageHandleSymbol(MO.getImm(), MCOp);
574      return true;
575    }
576
577    return false;
578  }
579  }
580}
581
582void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
583  // Ewwww
584  TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
585  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
586  const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
587  const char *Sym = MFI->getImageHandleSymbol(Index);
588  std::string *SymNamePtr =
589    nvTM.getManagedStrPool()->getManagedString(Sym);
590  MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
591    StringRef(SymNamePtr->c_str())));
592}
593
594void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
595  OutMI.setOpcode(MI->getOpcode());
596  const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
597
598  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
599  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
600    const MachineOperand &MO = MI->getOperand(0);
601    OutMI.addOperand(GetSymbolRef(
602      OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
603    return;
604  }
605
606  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
607    const MachineOperand &MO = MI->getOperand(i);
608
609    MCOperand MCOp;
610    if (!ST.hasImageHandles()) {
611      if (lowerImageHandleOperand(MI, i, MCOp)) {
612        OutMI.addOperand(MCOp);
613        continue;
614      }
615    }
616
617    if (lowerOperand(MO, MCOp))
618      OutMI.addOperand(MCOp);
619  }
620}
621
622bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
623                                   MCOperand &MCOp) {
624  switch (MO.getType()) {
625  default: llvm_unreachable("unknown operand type");
626  case MachineOperand::MO_Register:
627    MCOp = MCOperand::CreateReg(encodeVirtualRegister(MO.getReg()));
628    break;
629  case MachineOperand::MO_Immediate:
630    MCOp = MCOperand::CreateImm(MO.getImm());
631    break;
632  case MachineOperand::MO_MachineBasicBlock:
633    MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
634        MO.getMBB()->getSymbol(), OutContext));
635    break;
636  case MachineOperand::MO_ExternalSymbol:
637    MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
638    break;
639  case MachineOperand::MO_GlobalAddress:
640    MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
641    break;
642  case MachineOperand::MO_FPImmediate: {
643    const ConstantFP *Cnt = MO.getFPImm();
644    APFloat Val = Cnt->getValueAPF();
645
646    switch (Cnt->getType()->getTypeID()) {
647    default: report_fatal_error("Unsupported FP type"); break;
648    case Type::FloatTyID:
649      MCOp = MCOperand::CreateExpr(
650        NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
651      break;
652    case Type::DoubleTyID:
653      MCOp = MCOperand::CreateExpr(
654        NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
655      break;
656    }
657    break;
658  }
659  }
660  return true;
661}
662
663unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
664  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
665    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
666
667    DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
668    unsigned RegNum = RegMap[Reg];
669
670    // Encode the register class in the upper 4 bits
671    // Must be kept in sync with NVPTXInstPrinter::printRegName
672    unsigned Ret = 0;
673    if (RC == &NVPTX::Int1RegsRegClass) {
674      Ret = (1 << 28);
675    } else if (RC == &NVPTX::Int16RegsRegClass) {
676      Ret = (2 << 28);
677    } else if (RC == &NVPTX::Int32RegsRegClass) {
678      Ret = (3 << 28);
679    } else if (RC == &NVPTX::Int64RegsRegClass) {
680      Ret = (4 << 28);
681    } else if (RC == &NVPTX::Float32RegsRegClass) {
682      Ret = (5 << 28);
683    } else if (RC == &NVPTX::Float64RegsRegClass) {
684      Ret = (6 << 28);
685    } else {
686      report_fatal_error("Bad register class");
687    }
688
689    // Insert the vreg number
690    Ret |= (RegNum & 0x0FFFFFFF);
691    return Ret;
692  } else {
693    // Some special-use registers are actually physical registers.
694    // Encode this as the register class ID of 0 and the real register ID.
695    return Reg & 0x0FFFFFFF;
696  }
697}
698
699MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
700  const MCExpr *Expr;
701  Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
702                                 OutContext);
703  return MCOperand::CreateExpr(Expr);
704}
705
706void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
707  const DataLayout *TD = TM.getDataLayout();
708  const TargetLowering *TLI = TM.getTargetLowering();
709
710  Type *Ty = F->getReturnType();
711
712  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
713
714  if (Ty->getTypeID() == Type::VoidTyID)
715    return;
716
717  O << " (";
718
719  if (isABI) {
720    if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
721      unsigned size = 0;
722      if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
723        size = ITy->getBitWidth();
724        if (size < 32)
725          size = 32;
726      } else {
727        assert(Ty->isFloatingPointTy() && "Floating point type expected here");
728        size = Ty->getPrimitiveSizeInBits();
729      }
730
731      O << ".param .b" << size << " func_retval0";
732    } else if (isa<PointerType>(Ty)) {
733      O << ".param .b" << TLI->getPointerTy().getSizeInBits()
734        << " func_retval0";
735    } else {
736      if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
737        unsigned totalsz = TD->getTypeAllocSize(Ty);
738        unsigned retAlignment = 0;
739        if (!llvm::getAlign(*F, 0, retAlignment))
740          retAlignment = TD->getABITypeAlignment(Ty);
741        O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
742          << "]";
743      } else
744        assert(false && "Unknown return type");
745    }
746  } else {
747    SmallVector<EVT, 16> vtparts;
748    ComputeValueVTs(*TLI, Ty, vtparts);
749    unsigned idx = 0;
750    for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
751      unsigned elems = 1;
752      EVT elemtype = vtparts[i];
753      if (vtparts[i].isVector()) {
754        elems = vtparts[i].getVectorNumElements();
755        elemtype = vtparts[i].getVectorElementType();
756      }
757
758      for (unsigned j = 0, je = elems; j != je; ++j) {
759        unsigned sz = elemtype.getSizeInBits();
760        if (elemtype.isInteger() && (sz < 32))
761          sz = 32;
762        O << ".reg .b" << sz << " func_retval" << idx;
763        if (j < je - 1)
764          O << ", ";
765        ++idx;
766      }
767      if (i < e - 1)
768        O << ", ";
769    }
770  }
771  O << ") ";
772  return;
773}
774
775void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
776                                        raw_ostream &O) {
777  const Function *F = MF.getFunction();
778  printReturnValStr(F, O);
779}
780
781void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
782  SmallString<128> Str;
783  raw_svector_ostream O(Str);
784
785  if (!GlobalsEmitted) {
786    emitGlobals(*MF->getFunction()->getParent());
787    GlobalsEmitted = true;
788  }
789
790  // Set up
791  MRI = &MF->getRegInfo();
792  F = MF->getFunction();
793  emitLinkageDirective(F, O);
794  if (llvm::isKernelFunction(*F))
795    O << ".entry ";
796  else {
797    O << ".func ";
798    printReturnValStr(*MF, O);
799  }
800
801  O << *CurrentFnSym;
802
803  emitFunctionParamList(*MF, O);
804
805  if (llvm::isKernelFunction(*F))
806    emitKernelFunctionDirectives(*F, O);
807
808  OutStreamer.EmitRawText(O.str());
809
810  prevDebugLoc = DebugLoc();
811}
812
813void NVPTXAsmPrinter::EmitFunctionBodyStart() {
814  VRegMapping.clear();
815  OutStreamer.EmitRawText(StringRef("{\n"));
816  setAndEmitFunctionVirtualRegisters(*MF);
817
818  SmallString<128> Str;
819  raw_svector_ostream O(Str);
820  emitDemotedVars(MF->getFunction(), O);
821  OutStreamer.EmitRawText(O.str());
822}
823
824void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
825  OutStreamer.EmitRawText(StringRef("}\n"));
826  VRegMapping.clear();
827}
828
829void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
830  unsigned RegNo = MI->getOperand(0).getReg();
831  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
832  if (TRI->isVirtualRegister(RegNo)) {
833    OutStreamer.AddComment(Twine("implicit-def: ") +
834                           getVirtualRegisterName(RegNo));
835  } else {
836    OutStreamer.AddComment(Twine("implicit-def: ") +
837                           TM.getRegisterInfo()->getName(RegNo));
838  }
839  OutStreamer.AddBlankLine();
840}
841
842void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
843                                                   raw_ostream &O) const {
844  // If the NVVM IR has some of reqntid* specified, then output
845  // the reqntid directive, and set the unspecified ones to 1.
846  // If none of reqntid* is specified, don't output reqntid directive.
847  unsigned reqntidx, reqntidy, reqntidz;
848  bool specified = false;
849  if (llvm::getReqNTIDx(F, reqntidx) == false)
850    reqntidx = 1;
851  else
852    specified = true;
853  if (llvm::getReqNTIDy(F, reqntidy) == false)
854    reqntidy = 1;
855  else
856    specified = true;
857  if (llvm::getReqNTIDz(F, reqntidz) == false)
858    reqntidz = 1;
859  else
860    specified = true;
861
862  if (specified)
863    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
864      << "\n";
865
866  // If the NVVM IR has some of maxntid* specified, then output
867  // the maxntid directive, and set the unspecified ones to 1.
868  // If none of maxntid* is specified, don't output maxntid directive.
869  unsigned maxntidx, maxntidy, maxntidz;
870  specified = false;
871  if (llvm::getMaxNTIDx(F, maxntidx) == false)
872    maxntidx = 1;
873  else
874    specified = true;
875  if (llvm::getMaxNTIDy(F, maxntidy) == false)
876    maxntidy = 1;
877  else
878    specified = true;
879  if (llvm::getMaxNTIDz(F, maxntidz) == false)
880    maxntidz = 1;
881  else
882    specified = true;
883
884  if (specified)
885    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
886      << "\n";
887
888  unsigned mincta;
889  if (llvm::getMinCTASm(F, mincta))
890    O << ".minnctapersm " << mincta << "\n";
891}
892
893std::string
894NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
895  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
896
897  std::string Name;
898  raw_string_ostream NameStr(Name);
899
900  VRegRCMap::const_iterator I = VRegMapping.find(RC);
901  assert(I != VRegMapping.end() && "Bad register class");
902  const DenseMap<unsigned, unsigned> &RegMap = I->second;
903
904  VRegMap::const_iterator VI = RegMap.find(Reg);
905  assert(VI != RegMap.end() && "Bad virtual register");
906  unsigned MappedVR = VI->second;
907
908  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
909
910  NameStr.flush();
911  return Name;
912}
913
914void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
915                                          raw_ostream &O) {
916  O << getVirtualRegisterName(vr);
917}
918
919void NVPTXAsmPrinter::printVecModifiedImmediate(
920    const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
921  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
922  int Imm = (int) MO.getImm();
923  if (0 == strcmp(Modifier, "vecelem"))
924    O << "_" << vecelem[Imm];
925  else if (0 == strcmp(Modifier, "vecv4comm1")) {
926    if ((Imm < 0) || (Imm > 3))
927      O << "//";
928  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
929    if ((Imm < 4) || (Imm > 7))
930      O << "//";
931  } else if (0 == strcmp(Modifier, "vecv4pos")) {
932    if (Imm < 0)
933      Imm = 0;
934    O << "_" << vecelem[Imm % 4];
935  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
936    if ((Imm < 0) || (Imm > 1))
937      O << "//";
938  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
939    if ((Imm < 2) || (Imm > 3))
940      O << "//";
941  } else if (0 == strcmp(Modifier, "vecv2pos")) {
942    if (Imm < 0)
943      Imm = 0;
944    O << "_" << vecelem[Imm % 2];
945  } else
946    llvm_unreachable("Unknown Modifier on immediate operand");
947}
948
949
950
951void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
952
953  emitLinkageDirective(F, O);
954  if (llvm::isKernelFunction(*F))
955    O << ".entry ";
956  else
957    O << ".func ";
958  printReturnValStr(F, O);
959  O << *getSymbol(F) << "\n";
960  emitFunctionParamList(F, O);
961  O << ";\n";
962}
963
964static bool usedInGlobalVarDef(const Constant *C) {
965  if (!C)
966    return false;
967
968  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
969    if (GV->getName().str() == "llvm.used")
970      return false;
971    return true;
972  }
973
974  for (const User *U : C->users())
975    if (const Constant *C = dyn_cast<Constant>(U))
976      if (usedInGlobalVarDef(C))
977        return true;
978
979  return false;
980}
981
982static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
983  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
984    if (othergv->getName().str() == "llvm.used")
985      return true;
986  }
987
988  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
989    if (instr->getParent() && instr->getParent()->getParent()) {
990      const Function *curFunc = instr->getParent()->getParent();
991      if (oneFunc && (curFunc != oneFunc))
992        return false;
993      oneFunc = curFunc;
994      return true;
995    } else
996      return false;
997  }
998
999  if (const MDNode *md = dyn_cast<MDNode>(U))
1000    if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
1001                          (md->getName().str() == "llvm.dbg.sp")))
1002      return true;
1003
1004  for (const User *UU : U->users())
1005    if (usedInOneFunc(UU, oneFunc) == false)
1006      return false;
1007
1008  return true;
1009}
1010
1011/* Find out if a global variable can be demoted to local scope.
1012 * Currently, this is valid for CUDA shared variables, which have local
1013 * scope and global lifetime. So the conditions to check are :
1014 * 1. Is the global variable in shared address space?
1015 * 2. Does it have internal linkage?
1016 * 3. Is the global variable referenced only in one function?
1017 */
1018static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
1019  if (gv->hasInternalLinkage() == false)
1020    return false;
1021  const PointerType *Pty = gv->getType();
1022  if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
1023    return false;
1024
1025  const Function *oneFunc = nullptr;
1026
1027  bool flag = usedInOneFunc(gv, oneFunc);
1028  if (flag == false)
1029    return false;
1030  if (!oneFunc)
1031    return false;
1032  f = oneFunc;
1033  return true;
1034}
1035
1036static bool useFuncSeen(const Constant *C,
1037                        llvm::DenseMap<const Function *, bool> &seenMap) {
1038  for (const User *U : C->users()) {
1039    if (const Constant *cu = dyn_cast<Constant>(U)) {
1040      if (useFuncSeen(cu, seenMap))
1041        return true;
1042    } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
1043      const BasicBlock *bb = I->getParent();
1044      if (!bb)
1045        continue;
1046      const Function *caller = bb->getParent();
1047      if (!caller)
1048        continue;
1049      if (seenMap.find(caller) != seenMap.end())
1050        return true;
1051    }
1052  }
1053  return false;
1054}
1055
1056void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
1057  llvm::DenseMap<const Function *, bool> seenMap;
1058  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
1059    const Function *F = FI;
1060
1061    if (F->isDeclaration()) {
1062      if (F->use_empty())
1063        continue;
1064      if (F->getIntrinsicID())
1065        continue;
1066      emitDeclaration(F, O);
1067      continue;
1068    }
1069    for (const User *U : F->users()) {
1070      if (const Constant *C = dyn_cast<Constant>(U)) {
1071        if (usedInGlobalVarDef(C)) {
1072          // The use is in the initialization of a global variable
1073          // that is a function pointer, so print a declaration
1074          // for the original function
1075          emitDeclaration(F, O);
1076          break;
1077        }
1078        // Emit a declaration of this function if the function that
1079        // uses this constant expr has already been seen.
1080        if (useFuncSeen(C, seenMap)) {
1081          emitDeclaration(F, O);
1082          break;
1083        }
1084      }
1085
1086      if (!isa<Instruction>(U))
1087        continue;
1088      const Instruction *instr = cast<Instruction>(U);
1089      const BasicBlock *bb = instr->getParent();
1090      if (!bb)
1091        continue;
1092      const Function *caller = bb->getParent();
1093      if (!caller)
1094        continue;
1095
1096      // If a caller has already been seen, then the caller is
1097      // appearing in the module before the callee. so print out
1098      // a declaration for the callee.
1099      if (seenMap.find(caller) != seenMap.end()) {
1100        emitDeclaration(F, O);
1101        break;
1102      }
1103    }
1104    seenMap[F] = true;
1105  }
1106}
1107
1108void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
1109  DebugInfoFinder DbgFinder;
1110  DbgFinder.processModule(M);
1111
1112  unsigned i = 1;
1113  for (DICompileUnit DIUnit : DbgFinder.compile_units()) {
1114    StringRef Filename(DIUnit.getFilename());
1115    StringRef Dirname(DIUnit.getDirectory());
1116    SmallString<128> FullPathName = Dirname;
1117    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
1118      sys::path::append(FullPathName, Filename);
1119      Filename = FullPathName.str();
1120    }
1121    if (filenameMap.find(Filename.str()) != filenameMap.end())
1122      continue;
1123    filenameMap[Filename.str()] = i;
1124    OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
1125    ++i;
1126  }
1127
1128  for (DISubprogram SP : DbgFinder.subprograms()) {
1129    StringRef Filename(SP.getFilename());
1130    StringRef Dirname(SP.getDirectory());
1131    SmallString<128> FullPathName = Dirname;
1132    if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
1133      sys::path::append(FullPathName, Filename);
1134      Filename = FullPathName.str();
1135    }
1136    if (filenameMap.find(Filename.str()) != filenameMap.end())
1137      continue;
1138    filenameMap[Filename.str()] = i;
1139    ++i;
1140  }
1141}
1142
1143bool NVPTXAsmPrinter::doInitialization(Module &M) {
1144
1145  SmallString<128> Str1;
1146  raw_svector_ostream OS1(Str1);
1147
1148  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
1149  MMI->AnalyzeModule(M);
1150
1151  // We need to call the parent's one explicitly.
1152  //bool Result = AsmPrinter::doInitialization(M);
1153
1154  // Initialize TargetLoweringObjectFile.
1155  const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
1156      .Initialize(OutContext, TM);
1157
1158  Mang = new Mangler(TM.getDataLayout());
1159
1160  // Emit header before any dwarf directives are emitted below.
1161  emitHeader(M, OS1);
1162  OutStreamer.EmitRawText(OS1.str());
1163
1164  // Already commented out
1165  //bool Result = AsmPrinter::doInitialization(M);
1166
1167  // Emit module-level inline asm if it exists.
1168  if (!M.getModuleInlineAsm().empty()) {
1169    OutStreamer.AddComment("Start of file scope inline assembly");
1170    OutStreamer.AddBlankLine();
1171    OutStreamer.EmitRawText(StringRef(M.getModuleInlineAsm()));
1172    OutStreamer.AddBlankLine();
1173    OutStreamer.AddComment("End of file scope inline assembly");
1174    OutStreamer.AddBlankLine();
1175  }
1176
1177  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
1178    recordAndEmitFilenames(M);
1179
1180  GlobalsEmitted = false;
1181
1182  return false; // success
1183}
1184
1185void NVPTXAsmPrinter::emitGlobals(const Module &M) {
1186  SmallString<128> Str2;
1187  raw_svector_ostream OS2(Str2);
1188
1189  emitDeclarations(M, OS2);
1190
1191  // As ptxas does not support forward references of globals, we need to first
1192  // sort the list of module-level globals in def-use order. We visit each
1193  // global variable in order, and ensure that we emit it *after* its dependent
1194  // globals. We use a little extra memory maintaining both a set and a list to
1195  // have fast searches while maintaining a strict ordering.
1196  SmallVector<const GlobalVariable *, 8> Globals;
1197  DenseSet<const GlobalVariable *> GVVisited;
1198  DenseSet<const GlobalVariable *> GVVisiting;
1199
1200  // Visit each global variable, in order
1201  for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
1202       I != E; ++I)
1203    VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
1204
1205  assert(GVVisited.size() == M.getGlobalList().size() &&
1206         "Missed a global variable");
1207  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
1208
1209  // Print out module-level global variables in proper order
1210  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
1211    printModuleLevelGV(Globals[i], OS2);
1212
1213  OS2 << '\n';
1214
1215  OutStreamer.EmitRawText(OS2.str());
1216}
1217
1218void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
1219  O << "//\n";
1220  O << "// Generated by LLVM NVPTX Back-End\n";
1221  O << "//\n";
1222  O << "\n";
1223
1224  unsigned PTXVersion = nvptxSubtarget.getPTXVersion();
1225  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
1226
1227  O << ".target ";
1228  O << nvptxSubtarget.getTargetName();
1229
1230  if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL)
1231    O << ", texmode_independent";
1232  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
1233    if (!nvptxSubtarget.hasDouble())
1234      O << ", map_f64_to_f32";
1235  }
1236
1237  if (MAI->doesSupportDebugInformation())
1238    O << ", debug";
1239
1240  O << "\n";
1241
1242  O << ".address_size ";
1243  if (nvptxSubtarget.is64Bit())
1244    O << "64";
1245  else
1246    O << "32";
1247  O << "\n";
1248
1249  O << "\n";
1250}
1251
1252bool NVPTXAsmPrinter::doFinalization(Module &M) {
1253
1254  // If we did not emit any functions, then the global declarations have not
1255  // yet been emitted.
1256  if (!GlobalsEmitted) {
1257    emitGlobals(M);
1258    GlobalsEmitted = true;
1259  }
1260
1261  // XXX Temproarily remove global variables so that doFinalization() will not
1262  // emit them again (global variables are emitted at beginning).
1263
1264  Module::GlobalListType &global_list = M.getGlobalList();
1265  int i, n = global_list.size();
1266  GlobalVariable **gv_array = new GlobalVariable *[n];
1267
1268  // first, back-up GlobalVariable in gv_array
1269  i = 0;
1270  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
1271       I != E; ++I)
1272    gv_array[i++] = &*I;
1273
1274  // second, empty global_list
1275  while (!global_list.empty())
1276    global_list.remove(global_list.begin());
1277
1278  // call doFinalization
1279  bool ret = AsmPrinter::doFinalization(M);
1280
1281  // now we restore global variables
1282  for (i = 0; i < n; i++)
1283    global_list.insert(global_list.end(), gv_array[i]);
1284
1285  clearAnnotationCache(&M);
1286
1287  delete[] gv_array;
1288  return ret;
1289
1290  //bool Result = AsmPrinter::doFinalization(M);
1291  // Instead of calling the parents doFinalization, we may
1292  // clone parents doFinalization and customize here.
1293  // Currently, we if NVISA out the EmitGlobals() in
1294  // parent's doFinalization, which is too intrusive.
1295  //
1296  // Same for the doInitialization.
1297  //return Result;
1298}
1299
1300// This function emits appropriate linkage directives for
1301// functions and global variables.
1302//
1303// extern function declaration            -> .extern
1304// extern function definition             -> .visible
1305// external global variable with init     -> .visible
1306// external without init                  -> .extern
1307// appending                              -> not allowed, assert.
1308// for any linkage other than
1309// internal, private, linker_private,
1310// linker_private_weak, linker_private_weak_def_auto,
1311// we emit                                -> .weak.
1312
1313void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
1314                                           raw_ostream &O) {
1315  if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
1316    if (V->hasExternalLinkage()) {
1317      if (isa<GlobalVariable>(V)) {
1318        const GlobalVariable *GVar = cast<GlobalVariable>(V);
1319        if (GVar) {
1320          if (GVar->hasInitializer())
1321            O << ".visible ";
1322          else
1323            O << ".extern ";
1324        }
1325      } else if (V->isDeclaration())
1326        O << ".extern ";
1327      else
1328        O << ".visible ";
1329    } else if (V->hasAppendingLinkage()) {
1330      std::string msg;
1331      msg.append("Error: ");
1332      msg.append("Symbol ");
1333      if (V->hasName())
1334        msg.append(V->getName().str());
1335      msg.append("has unsupported appending linkage type");
1336      llvm_unreachable(msg.c_str());
1337    } else if (!V->hasInternalLinkage() &&
1338               !V->hasPrivateLinkage()) {
1339      O << ".weak ";
1340    }
1341  }
1342}
1343
1344void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1345                                         raw_ostream &O,
1346                                         bool processDemoted) {
1347
1348  // Skip meta data
1349  if (GVar->hasSection()) {
1350    if (GVar->getSection() == StringRef("llvm.metadata"))
1351      return;
1352  }
1353
1354  // Skip LLVM intrinsic global variables
1355  if (GVar->getName().startswith("llvm.") ||
1356      GVar->getName().startswith("nvvm."))
1357    return;
1358
1359  const DataLayout *TD = TM.getDataLayout();
1360
1361  // GlobalVariables are always constant pointers themselves.
1362  const PointerType *PTy = GVar->getType();
1363  Type *ETy = PTy->getElementType();
1364
1365  if (GVar->hasExternalLinkage()) {
1366    if (GVar->hasInitializer())
1367      O << ".visible ";
1368    else
1369      O << ".extern ";
1370  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1371             GVar->hasAvailableExternallyLinkage() ||
1372             GVar->hasCommonLinkage()) {
1373    O << ".weak ";
1374  }
1375
1376  if (llvm::isTexture(*GVar)) {
1377    O << ".global .texref " << llvm::getTextureName(*GVar) << ";\n";
1378    return;
1379  }
1380
1381  if (llvm::isSurface(*GVar)) {
1382    O << ".global .surfref " << llvm::getSurfaceName(*GVar) << ";\n";
1383    return;
1384  }
1385
1386  if (GVar->isDeclaration()) {
1387    // (extern) declarations, no definition or initializer
1388    // Currently the only known declaration is for an automatic __local
1389    // (.shared) promoted to global.
1390    emitPTXGlobalVariable(GVar, O);
1391    O << ";\n";
1392    return;
1393  }
1394
1395  if (llvm::isSampler(*GVar)) {
1396    O << ".global .samplerref " << llvm::getSamplerName(*GVar);
1397
1398    const Constant *Initializer = nullptr;
1399    if (GVar->hasInitializer())
1400      Initializer = GVar->getInitializer();
1401    const ConstantInt *CI = nullptr;
1402    if (Initializer)
1403      CI = dyn_cast<ConstantInt>(Initializer);
1404    if (CI) {
1405      unsigned sample = CI->getZExtValue();
1406
1407      O << " = { ";
1408
1409      for (int i = 0,
1410               addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1411           i < 3; i++) {
1412        O << "addr_mode_" << i << " = ";
1413        switch (addr) {
1414        case 0:
1415          O << "wrap";
1416          break;
1417        case 1:
1418          O << "clamp_to_border";
1419          break;
1420        case 2:
1421          O << "clamp_to_edge";
1422          break;
1423        case 3:
1424          O << "wrap";
1425          break;
1426        case 4:
1427          O << "mirror";
1428          break;
1429        }
1430        O << ", ";
1431      }
1432      O << "filter_mode = ";
1433      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1434      case 0:
1435        O << "nearest";
1436        break;
1437      case 1:
1438        O << "linear";
1439        break;
1440      case 2:
1441        llvm_unreachable("Anisotropic filtering is not supported");
1442      default:
1443        O << "nearest";
1444        break;
1445      }
1446      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1447        O << ", force_unnormalized_coords = 1";
1448      }
1449      O << " }";
1450    }
1451
1452    O << ";\n";
1453    return;
1454  }
1455
1456  if (GVar->hasPrivateLinkage()) {
1457
1458    if (!strncmp(GVar->getName().data(), "unrollpragma", 12))
1459      return;
1460
1461    // FIXME - need better way (e.g. Metadata) to avoid generating this global
1462    if (!strncmp(GVar->getName().data(), "filename", 8))
1463      return;
1464    if (GVar->use_empty())
1465      return;
1466  }
1467
1468  const Function *demotedFunc = nullptr;
1469  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1470    O << "// " << GVar->getName().str() << " has been demoted\n";
1471    if (localDecls.find(demotedFunc) != localDecls.end())
1472      localDecls[demotedFunc].push_back(GVar);
1473    else {
1474      std::vector<const GlobalVariable *> temp;
1475      temp.push_back(GVar);
1476      localDecls[demotedFunc] = temp;
1477    }
1478    return;
1479  }
1480
1481  O << ".";
1482  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1483
1484  if (isManaged(*GVar)) {
1485    O << " .attribute(.managed)";
1486  }
1487
1488  if (GVar->getAlignment() == 0)
1489    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
1490  else
1491    O << " .align " << GVar->getAlignment();
1492
1493  if (ETy->isSingleValueType()) {
1494    O << " .";
1495    // Special case: ABI requires that we use .u8 for predicates
1496    if (ETy->isIntegerTy(1))
1497      O << "u8";
1498    else
1499      O << getPTXFundamentalTypeStr(ETy, false);
1500    O << " ";
1501    O << *getSymbol(GVar);
1502
1503    // Ptx allows variable initilization only for constant and global state
1504    // spaces.
1505    if (GVar->hasInitializer()) {
1506      if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1507          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
1508        const Constant *Initializer = GVar->getInitializer();
1509        // 'undef' is treated as there is no value spefied.
1510        if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1511          O << " = ";
1512          printScalarConstant(Initializer, O);
1513        }
1514      } else {
1515        // The frontend adds zero-initializer to variables that don't have an
1516        // initial value, so skip warning for this case.
1517        if (!GVar->getInitializer()->isNullValue()) {
1518          std::string warnMsg = "initial value of '" + GVar->getName().str() +
1519              "' is not allowed in addrspace(" +
1520              llvm::utostr_32(PTy->getAddressSpace()) + ")";
1521          report_fatal_error(warnMsg.c_str());
1522        }
1523      }
1524    }
1525  } else {
1526    unsigned int ElementSize = 0;
1527
1528    // Although PTX has direct support for struct type and array type and
1529    // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1530    // targets that support these high level field accesses. Structs, arrays
1531    // and vectors are lowered into arrays of bytes.
1532    switch (ETy->getTypeID()) {
1533    case Type::StructTyID:
1534    case Type::ArrayTyID:
1535    case Type::VectorTyID:
1536      ElementSize = TD->getTypeStoreSize(ETy);
1537      // Ptx allows variable initilization only for constant and
1538      // global state spaces.
1539      if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
1540           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
1541          GVar->hasInitializer()) {
1542        const Constant *Initializer = GVar->getInitializer();
1543        if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1544          AggBuffer aggBuffer(ElementSize, O, *this);
1545          bufferAggregateConstant(Initializer, &aggBuffer);
1546          if (aggBuffer.numSymbols) {
1547            if (nvptxSubtarget.is64Bit()) {
1548              O << " .u64 " << *getSymbol(GVar) << "[";
1549              O << ElementSize / 8;
1550            } else {
1551              O << " .u32 " << *getSymbol(GVar) << "[";
1552              O << ElementSize / 4;
1553            }
1554            O << "]";
1555          } else {
1556            O << " .b8 " << *getSymbol(GVar) << "[";
1557            O << ElementSize;
1558            O << "]";
1559          }
1560          O << " = {";
1561          aggBuffer.print();
1562          O << "}";
1563        } else {
1564          O << " .b8 " << *getSymbol(GVar);
1565          if (ElementSize) {
1566            O << "[";
1567            O << ElementSize;
1568            O << "]";
1569          }
1570        }
1571      } else {
1572        O << " .b8 " << *getSymbol(GVar);
1573        if (ElementSize) {
1574          O << "[";
1575          O << ElementSize;
1576          O << "]";
1577        }
1578      }
1579      break;
1580    default:
1581      llvm_unreachable("type not supported yet");
1582    }
1583
1584  }
1585  O << ";\n";
1586}
1587
1588void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1589  if (localDecls.find(f) == localDecls.end())
1590    return;
1591
1592  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1593
1594  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1595    O << "\t// demoted variable\n\t";
1596    printModuleLevelGV(gvars[i], O, true);
1597  }
1598}
1599
1600void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1601                                          raw_ostream &O) const {
1602  switch (AddressSpace) {
1603  case llvm::ADDRESS_SPACE_LOCAL:
1604    O << "local";
1605    break;
1606  case llvm::ADDRESS_SPACE_GLOBAL:
1607    O << "global";
1608    break;
1609  case llvm::ADDRESS_SPACE_CONST:
1610    O << "const";
1611    break;
1612  case llvm::ADDRESS_SPACE_SHARED:
1613    O << "shared";
1614    break;
1615  default:
1616    report_fatal_error("Bad address space found while emitting PTX");
1617    break;
1618  }
1619}
1620
1621std::string
1622NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
1623  switch (Ty->getTypeID()) {
1624  default:
1625    llvm_unreachable("unexpected type");
1626    break;
1627  case Type::IntegerTyID: {
1628    unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1629    if (NumBits == 1)
1630      return "pred";
1631    else if (NumBits <= 64) {
1632      std::string name = "u";
1633      return name + utostr(NumBits);
1634    } else {
1635      llvm_unreachable("Integer too large");
1636      break;
1637    }
1638    break;
1639  }
1640  case Type::FloatTyID:
1641    return "f32";
1642  case Type::DoubleTyID:
1643    return "f64";
1644  case Type::PointerTyID:
1645    if (nvptxSubtarget.is64Bit())
1646      if (useB4PTR)
1647        return "b64";
1648      else
1649        return "u64";
1650    else if (useB4PTR)
1651      return "b32";
1652    else
1653      return "u32";
1654  }
1655  llvm_unreachable("unexpected type");
1656  return nullptr;
1657}
1658
1659void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1660                                            raw_ostream &O) {
1661
1662  const DataLayout *TD = TM.getDataLayout();
1663
1664  // GlobalVariables are always constant pointers themselves.
1665  const PointerType *PTy = GVar->getType();
1666  Type *ETy = PTy->getElementType();
1667
1668  O << ".";
1669  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1670  if (GVar->getAlignment() == 0)
1671    O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
1672  else
1673    O << " .align " << GVar->getAlignment();
1674
1675  if (ETy->isSingleValueType()) {
1676    O << " .";
1677    O << getPTXFundamentalTypeStr(ETy);
1678    O << " ";
1679    O << *getSymbol(GVar);
1680    return;
1681  }
1682
1683  int64_t ElementSize = 0;
1684
1685  // Although PTX has direct support for struct type and array type and LLVM IR
1686  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1687  // support these high level field accesses. Structs and arrays are lowered
1688  // into arrays of bytes.
1689  switch (ETy->getTypeID()) {
1690  case Type::StructTyID:
1691  case Type::ArrayTyID:
1692  case Type::VectorTyID:
1693    ElementSize = TD->getTypeStoreSize(ETy);
1694    O << " .b8 " << *getSymbol(GVar) << "[";
1695    if (ElementSize) {
1696      O << itostr(ElementSize);
1697    }
1698    O << "]";
1699    break;
1700  default:
1701    llvm_unreachable("type not supported yet");
1702  }
1703  return;
1704}
1705
1706static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
1707  if (Ty->isSingleValueType())
1708    return TD->getPrefTypeAlignment(Ty);
1709
1710  const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
1711  if (ATy)
1712    return getOpenCLAlignment(TD, ATy->getElementType());
1713
1714  const VectorType *VTy = dyn_cast<VectorType>(Ty);
1715  if (VTy) {
1716    Type *ETy = VTy->getElementType();
1717    unsigned int numE = VTy->getNumElements();
1718    unsigned int alignE = TD->getPrefTypeAlignment(ETy);
1719    if (numE == 3)
1720      return 4 * alignE;
1721    else
1722      return numE * alignE;
1723  }
1724
1725  const StructType *STy = dyn_cast<StructType>(Ty);
1726  if (STy) {
1727    unsigned int alignStruct = 1;
1728    // Go through each element of the struct and find the
1729    // largest alignment.
1730    for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1731      Type *ETy = STy->getElementType(i);
1732      unsigned int align = getOpenCLAlignment(TD, ETy);
1733      if (align > alignStruct)
1734        alignStruct = align;
1735    }
1736    return alignStruct;
1737  }
1738
1739  const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
1740  if (FTy)
1741    return TD->getPointerPrefAlignment();
1742  return TD->getPrefTypeAlignment(Ty);
1743}
1744
1745void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1746                                     int paramIndex, raw_ostream &O) {
1747  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
1748      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
1749    O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
1750  else {
1751    std::string argName = I->getName();
1752    const char *p = argName.c_str();
1753    while (*p) {
1754      if (*p == '.')
1755        O << "_";
1756      else
1757        O << *p;
1758      p++;
1759    }
1760  }
1761}
1762
1763void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
1764  Function::const_arg_iterator I, E;
1765  int i = 0;
1766
1767  if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
1768      (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) {
1769    O << *CurrentFnSym << "_param_" << paramIndex;
1770    return;
1771  }
1772
1773  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
1774    if (i == paramIndex) {
1775      printParamName(I, paramIndex, O);
1776      return;
1777    }
1778  }
1779  llvm_unreachable("paramIndex out of bound");
1780}
1781
1782void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1783  const DataLayout *TD = TM.getDataLayout();
1784  const AttributeSet &PAL = F->getAttributes();
1785  const TargetLowering *TLI = TM.getTargetLowering();
1786  Function::const_arg_iterator I, E;
1787  unsigned paramIndex = 0;
1788  bool first = true;
1789  bool isKernelFunc = llvm::isKernelFunction(*F);
1790  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
1791  MVT thePointerTy = TLI->getPointerTy();
1792
1793  O << "(\n";
1794
1795  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1796    Type *Ty = I->getType();
1797
1798    if (!first)
1799      O << ",\n";
1800
1801    first = false;
1802
1803    // Handle image/sampler parameters
1804    if (isKernelFunction(*F)) {
1805      if (isSampler(*I) || isImage(*I)) {
1806        if (isImage(*I)) {
1807          std::string sname = I->getName();
1808          if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1809            if (nvptxSubtarget.hasImageHandles())
1810              O << "\t.param .u64 .ptr .surfref ";
1811            else
1812              O << "\t.param .surfref ";
1813            O << *CurrentFnSym << "_param_" << paramIndex;
1814          }
1815          else { // Default image is read_only
1816            if (nvptxSubtarget.hasImageHandles())
1817              O << "\t.param .u64 .ptr .texref ";
1818            else
1819              O << "\t.param .texref ";
1820            O << *CurrentFnSym << "_param_" << paramIndex;
1821          }
1822        } else {
1823          if (nvptxSubtarget.hasImageHandles())
1824            O << "\t.param .u64 .ptr .samplerref ";
1825          else
1826            O << "\t.param .samplerref ";
1827          O << *CurrentFnSym << "_param_" << paramIndex;
1828        }
1829        continue;
1830      }
1831    }
1832
1833    if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
1834      if (Ty->isAggregateType() || Ty->isVectorTy()) {
1835        // Just print .param .align <a> .b8 .param[size];
1836        // <a> = PAL.getparamalignment
1837        // size = typeallocsize of element type
1838        unsigned align = PAL.getParamAlignment(paramIndex + 1);
1839        if (align == 0)
1840          align = TD->getABITypeAlignment(Ty);
1841
1842        unsigned sz = TD->getTypeAllocSize(Ty);
1843        O << "\t.param .align " << align << " .b8 ";
1844        printParamName(I, paramIndex, O);
1845        O << "[" << sz << "]";
1846
1847        continue;
1848      }
1849      // Just a scalar
1850      const PointerType *PTy = dyn_cast<PointerType>(Ty);
1851      if (isKernelFunc) {
1852        if (PTy) {
1853          // Special handling for pointer arguments to kernel
1854          O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1855
1856          if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
1857            Type *ETy = PTy->getElementType();
1858            int addrSpace = PTy->getAddressSpace();
1859            switch (addrSpace) {
1860            default:
1861              O << ".ptr ";
1862              break;
1863            case llvm::ADDRESS_SPACE_CONST:
1864              O << ".ptr .const ";
1865              break;
1866            case llvm::ADDRESS_SPACE_SHARED:
1867              O << ".ptr .shared ";
1868              break;
1869            case llvm::ADDRESS_SPACE_GLOBAL:
1870              O << ".ptr .global ";
1871              break;
1872            }
1873            O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
1874          }
1875          printParamName(I, paramIndex, O);
1876          continue;
1877        }
1878
1879        // non-pointer scalar to kernel func
1880        O << "\t.param .";
1881        // Special case: predicate operands become .u8 types
1882        if (Ty->isIntegerTy(1))
1883          O << "u8";
1884        else
1885          O << getPTXFundamentalTypeStr(Ty);
1886        O << " ";
1887        printParamName(I, paramIndex, O);
1888        continue;
1889      }
1890      // Non-kernel function, just print .param .b<size> for ABI
1891      // and .reg .b<size> for non-ABI
1892      unsigned sz = 0;
1893      if (isa<IntegerType>(Ty)) {
1894        sz = cast<IntegerType>(Ty)->getBitWidth();
1895        if (sz < 32)
1896          sz = 32;
1897      } else if (isa<PointerType>(Ty))
1898        sz = thePointerTy.getSizeInBits();
1899      else
1900        sz = Ty->getPrimitiveSizeInBits();
1901      if (isABI)
1902        O << "\t.param .b" << sz << " ";
1903      else
1904        O << "\t.reg .b" << sz << " ";
1905      printParamName(I, paramIndex, O);
1906      continue;
1907    }
1908
1909    // param has byVal attribute. So should be a pointer
1910    const PointerType *PTy = dyn_cast<PointerType>(Ty);
1911    assert(PTy && "Param with byval attribute should be a pointer type");
1912    Type *ETy = PTy->getElementType();
1913
1914    if (isABI || isKernelFunc) {
1915      // Just print .param .align <a> .b8 .param[size];
1916      // <a> = PAL.getparamalignment
1917      // size = typeallocsize of element type
1918      unsigned align = PAL.getParamAlignment(paramIndex + 1);
1919      if (align == 0)
1920        align = TD->getABITypeAlignment(ETy);
1921
1922      unsigned sz = TD->getTypeAllocSize(ETy);
1923      O << "\t.param .align " << align << " .b8 ";
1924      printParamName(I, paramIndex, O);
1925      O << "[" << sz << "]";
1926      continue;
1927    } else {
1928      // Split the ETy into constituent parts and
1929      // print .param .b<size> <name> for each part.
1930      // Further, if a part is vector, print the above for
1931      // each vector element.
1932      SmallVector<EVT, 16> vtparts;
1933      ComputeValueVTs(*TLI, ETy, vtparts);
1934      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1935        unsigned elems = 1;
1936        EVT elemtype = vtparts[i];
1937        if (vtparts[i].isVector()) {
1938          elems = vtparts[i].getVectorNumElements();
1939          elemtype = vtparts[i].getVectorElementType();
1940        }
1941
1942        for (unsigned j = 0, je = elems; j != je; ++j) {
1943          unsigned sz = elemtype.getSizeInBits();
1944          if (elemtype.isInteger() && (sz < 32))
1945            sz = 32;
1946          O << "\t.reg .b" << sz << " ";
1947          printParamName(I, paramIndex, O);
1948          if (j < je - 1)
1949            O << ",\n";
1950          ++paramIndex;
1951        }
1952        if (i < e - 1)
1953          O << ",\n";
1954      }
1955      --paramIndex;
1956      continue;
1957    }
1958  }
1959
1960  O << "\n)\n";
1961}
1962
1963void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1964                                            raw_ostream &O) {
1965  const Function *F = MF.getFunction();
1966  emitFunctionParamList(F, O);
1967}
1968
1969void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1970    const MachineFunction &MF) {
1971  SmallString<128> Str;
1972  raw_svector_ostream O(Str);
1973
1974  // Map the global virtual register number to a register class specific
1975  // virtual register number starting from 1 with that class.
1976  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
1977  //unsigned numRegClasses = TRI->getNumRegClasses();
1978
1979  // Emit the Fake Stack Object
1980  const MachineFrameInfo *MFI = MF.getFrameInfo();
1981  int NumBytes = (int) MFI->getStackSize();
1982  if (NumBytes) {
1983    O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
1984      << getFunctionNumber() << "[" << NumBytes << "];\n";
1985    if (nvptxSubtarget.is64Bit()) {
1986      O << "\t.reg .b64 \t%SP;\n";
1987      O << "\t.reg .b64 \t%SPL;\n";
1988    } else {
1989      O << "\t.reg .b32 \t%SP;\n";
1990      O << "\t.reg .b32 \t%SPL;\n";
1991    }
1992  }
1993
1994  // Go through all virtual registers to establish the mapping between the
1995  // global virtual
1996  // register number and the per class virtual register number.
1997  // We use the per class virtual register number in the ptx output.
1998  unsigned int numVRs = MRI->getNumVirtRegs();
1999  for (unsigned i = 0; i < numVRs; i++) {
2000    unsigned int vr = TRI->index2VirtReg(i);
2001    const TargetRegisterClass *RC = MRI->getRegClass(vr);
2002    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
2003    int n = regmap.size();
2004    regmap.insert(std::make_pair(vr, n + 1));
2005  }
2006
2007  // Emit register declarations
2008  // @TODO: Extract out the real register usage
2009  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
2010  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
2011  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
2012  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
2013  // O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
2014  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
2015  // O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
2016
2017  // Emit declaration of the virtual registers or 'physical' registers for
2018  // each register class
2019  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
2020    const TargetRegisterClass *RC = TRI->getRegClass(i);
2021    DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
2022    std::string rcname = getNVPTXRegClassName(RC);
2023    std::string rcStr = getNVPTXRegClassStr(RC);
2024    int n = regmap.size();
2025
2026    // Only declare those registers that may be used.
2027    if (n) {
2028       O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
2029         << ">;\n";
2030    }
2031  }
2032
2033  OutStreamer.EmitRawText(O.str());
2034}
2035
2036void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
2037  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
2038  bool ignored;
2039  unsigned int numHex;
2040  const char *lead;
2041
2042  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
2043    numHex = 8;
2044    lead = "0f";
2045    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
2046  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
2047    numHex = 16;
2048    lead = "0d";
2049    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
2050  } else
2051    llvm_unreachable("unsupported fp type");
2052
2053  APInt API = APF.bitcastToAPInt();
2054  std::string hexstr(utohexstr(API.getZExtValue()));
2055  O << lead;
2056  if (hexstr.length() < numHex)
2057    O << std::string(numHex - hexstr.length(), '0');
2058  O << utohexstr(API.getZExtValue());
2059}
2060
2061void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
2062  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
2063    O << CI->getValue();
2064    return;
2065  }
2066  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
2067    printFPConstant(CFP, O);
2068    return;
2069  }
2070  if (isa<ConstantPointerNull>(CPV)) {
2071    O << "0";
2072    return;
2073  }
2074  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
2075    PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
2076    bool IsNonGenericPointer = false;
2077    if (PTy && PTy->getAddressSpace() != 0) {
2078      IsNonGenericPointer = true;
2079    }
2080    if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
2081      O << "generic(";
2082      O << *getSymbol(GVar);
2083      O << ")";
2084    } else {
2085      O << *getSymbol(GVar);
2086    }
2087    return;
2088  }
2089  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
2090    const Value *v = Cexpr->stripPointerCasts();
2091    PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
2092    bool IsNonGenericPointer = false;
2093    if (PTy && PTy->getAddressSpace() != 0) {
2094      IsNonGenericPointer = true;
2095    }
2096    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
2097      if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
2098        O << "generic(";
2099        O << *getSymbol(GVar);
2100        O << ")";
2101      } else {
2102        O << *getSymbol(GVar);
2103      }
2104      return;
2105    } else {
2106      O << *LowerConstant(CPV, *this);
2107      return;
2108    }
2109  }
2110  llvm_unreachable("Not scalar type found in printScalarConstant()");
2111}
2112
2113void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
2114                                   AggBuffer *aggBuffer) {
2115
2116  const DataLayout *TD = TM.getDataLayout();
2117
2118  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
2119    int s = TD->getTypeAllocSize(CPV->getType());
2120    if (s < Bytes)
2121      s = Bytes;
2122    aggBuffer->addZeros(s);
2123    return;
2124  }
2125
2126  unsigned char *ptr;
2127  switch (CPV->getType()->getTypeID()) {
2128
2129  case Type::IntegerTyID: {
2130    const Type *ETy = CPV->getType();
2131    if (ETy == Type::getInt8Ty(CPV->getContext())) {
2132      unsigned char c =
2133          (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
2134      ptr = &c;
2135      aggBuffer->addBytes(ptr, 1, Bytes);
2136    } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
2137      short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
2138      ptr = (unsigned char *)&int16;
2139      aggBuffer->addBytes(ptr, 2, Bytes);
2140    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
2141      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
2142        int int32 = (int)(constInt->getZExtValue());
2143        ptr = (unsigned char *)&int32;
2144        aggBuffer->addBytes(ptr, 4, Bytes);
2145        break;
2146      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
2147        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
2148                ConstantFoldConstantExpression(Cexpr, TD))) {
2149          int int32 = (int)(constInt->getZExtValue());
2150          ptr = (unsigned char *)&int32;
2151          aggBuffer->addBytes(ptr, 4, Bytes);
2152          break;
2153        }
2154        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
2155          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
2156          aggBuffer->addSymbol(v);
2157          aggBuffer->addZeros(4);
2158          break;
2159        }
2160      }
2161      llvm_unreachable("unsupported integer const type");
2162    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
2163      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
2164        long long int64 = (long long)(constInt->getZExtValue());
2165        ptr = (unsigned char *)&int64;
2166        aggBuffer->addBytes(ptr, 8, Bytes);
2167        break;
2168      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
2169        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
2170                ConstantFoldConstantExpression(Cexpr, TD))) {
2171          long long int64 = (long long)(constInt->getZExtValue());
2172          ptr = (unsigned char *)&int64;
2173          aggBuffer->addBytes(ptr, 8, Bytes);
2174          break;
2175        }
2176        if (Cexpr->getOpcode() == Instruction::PtrToInt) {
2177          Value *v = Cexpr->getOperand(0)->stripPointerCasts();
2178          aggBuffer->addSymbol(v);
2179          aggBuffer->addZeros(8);
2180          break;
2181        }
2182      }
2183      llvm_unreachable("unsupported integer const type");
2184    } else
2185      llvm_unreachable("unsupported integer const type");
2186    break;
2187  }
2188  case Type::FloatTyID:
2189  case Type::DoubleTyID: {
2190    const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
2191    const Type *Ty = CFP->getType();
2192    if (Ty == Type::getFloatTy(CPV->getContext())) {
2193      float float32 = (float) CFP->getValueAPF().convertToFloat();
2194      ptr = (unsigned char *)&float32;
2195      aggBuffer->addBytes(ptr, 4, Bytes);
2196    } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
2197      double float64 = CFP->getValueAPF().convertToDouble();
2198      ptr = (unsigned char *)&float64;
2199      aggBuffer->addBytes(ptr, 8, Bytes);
2200    } else {
2201      llvm_unreachable("unsupported fp const type");
2202    }
2203    break;
2204  }
2205  case Type::PointerTyID: {
2206    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
2207      aggBuffer->addSymbol(GVar);
2208    } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
2209      const Value *v = Cexpr->stripPointerCasts();
2210      aggBuffer->addSymbol(v);
2211    }
2212    unsigned int s = TD->getTypeAllocSize(CPV->getType());
2213    aggBuffer->addZeros(s);
2214    break;
2215  }
2216
2217  case Type::ArrayTyID:
2218  case Type::VectorTyID:
2219  case Type::StructTyID: {
2220    if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
2221        isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
2222      int ElementSize = TD->getTypeAllocSize(CPV->getType());
2223      bufferAggregateConstant(CPV, aggBuffer);
2224      if (Bytes > ElementSize)
2225        aggBuffer->addZeros(Bytes - ElementSize);
2226    } else if (isa<ConstantAggregateZero>(CPV))
2227      aggBuffer->addZeros(Bytes);
2228    else
2229      llvm_unreachable("Unexpected Constant type");
2230    break;
2231  }
2232
2233  default:
2234    llvm_unreachable("unsupported type");
2235  }
2236}
2237
2238void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
2239                                              AggBuffer *aggBuffer) {
2240  const DataLayout *TD = TM.getDataLayout();
2241  int Bytes;
2242
2243  // Old constants
2244  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
2245    if (CPV->getNumOperands())
2246      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
2247        bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
2248    return;
2249  }
2250
2251  if (const ConstantDataSequential *CDS =
2252          dyn_cast<ConstantDataSequential>(CPV)) {
2253    if (CDS->getNumElements())
2254      for (unsigned i = 0; i < CDS->getNumElements(); ++i)
2255        bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
2256                     aggBuffer);
2257    return;
2258  }
2259
2260  if (isa<ConstantStruct>(CPV)) {
2261    if (CPV->getNumOperands()) {
2262      StructType *ST = cast<StructType>(CPV->getType());
2263      for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
2264        if (i == (e - 1))
2265          Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
2266                  TD->getTypeAllocSize(ST) -
2267                  TD->getStructLayout(ST)->getElementOffset(i);
2268        else
2269          Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
2270                  TD->getStructLayout(ST)->getElementOffset(i);
2271        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
2272      }
2273    }
2274    return;
2275  }
2276  llvm_unreachable("unsupported constant type in printAggregateConstant()");
2277}
2278
2279// buildTypeNameMap - Run through symbol table looking for type names.
2280//
2281
2282bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
2283
2284  std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
2285
2286  if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
2287                                  !PI->second.compare("struct._image2d_t") ||
2288                                  !PI->second.compare("struct._image3d_t")))
2289    return true;
2290
2291  return false;
2292}
2293
2294
2295bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
2296  switch (MI.getOpcode()) {
2297  default:
2298    return false;
2299  case NVPTX::CallArgBeginInst:
2300  case NVPTX::CallArgEndInst0:
2301  case NVPTX::CallArgEndInst1:
2302  case NVPTX::CallArgF32:
2303  case NVPTX::CallArgF64:
2304  case NVPTX::CallArgI16:
2305  case NVPTX::CallArgI32:
2306  case NVPTX::CallArgI32imm:
2307  case NVPTX::CallArgI64:
2308  case NVPTX::CallArgParam:
2309  case NVPTX::CallVoidInst:
2310  case NVPTX::CallVoidInstReg:
2311  case NVPTX::Callseq_End:
2312  case NVPTX::CallVoidInstReg64:
2313  case NVPTX::DeclareParamInst:
2314  case NVPTX::DeclareRetMemInst:
2315  case NVPTX::DeclareRetRegInst:
2316  case NVPTX::DeclareRetScalarInst:
2317  case NVPTX::DeclareScalarParamInst:
2318  case NVPTX::DeclareScalarRegInst:
2319  case NVPTX::StoreParamF32:
2320  case NVPTX::StoreParamF64:
2321  case NVPTX::StoreParamI16:
2322  case NVPTX::StoreParamI32:
2323  case NVPTX::StoreParamI64:
2324  case NVPTX::StoreParamI8:
2325  case NVPTX::StoreRetvalF32:
2326  case NVPTX::StoreRetvalF64:
2327  case NVPTX::StoreRetvalI16:
2328  case NVPTX::StoreRetvalI32:
2329  case NVPTX::StoreRetvalI64:
2330  case NVPTX::StoreRetvalI8:
2331  case NVPTX::LastCallArgF32:
2332  case NVPTX::LastCallArgF64:
2333  case NVPTX::LastCallArgI16:
2334  case NVPTX::LastCallArgI32:
2335  case NVPTX::LastCallArgI32imm:
2336  case NVPTX::LastCallArgI64:
2337  case NVPTX::LastCallArgParam:
2338  case NVPTX::LoadParamMemF32:
2339  case NVPTX::LoadParamMemF64:
2340  case NVPTX::LoadParamMemI16:
2341  case NVPTX::LoadParamMemI32:
2342  case NVPTX::LoadParamMemI64:
2343  case NVPTX::LoadParamMemI8:
2344  case NVPTX::PrototypeInst:
2345  case NVPTX::DBG_VALUE:
2346    return true;
2347  }
2348  return false;
2349}
2350
2351/// PrintAsmOperand - Print out an operand for an inline asm expression.
2352///
2353bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2354                                      unsigned AsmVariant,
2355                                      const char *ExtraCode, raw_ostream &O) {
2356  if (ExtraCode && ExtraCode[0]) {
2357    if (ExtraCode[1] != 0)
2358      return true; // Unknown modifier.
2359
2360    switch (ExtraCode[0]) {
2361    default:
2362      // See if this is a generic print operand
2363      return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
2364    case 'r':
2365      break;
2366    }
2367  }
2368
2369  printOperand(MI, OpNo, O);
2370
2371  return false;
2372}
2373
2374bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
2375    const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
2376    const char *ExtraCode, raw_ostream &O) {
2377  if (ExtraCode && ExtraCode[0])
2378    return true; // Unknown modifier
2379
2380  O << '[';
2381  printMemOperand(MI, OpNo, O);
2382  O << ']';
2383
2384  return false;
2385}
2386
2387void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2388                                   raw_ostream &O, const char *Modifier) {
2389  const MachineOperand &MO = MI->getOperand(opNum);
2390  switch (MO.getType()) {
2391  case MachineOperand::MO_Register:
2392    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
2393      if (MO.getReg() == NVPTX::VRDepot)
2394        O << DEPOTNAME << getFunctionNumber();
2395      else
2396        O << NVPTXInstPrinter::getRegisterName(MO.getReg());
2397    } else {
2398      emitVirtualRegister(MO.getReg(), O);
2399    }
2400    return;
2401
2402  case MachineOperand::MO_Immediate:
2403    if (!Modifier)
2404      O << MO.getImm();
2405    else if (strstr(Modifier, "vec") == Modifier)
2406      printVecModifiedImmediate(MO, Modifier, O);
2407    else
2408      llvm_unreachable(
2409          "Don't know how to handle modifier on immediate operand");
2410    return;
2411
2412  case MachineOperand::MO_FPImmediate:
2413    printFPConstant(MO.getFPImm(), O);
2414    break;
2415
2416  case MachineOperand::MO_GlobalAddress:
2417    O << *getSymbol(MO.getGlobal());
2418    break;
2419
2420  case MachineOperand::MO_MachineBasicBlock:
2421    O << *MO.getMBB()->getSymbol();
2422    return;
2423
2424  default:
2425    llvm_unreachable("Operand type not supported.");
2426  }
2427}
2428
2429void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2430                                      raw_ostream &O, const char *Modifier) {
2431  printOperand(MI, opNum, O);
2432
2433  if (Modifier && !strcmp(Modifier, "add")) {
2434    O << ", ";
2435    printOperand(MI, opNum + 1, O);
2436  } else {
2437    if (MI->getOperand(opNum + 1).isImm() &&
2438        MI->getOperand(opNum + 1).getImm() == 0)
2439      return; // don't print ',0' or '+0'
2440    O << "+";
2441    printOperand(MI, opNum + 1, O);
2442  }
2443}
2444
2445
2446// Force static initialization.
2447extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
2448  RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
2449  RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
2450}
2451
2452void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
2453  std::stringstream temp;
2454  LineReader *reader = this->getReader(filename.str());
2455  temp << "\n//";
2456  temp << filename.str();
2457  temp << ":";
2458  temp << line;
2459  temp << " ";
2460  temp << reader->readLine(line);
2461  temp << "\n";
2462  this->OutStreamer.EmitRawText(Twine(temp.str()));
2463}
2464
2465LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
2466  if (!reader) {
2467    reader = new LineReader(filename);
2468  }
2469
2470  if (reader->fileName() != filename) {
2471    delete reader;
2472    reader = new LineReader(filename);
2473  }
2474
2475  return reader;
2476}
2477
2478std::string LineReader::readLine(unsigned lineNum) {
2479  if (lineNum < theCurLine) {
2480    theCurLine = 0;
2481    fstr.seekg(0, std::ios::beg);
2482  }
2483  while (theCurLine < lineNum) {
2484    fstr.getline(buff, 500);
2485    theCurLine++;
2486  }
2487  return buff;
2488}
2489
2490// Force static initialization.
2491extern "C" void LLVMInitializeNVPTXAsmPrinter() {
2492  RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
2493  RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
2494}
2495