X86AsmPrinter.cpp revision de420aee32ea799eada24afb59857040172c75d1
1//===-- X86/Printer.cpp - Convert X86 LLVM code to Intel assembly ---------===//
2//
3// This file contains a printer that converts from our internal
4// representation of machine-dependent LLVM code to Intel-format
5// assembly language. This printer is the output mechanism used
6// by `llc' and `lli -printmachineinstrs' on X86.
7//
8//===----------------------------------------------------------------------===//
9
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/Function.h"
#include "llvm/Constant.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "Support/Statistic.h"
#include "Support/hash_map"
#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/DerivedTypes.h"
#include "llvm/SlotCalculator.h"
#include "Support/StringExtras.h"
#include "llvm/Module.h"
#include <cstring>
27
28namespace {
29  /// This is properly part of the name mangler; it keeps track of
30  /// which global values have had their names mangled. It is cleared
31  /// at the end of every module by doFinalization().
32  ///
33  std::set<const Value *> MangledGlobals;
34
35  struct Printer : public MachineFunctionPass {
36    /// Output stream on which we're printing assembly code.
37    ///
38    std::ostream &O;
39
40    /// Target machine description which we query for reg. names, data
41    /// layout, etc.
42    ///
43    TargetMachine &TM;
44
45    Printer(std::ostream &o, TargetMachine &tm) : O(o), TM(tm) { }
46
47    /// We name each basic block in a Function with a unique number, so
48    /// that we can consistently refer to them later. This is cleared
49    /// at the beginning of each call to runOnMachineFunction().
50    ///
51    typedef std::map<const Value *, unsigned> ValueMapTy;
52    ValueMapTy NumberForBB;
53
54    /// Cache of mangled name for current function. This is
55    /// recalculated at the beginning of each call to
56    /// runOnMachineFunction().
57    ///
58    std::string CurrentFnName;
59
60    virtual const char *getPassName() const {
61      return "X86 Assembly Printer";
62    }
63
64    void printMachineInstruction(const MachineInstr *MI) const;
65    void printOp(const MachineOperand &MO,
66		 bool elideOffsetKeyword = false) const;
67    void printMemReference(const MachineInstr *MI, unsigned Op) const;
68    void printConstantPool(MachineConstantPool *MCP);
69    bool runOnMachineFunction(MachineFunction &F);
70    std::string ConstantExprToString(const ConstantExpr* CE);
71    std::string valToExprString(const Value* V);
72    bool doInitialization(Module &M);
73    bool doFinalization(Module &M);
74    void PrintZeroBytesToPad(int numBytes);
75    void printConstantValueOnly(const Constant* CV, int numPadBytesAfter = 0);
76    void printSingleConstantValue(const Constant* CV);
77  };
78} // end of anonymous namespace
79
80/// createX86CodePrinterPass - Returns a pass that prints the X86
81/// assembly code for a MachineFunction to the given output stream,
82/// using the given target machine description.  This should work
83/// regardless of whether the function is in SSA form.
84///
85Pass *createX86CodePrinterPass(std::ostream &o, TargetMachine &tm) {
86  return new Printer(o, tm);
87}
88
/// makeNameProper - Return a copy of `x' that is usable as an
/// identifier: every '.' becomes "d_", every ' ' becomes "s_" and
/// every '-' becomes "D_".  All other characters are copied verbatim.
///
static std::string makeNameProper(std::string x) {
  std::string Mangled;
  for (std::string::size_type i = 0, e = x.size(); i != e; ++i) {
    const char C = x[i];
    if (C == '.')
      Mangled += "d_";
    else if (C == ' ')
      Mangled += "s_";
    else if (C == '-')
      Mangled += "D_";
    else
      Mangled += C;
  }
  return Mangled;
}
104
105static std::string getValueName(const Value *V) {
106  if (V->hasName()) { // Print out the label if it exists...
107    // Name mangling occurs as follows:
108    // - If V is not a global, mangling always occurs.
109    // - Otherwise, mangling occurs when any of the following are true:
110    //   1) V has internal linkage
111    //   2) V's name would collide if it is not mangled.
112    //
113    if(const GlobalValue* gv = dyn_cast<GlobalValue>(V)) {
114      if(!gv->hasInternalLinkage() && !MangledGlobals.count(gv)) {
115        // No internal linkage, name will not collide -> no mangling.
116        return makeNameProper(gv->getName());
117      }
118    }
119    // Non-global, or global with internal linkage / colliding name -> mangle.
120    return "l" + utostr(V->getType()->getUniqueID()) + "_" +
121      makeNameProper(V->getName());
122  }
123  static int Count = 0;
124  Count++;
125  return "ltmp_" + itostr(Count) + "_" + utostr(V->getType()->getUniqueID());
126}
127
128/// valToExprString - Helper function for ConstantExprToString().
129/// Appends result to argument string S.
130///
131std::string Printer::valToExprString(const Value* V) {
132  std::string S;
133  bool failed = false;
134  if (const Constant* CV = dyn_cast<Constant>(V)) { // symbolic or known
135    if (const ConstantBool *CB = dyn_cast<ConstantBool>(CV))
136      S += std::string(CB == ConstantBool::True ? "1" : "0");
137    else if (const ConstantSInt *CI = dyn_cast<ConstantSInt>(CV))
138      S += itostr(CI->getValue());
139    else if (const ConstantUInt *CI = dyn_cast<ConstantUInt>(CV))
140      S += utostr(CI->getValue());
141    else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
142      S += ftostr(CFP->getValue());
143    else if (isa<ConstantPointerNull>(CV))
144      S += "0";
145    else if (const ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(CV))
146      S += valToExprString(CPR->getValue());
147    else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV))
148      S += ConstantExprToString(CE);
149    else
150      failed = true;
151  } else if (const GlobalValue* GV = dyn_cast<GlobalValue>(V)) {
152    S += getValueName(GV);
153  }
154  else
155    failed = true;
156
157  if (failed) {
158    assert(0 && "Cannot convert value to string");
159    S += "<illegal-value>";
160  }
161  return S;
162}
163
164/// ConstantExprToString - Convert a ConstantExpr to an asm expression
165/// and return this as a string.
166///
167std::string Printer::ConstantExprToString(const ConstantExpr* CE) {
168  std::string S;
169  const TargetData &TD = TM.getTargetData();
170  switch(CE->getOpcode()) {
171  case Instruction::GetElementPtr:
172    { // generate a symbolic expression for the byte address
173      const Value* ptrVal = CE->getOperand(0);
174      std::vector<Value*> idxVec(CE->op_begin()+1, CE->op_end());
175      S += "(" + valToExprString(ptrVal) + ") + ("
176	+ utostr(TD.getIndexedOffset(ptrVal->getType(),idxVec)) + ")";
177      break;
178    }
179
180  case Instruction::Cast:
181    // Support only non-converting casts for now, i.e., a no-op.
182    // This assertion is not a complete check.
183    assert(TD.getTypeSize(CE->getType()) ==
184	   TD.getTypeSize(CE->getOperand(0)->getType()));
185    S += "(" + valToExprString(CE->getOperand(0)) + ")";
186    break;
187
188  case Instruction::Add:
189    S += "(" + valToExprString(CE->getOperand(0)) + ") + ("
190      + valToExprString(CE->getOperand(1)) + ")";
191    break;
192
193  default:
194    assert(0 && "Unsupported operator in ConstantExprToString()");
195    break;
196  }
197
198  return S;
199}
200
201/// printSingleConstantValue - Print a single constant value.
202///
203void
204Printer::printSingleConstantValue(const Constant* CV)
205{
206  assert(CV->getType() != Type::VoidTy &&
207         CV->getType() != Type::TypeTy &&
208         CV->getType() != Type::LabelTy &&
209         "Unexpected type for Constant");
210
211  assert((!isa<ConstantArray>(CV) && ! isa<ConstantStruct>(CV))
212         && "Aggregate types should be handled outside this function");
213
214  const Type *type = CV->getType();
215  O << "\t";
216  switch(type->getPrimitiveID())
217    {
218    case Type::BoolTyID: case Type::UByteTyID: case Type::SByteTyID:
219      O << ".byte";
220      break;
221    case Type::UShortTyID: case Type::ShortTyID:
222      O << ".word";
223      break;
224    case Type::UIntTyID: case Type::IntTyID: case Type::PointerTyID:
225      O << ".long";
226      break;
227    case Type::ULongTyID: case Type::LongTyID:
228      O << ".quad";
229      break;
230    case Type::FloatTyID:
231      O << ".long";
232      break;
233    case Type::DoubleTyID:
234      O << ".quad";
235      break;
236    case Type::ArrayTyID:
237      if ((cast<ArrayType>(type)->getElementType() == Type::UByteTy) ||
238	  (cast<ArrayType>(type)->getElementType() == Type::SByteTy))
239	O << ".string";
240      else
241	assert (0 && "Can't handle printing this type of array");
242      break;
243    default:
244      assert (0 && "Can't handle printing this type of thing");
245      break;
246    }
247  O << "\t";
248
249  if (const ConstantExpr* CE = dyn_cast<ConstantExpr>(CV))
250    {
251      // Constant expression built from operators, constants, and
252      // symbolic addrs
253      O << ConstantExprToString(CE) << "\n";
254    }
255  else if (type->isPrimitiveType())
256    {
257      if (type->isFloatingPoint()) {
258	// FP Constants are printed as integer constants to avoid losing
259	// precision...
260	double Val = cast<ConstantFP>(CV)->getValue();
261	if (type == Type::FloatTy) {
262	  float FVal = (float)Val;
263	  char *ProxyPtr = (char*)&FVal;        // Abide by C TBAA rules
264	  O << *(unsigned int*)ProxyPtr;
265	} else if (type == Type::DoubleTy) {
266	  char *ProxyPtr = (char*)&Val;         // Abide by C TBAA rules
267	  O << *(uint64_t*)ProxyPtr;
268	} else {
269	  assert(0 && "Unknown floating point type!");
270	}
271
272	O << "\t# " << type->getDescription() << " value: " << Val << "\n";
273      } else {
274	WriteAsOperand(O, CV, false, false) << "\n";
275      }
276    }
277  else if (const ConstantPointerRef* CPR = dyn_cast<ConstantPointerRef>(CV))
278    {
279      // This is a constant address for a global variable or method.
280      // Use the name of the variable or method as the address value.
281      O << getValueName(CPR->getValue()) << "\n";
282    }
283  else if (isa<ConstantPointerNull>(CV))
284    {
285      // Null pointer value
286      O << "0\n";
287    }
288  else
289    {
290      assert(0 && "Unknown elementary type for constant");
291    }
292}
293
294/// isStringCompatible - Can we treat the specified array as a string?
295/// Only if it is an array of ubytes or non-negative sbytes.
296///
297static bool isStringCompatible(const ConstantArray *CVA) {
298  const Type *ETy = cast<ArrayType>(CVA->getType())->getElementType();
299  if (ETy == Type::UByteTy) return true;
300  if (ETy != Type::SByteTy) return false;
301
302  for (unsigned i = 0; i < CVA->getNumOperands(); ++i)
303    if (cast<ConstantSInt>(CVA->getOperand(i))->getValue() < 0)
304      return false;
305
306  return true;
307}
308
/// toOctal - Return the octal digit character ('0'..'7') for the three
/// low order bits of X.
///
static inline char toOctal(int X) {
  const int Digit = X & 7;
  return '0' + Digit;
}
314
315/// getAsCString - Return the specified array as a C compatible
316/// string, only if the predicate isStringCompatible is true.
317///
318static std::string getAsCString(const ConstantArray *CVA) {
319  assert(isStringCompatible(CVA) && "Array is not string compatible!");
320
321  std::string Result;
322  const Type *ETy = cast<ArrayType>(CVA->getType())->getElementType();
323  Result = "\"";
324  for (unsigned i = 0; i < CVA->getNumOperands(); ++i) {
325    unsigned char C = cast<ConstantInt>(CVA->getOperand(i))->getRawValue();
326
327    if (C == '"') {
328      Result += "\\\"";
329    } else if (C == '\\') {
330      Result += "\\\\";
331    } else if (isprint(C)) {
332      Result += C;
333    } else {
334      switch(C) {
335      case '\a': Result += "\\a"; break;
336      case '\b': Result += "\\b"; break;
337      case '\f': Result += "\\f"; break;
338      case '\n': Result += "\\n"; break;
339      case '\r': Result += "\\r"; break;
340      case '\t': Result += "\\t"; break;
341      case '\v': Result += "\\v"; break;
342      default:
343        Result += '\\';
344        Result += toOctal(C >> 6);
345        Result += toOctal(C >> 3);
346        Result += toOctal(C >> 0);
347        break;
348      }
349    }
350  }
351  Result += "\"";
352  return Result;
353}
354
355// Print a constant value or values (it may be an aggregate).
356// Uses printSingleConstantValue() to print each individual value.
357void
358Printer::printConstantValueOnly(const Constant* CV,
359				int numPadBytesAfter /* = 0 */)
360{
361  const ConstantArray *CVA = dyn_cast<ConstantArray>(CV);
362  const TargetData &TD = TM.getTargetData();
363
364  if (CVA && isStringCompatible(CVA))
365    { // print the string alone and return
366      O << "\t" << ".string" << "\t" << getAsCString(CVA) << "\n";
367    }
368  else if (CVA)
369    { // Not a string.  Print the values in successive locations
370      const std::vector<Use> &constValues = CVA->getValues();
371      for (unsigned i=0; i < constValues.size(); i++)
372        printConstantValueOnly(cast<Constant>(constValues[i].get()));
373    }
374  else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
375    { // Print the fields in successive locations. Pad to align if needed!
376      const StructLayout *cvsLayout =
377        TD.getStructLayout(CVS->getType());
378      const std::vector<Use>& constValues = CVS->getValues();
379      unsigned sizeSoFar = 0;
380      for (unsigned i=0, N = constValues.size(); i < N; i++)
381        {
382          const Constant* field = cast<Constant>(constValues[i].get());
383
384          // Check if padding is needed and insert one or more 0s.
385          unsigned fieldSize = TD.getTypeSize(field->getType());
386          int padSize = ((i == N-1? cvsLayout->StructSize
387			  : cvsLayout->MemberOffsets[i+1])
388                         - cvsLayout->MemberOffsets[i]) - fieldSize;
389          sizeSoFar += (fieldSize + padSize);
390
391          // Now print the actual field value
392          printConstantValueOnly(field, padSize);
393        }
394      assert(sizeSoFar == cvsLayout->StructSize &&
395             "Layout of constant struct may be incorrect!");
396    }
397  else
398    printSingleConstantValue(CV);
399
400  if (numPadBytesAfter) {
401    unsigned numBytes = numPadBytesAfter;
402    for ( ; numBytes >= 8; numBytes -= 8)
403      printSingleConstantValue(Constant::getNullValue(Type::ULongTy));
404    if (numBytes >= 4)
405      {
406	printSingleConstantValue(Constant::getNullValue(Type::UIntTy));
407	numBytes -= 4;
408      }
409    while (numBytes--)
410      printSingleConstantValue(Constant::getNullValue(Type::UByteTy));
411  }
412}
413
414/// printConstantPool - Print to the current output stream assembly
415/// representations of the constants in the constant pool MCP. This is
416/// used to print out constants which have been "spilled to memory" by
417/// the code generator.
418///
419void Printer::printConstantPool(MachineConstantPool *MCP){
420  const std::vector<Constant*> &CP = MCP->getConstants();
421  const TargetData &TD = TM.getTargetData();
422
423  if (CP.empty()) return;
424
425  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
426    O << "\t.section .rodata\n";
427    O << "\t.align " << (unsigned)TD.getTypeAlignment(CP[i]->getType())
428      << "\n";
429    O << ".CPI" << CurrentFnName << "_" << i << ":\t\t\t\t\t#"
430      << *CP[i] << "\n";
431    printConstantValueOnly (CP[i]);
432  }
433}
434
435/// runOnMachineFunction - This uses the printMachineInstruction()
436/// method to print assembly for each instruction.
437///
438bool Printer::runOnMachineFunction(MachineFunction &MF) {
439  // BBNumber is used here so that a given Printer will never give two
440  // BBs the same name. (If you have a better way, please let me know!)
441  static unsigned BBNumber = 0;
442
443  // What's my mangled name?
444  CurrentFnName = getValueName(MF.getFunction());
445
446  // Print out constants referenced by the function
447  printConstantPool(MF.getConstantPool());
448
449  // Print out labels for the function.
450  O << "\t.text\n";
451  O << "\t.align 16\n";
452  O << "\t.globl\t" << CurrentFnName << "\n";
453  O << "\t.type\t" << CurrentFnName << ", @function\n";
454  O << CurrentFnName << ":\n";
455
456  // Number each basic block so that we can consistently refer to them
457  // in PC-relative references.
458  NumberForBB.clear();
459  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
460       I != E; ++I) {
461    NumberForBB[I->getBasicBlock()] = BBNumber++;
462  }
463
464  // Print out code for the function.
465  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
466       I != E; ++I) {
467    // Print a label for the basic block.
468    O << ".BB" << NumberForBB[I->getBasicBlock()] << ":\t# "
469      << I->getBasicBlock()->getName() << "\n";
470    for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
471	 II != E; ++II) {
472      // Print the assembly for the instruction.
473      O << "\t";
474      printMachineInstruction(*II);
475    }
476  }
477
478  // We didn't modify anything.
479  return false;
480}
481
482static bool isScale(const MachineOperand &MO) {
483  return MO.isImmediate() &&
484    (MO.getImmedValue() == 1 || MO.getImmedValue() == 2 ||
485     MO.getImmedValue() == 4 || MO.getImmedValue() == 8);
486}
487
488static bool isMem(const MachineInstr *MI, unsigned Op) {
489  if (MI->getOperand(Op).isFrameIndex()) return true;
490  if (MI->getOperand(Op).isConstantPoolIndex()) return true;
491  return Op+4 <= MI->getNumOperands() &&
492    MI->getOperand(Op  ).isRegister() &&isScale(MI->getOperand(Op+1)) &&
493    MI->getOperand(Op+2).isRegister() &&MI->getOperand(Op+3).isImmediate();
494}
495
496void Printer::printOp(const MachineOperand &MO,
497		      bool elideOffsetKeyword /* = false */) const {
498  const MRegisterInfo &RI = *TM.getRegisterInfo();
499  switch (MO.getType()) {
500  case MachineOperand::MO_VirtualRegister:
501    if (Value *V = MO.getVRegValueOrNull()) {
502      O << "<" << V->getName() << ">";
503      return;
504    }
505    // FALLTHROUGH
506  case MachineOperand::MO_MachineRegister:
507    if (MO.getReg() < MRegisterInfo::FirstVirtualRegister)
508      O << RI.get(MO.getReg()).Name;
509    else
510      O << "%reg" << MO.getReg();
511    return;
512
513  case MachineOperand::MO_SignExtendedImmed:
514  case MachineOperand::MO_UnextendedImmed:
515    O << (int)MO.getImmedValue();
516    return;
517  case MachineOperand::MO_PCRelativeDisp:
518    {
519      ValueMapTy::const_iterator i = NumberForBB.find(MO.getVRegValue());
520      assert (i != NumberForBB.end()
521	      && "Could not find a BB I previously put in the NumberForBB map!");
522      O << ".BB" << i->second << " # PC rel: " << MO.getVRegValue()->getName();
523    }
524    return;
525  case MachineOperand::MO_GlobalAddress:
526    if (!elideOffsetKeyword) O << "OFFSET "; O << getValueName(MO.getGlobal());
527    return;
528  case MachineOperand::MO_ExternalSymbol:
529    O << MO.getSymbolName();
530    return;
531  default:
532    O << "<unknown operand type>"; return;
533  }
534}
535
536static const std::string sizePtr(const TargetInstrDescriptor &Desc) {
537  switch (Desc.TSFlags & X86II::ArgMask) {
538  default: assert(0 && "Unknown arg size!");
539  case X86II::Arg8:   return "BYTE PTR";
540  case X86II::Arg16:  return "WORD PTR";
541  case X86II::Arg32:  return "DWORD PTR";
542  case X86II::Arg64:  return "QWORD PTR";
543  case X86II::ArgF32:  return "DWORD PTR";
544  case X86II::ArgF64:  return "QWORD PTR";
545  case X86II::ArgF80:  return "XWORD PTR";
546  }
547}
548
549void Printer::printMemReference(const MachineInstr *MI, unsigned Op) const {
550  const MRegisterInfo &RI = *TM.getRegisterInfo();
551  assert(isMem(MI, Op) && "Invalid memory reference!");
552
553  if (MI->getOperand(Op).isFrameIndex()) {
554    O << "[frame slot #" << MI->getOperand(Op).getFrameIndex();
555    if (MI->getOperand(Op+3).getImmedValue())
556      O << " + " << MI->getOperand(Op+3).getImmedValue();
557    O << "]";
558    return;
559  } else if (MI->getOperand(Op).isConstantPoolIndex()) {
560    O << "[.CPI" << CurrentFnName << "_"
561      << MI->getOperand(Op).getConstantPoolIndex();
562    if (MI->getOperand(Op+3).getImmedValue())
563      O << " + " << MI->getOperand(Op+3).getImmedValue();
564    O << "]";
565    return;
566  }
567
568  const MachineOperand &BaseReg  = MI->getOperand(Op);
569  int ScaleVal                   = MI->getOperand(Op+1).getImmedValue();
570  const MachineOperand &IndexReg = MI->getOperand(Op+2);
571  int DispVal                    = MI->getOperand(Op+3).getImmedValue();
572
573  O << "[";
574  bool NeedPlus = false;
575  if (BaseReg.getReg()) {
576    printOp(BaseReg);
577    NeedPlus = true;
578  }
579
580  if (IndexReg.getReg()) {
581    if (NeedPlus) O << " + ";
582    if (ScaleVal != 1)
583      O << ScaleVal << "*";
584    printOp(IndexReg);
585    NeedPlus = true;
586  }
587
588  if (DispVal) {
589    if (NeedPlus)
590      if (DispVal > 0)
591	O << " + ";
592      else {
593	O << " - ";
594	DispVal = -DispVal;
595      }
596    O << DispVal;
597  }
598  O << "]";
599}
600
601/// printMachineInstruction -- Print out a single X86 LLVM instruction
602/// MI in Intel syntax to the current output stream.
603///
604void Printer::printMachineInstruction(const MachineInstr *MI) const {
605  unsigned Opcode = MI->getOpcode();
606  const TargetInstrInfo &TII = TM.getInstrInfo();
607  const TargetInstrDescriptor &Desc = TII.get(Opcode);
608  const MRegisterInfo &RI = *TM.getRegisterInfo();
609
610  switch (Desc.TSFlags & X86II::FormMask) {
611  case X86II::Pseudo:
612    // Print pseudo-instructions as comments; either they should have been
613    // turned into real instructions by now, or they don't need to be
614    // seen by the assembler (e.g., IMPLICIT_USEs.)
615    O << "# ";
616    if (Opcode == X86::PHI) {
617      printOp(MI->getOperand(0));
618      O << " = phi ";
619      for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
620	if (i != 1) O << ", ";
621	O << "[";
622	printOp(MI->getOperand(i));
623	O << ", ";
624	printOp(MI->getOperand(i+1));
625	O << "]";
626      }
627    } else {
628      unsigned i = 0;
629      if (MI->getNumOperands() && (MI->getOperand(0).opIsDefOnly() ||
630                                   MI->getOperand(0).opIsDefAndUse())) {
631	printOp(MI->getOperand(0));
632	O << " = ";
633	++i;
634      }
635      O << TII.getName(MI->getOpcode());
636
637      for (unsigned e = MI->getNumOperands(); i != e; ++i) {
638	O << " ";
639	if (MI->getOperand(i).opIsDefOnly() ||
640            MI->getOperand(i).opIsDefAndUse()) O << "*";
641	printOp(MI->getOperand(i));
642	if (MI->getOperand(i).opIsDefOnly() ||
643            MI->getOperand(i).opIsDefAndUse()) O << "*";
644      }
645    }
646    O << "\n";
647    return;
648
649  case X86II::RawFrm:
650    // The accepted forms of Raw instructions are:
651    //   1. nop     - No operand required
652    //   2. jmp foo - PC relative displacement operand
653    //   3. call bar - GlobalAddress Operand or External Symbol Operand
654    //
655    assert(MI->getNumOperands() == 0 ||
656           (MI->getNumOperands() == 1 &&
657	    (MI->getOperand(0).isPCRelativeDisp() ||
658	     MI->getOperand(0).isGlobalAddress() ||
659	     MI->getOperand(0).isExternalSymbol())) &&
660           "Illegal raw instruction!");
661    O << TII.getName(MI->getOpcode()) << " ";
662
663    if (MI->getNumOperands() == 1) {
664      printOp(MI->getOperand(0), true); // Don't print "OFFSET"...
665    }
666    O << "\n";
667    return;
668
669  case X86II::AddRegFrm: {
670    // There are currently two forms of acceptable AddRegFrm instructions.
671    // Either the instruction JUST takes a single register (like inc, dec, etc),
672    // or it takes a register and an immediate of the same size as the register
673    // (move immediate f.e.).  Note that this immediate value might be stored as
674    // an LLVM value, to represent, for example, loading the address of a global
675    // into a register.  The initial register might be duplicated if this is a
676    // M_2_ADDR_REG instruction
677    //
678    assert(MI->getOperand(0).isRegister() &&
679           (MI->getNumOperands() == 1 ||
680            (MI->getNumOperands() == 2 &&
681             (MI->getOperand(1).getVRegValueOrNull() ||
682              MI->getOperand(1).isImmediate() ||
683	      MI->getOperand(1).isRegister() ||
684	      MI->getOperand(1).isGlobalAddress() ||
685	      MI->getOperand(1).isExternalSymbol()))) &&
686           "Illegal form for AddRegFrm instruction!");
687
688    unsigned Reg = MI->getOperand(0).getReg();
689
690    O << TII.getName(MI->getOpCode()) << " ";
691    printOp(MI->getOperand(0));
692    if (MI->getNumOperands() == 2 &&
693	(!MI->getOperand(1).isRegister() ||
694	 MI->getOperand(1).getVRegValueOrNull() ||
695	 MI->getOperand(1).isGlobalAddress() ||
696	 MI->getOperand(1).isExternalSymbol())) {
697      O << ", ";
698      printOp(MI->getOperand(1));
699    }
700    if (Desc.TSFlags & X86II::PrintImplUses) {
701      for (const unsigned *p = Desc.ImplicitUses; *p; ++p) {
702	O << ", " << RI.get(*p).Name;
703      }
704    }
705    O << "\n";
706    return;
707  }
708  case X86II::MRMDestReg: {
709    // There are two acceptable forms of MRMDestReg instructions, those with 2,
710    // 3 and 4 operands:
711    //
712    // 2 Operands: this is for things like mov that do not read a second input
713    //
714    // 3 Operands: in this form, the first two registers (the destination, and
715    // the first operand) should be the same, post register allocation.  The 3rd
716    // operand is an additional input.  This should be for things like add
717    // instructions.
718    //
719    // 4 Operands: This form is for instructions which are 3 operands forms, but
720    // have a constant argument as well.
721    //
722    bool isTwoAddr = TII.isTwoAddrInstr(Opcode);
723    assert(MI->getOperand(0).isRegister() &&
724           (MI->getNumOperands() == 2 ||
725	    (isTwoAddr && MI->getOperand(1).isRegister() &&
726	     MI->getOperand(0).getReg() == MI->getOperand(1).getReg() &&
727	     (MI->getNumOperands() == 3 ||
728	      (MI->getNumOperands() == 4 && MI->getOperand(3).isImmediate()))))
729           && "Bad format for MRMDestReg!");
730
731    O << TII.getName(MI->getOpCode()) << " ";
732    printOp(MI->getOperand(0));
733    O << ", ";
734    printOp(MI->getOperand(1+isTwoAddr));
735    if (MI->getNumOperands() == 4) {
736      O << ", ";
737      printOp(MI->getOperand(3));
738    }
739    O << "\n";
740    return;
741  }
742
743  case X86II::MRMDestMem: {
744    // These instructions are the same as MRMDestReg, but instead of having a
745    // register reference for the mod/rm field, it's a memory reference.
746    //
747    assert(isMem(MI, 0) && MI->getNumOperands() == 4+1 &&
748           MI->getOperand(4).isRegister() && "Bad format for MRMDestMem!");
749
750    O << TII.getName(MI->getOpCode()) << " " << sizePtr(Desc) << " ";
751    printMemReference(MI, 0);
752    O << ", ";
753    printOp(MI->getOperand(4));
754    O << "\n";
755    return;
756  }
757
758  case X86II::MRMSrcReg: {
759    // There is a two forms that are acceptable for MRMSrcReg instructions,
760    // those with 3 and 2 operands:
761    //
762    // 3 Operands: in this form, the last register (the second input) is the
763    // ModR/M input.  The first two operands should be the same, post register
764    // allocation.  This is for things like: add r32, r/m32
765    //
766    // 2 Operands: this is for things like mov that do not read a second input
767    //
768    assert(MI->getOperand(0).isRegister() &&
769           MI->getOperand(1).isRegister() &&
770           (MI->getNumOperands() == 2 ||
771            (MI->getNumOperands() == 3 && MI->getOperand(2).isRegister()))
772           && "Bad format for MRMSrcReg!");
773    if (MI->getNumOperands() == 3 &&
774        MI->getOperand(0).getReg() != MI->getOperand(1).getReg())
775      O << "**";
776
777    O << TII.getName(MI->getOpCode()) << " ";
778    printOp(MI->getOperand(0));
779    O << ", ";
780    printOp(MI->getOperand(MI->getNumOperands()-1));
781    O << "\n";
782    return;
783  }
784
785  case X86II::MRMSrcMem: {
786    // These instructions are the same as MRMSrcReg, but instead of having a
787    // register reference for the mod/rm field, it's a memory reference.
788    //
789    assert(MI->getOperand(0).isRegister() &&
790           (MI->getNumOperands() == 1+4 && isMem(MI, 1)) ||
791           (MI->getNumOperands() == 2+4 && MI->getOperand(1).isRegister() &&
792            isMem(MI, 2))
793           && "Bad format for MRMDestReg!");
794    if (MI->getNumOperands() == 2+4 &&
795        MI->getOperand(0).getReg() != MI->getOperand(1).getReg())
796      O << "**";
797
798    O << TII.getName(MI->getOpCode()) << " ";
799    printOp(MI->getOperand(0));
800    O << ", " << sizePtr(Desc) << " ";
801    printMemReference(MI, MI->getNumOperands()-4);
802    O << "\n";
803    return;
804  }
805
806  case X86II::MRMS0r: case X86II::MRMS1r:
807  case X86II::MRMS2r: case X86II::MRMS3r:
808  case X86II::MRMS4r: case X86II::MRMS5r:
809  case X86II::MRMS6r: case X86II::MRMS7r: {
810    // In this form, the following are valid formats:
811    //  1. sete r
812    //  2. cmp reg, immediate
813    //  2. shl rdest, rinput  <implicit CL or 1>
814    //  3. sbb rdest, rinput, immediate   [rdest = rinput]
815    //
816    assert(MI->getNumOperands() > 0 && MI->getNumOperands() < 4 &&
817           MI->getOperand(0).isRegister() && "Bad MRMSxR format!");
818    assert((MI->getNumOperands() != 2 ||
819            MI->getOperand(1).isRegister() || MI->getOperand(1).isImmediate())&&
820           "Bad MRMSxR format!");
821    assert((MI->getNumOperands() < 3 ||
822	    (MI->getOperand(1).isRegister() && MI->getOperand(2).isImmediate())) &&
823           "Bad MRMSxR format!");
824
825    if (MI->getNumOperands() > 1 && MI->getOperand(1).isRegister() &&
826        MI->getOperand(0).getReg() != MI->getOperand(1).getReg())
827      O << "**";
828
829    O << TII.getName(MI->getOpCode()) << " ";
830    printOp(MI->getOperand(0));
831    if (MI->getOperand(MI->getNumOperands()-1).isImmediate()) {
832      O << ", ";
833      printOp(MI->getOperand(MI->getNumOperands()-1));
834    }
835    if (Desc.TSFlags & X86II::PrintImplUses) {
836      for (const unsigned *p = Desc.ImplicitUses; *p; ++p) {
837	O << ", " << RI.get(*p).Name;
838      }
839    }
840    O << "\n";
841
842    return;
843  }
844
845  case X86II::MRMS0m: case X86II::MRMS1m:
846  case X86II::MRMS2m: case X86II::MRMS3m:
847  case X86II::MRMS4m: case X86II::MRMS5m:
848  case X86II::MRMS6m: case X86II::MRMS7m: {
849    // In this form, the following are valid formats:
850    //  1. sete [m]
851    //  2. cmp [m], immediate
852    //  2. shl [m], rinput  <implicit CL or 1>
853    //  3. sbb [m], immediate
854    //
855    assert(MI->getNumOperands() >= 4 && MI->getNumOperands() <= 5 &&
856           isMem(MI, 0) && "Bad MRMSxM format!");
857    assert((MI->getNumOperands() != 5 || MI->getOperand(4).isImmediate()) &&
858           "Bad MRMSxM format!");
859    // Bug: The 80-bit FP store-pop instruction "fstp XWORD PTR [...]"
860    // is misassembled by gas in intel_syntax mode as its 32-bit
861    // equivalent "fstp DWORD PTR [...]". Workaround: Output the raw
862    // opcode bytes instead of the instruction.
863    if (MI->getOpCode() == X86::FSTPr80) {
864      if ((MI->getOperand(0).getReg() == X86::ESP)
865	  && (MI->getOperand(1).getImmedValue() == 1)) {
866	int DispVal = MI->getOperand(3).getImmedValue();
867	if ((DispVal < -128) || (DispVal > 127)) { // 4 byte disp.
868          unsigned int val = (unsigned int) DispVal;
869          O << ".byte 0xdb, 0xbc, 0x24\n\t";
870          O << ".long 0x" << std::hex << (unsigned) val << std::dec << "\t# ";
871	} else { // 1 byte disp.
872          unsigned char val = (unsigned char) DispVal;
873          O << ".byte 0xdb, 0x7c, 0x24, 0x" << std::hex << (unsigned) val
874            << std::dec << "\t# ";
875	}
876      }
877    }
878    // Bug: The 80-bit FP load instruction "fld XWORD PTR [...]" is
879    // misassembled by gas in intel_syntax mode as its 32-bit
880    // equivalent "fld DWORD PTR [...]". Workaround: Output the raw
881    // opcode bytes instead of the instruction.
882    if (MI->getOpCode() == X86::FLDr80) {
883      if ((MI->getOperand(0).getReg() == X86::ESP)
884          && (MI->getOperand(1).getImmedValue() == 1)) {
885	int DispVal = MI->getOperand(3).getImmedValue();
886	if ((DispVal < -128) || (DispVal > 127)) { // 4 byte disp.
887          unsigned int val = (unsigned int) DispVal;
888          O << ".byte 0xdb, 0xac, 0x24\n\t";
889          O << ".long 0x" << std::hex << (unsigned) val << std::dec << "\t# ";
890	} else { // 1 byte disp.
891          unsigned char val = (unsigned char) DispVal;
892          O << ".byte 0xdb, 0x6c, 0x24, 0x" << std::hex << (unsigned) val
893            << std::dec << "\t# ";
894	}
895      }
896    }
897    // Bug: gas intel_syntax mode treats "fild QWORD PTR [...]" as an
898    // invalid opcode, saying "64 bit operations are only supported in
899    // 64 bit modes." libopcodes disassembles it as "fild DWORD PTR
900    // [...]", which is wrong. Workaround: Output the raw opcode bytes
901    // instead of the instruction.
902    if (MI->getOpCode() == X86::FILDr64) {
903      if ((MI->getOperand(0).getReg() == X86::ESP)
904          && (MI->getOperand(1).getImmedValue() == 1)) {
905	int DispVal = MI->getOperand(3).getImmedValue();
906	if ((DispVal < -128) || (DispVal > 127)) { // 4 byte disp.
907          unsigned int val = (unsigned int) DispVal;
908          O << ".byte 0xdf, 0xac, 0x24\n\t";
909          O << ".long 0x" << std::hex << (unsigned) val << std::dec << "\t# ";
910	} else { // 1 byte disp.
911          unsigned char val = (unsigned char) DispVal;
912          O << ".byte 0xdf, 0x6c, 0x24, 0x" << std::hex << (unsigned) val
913            << std::dec << "\t# ";
914	}
915      }
916    }
917    // Bug: gas intel_syntax mode treats "fistp QWORD PTR [...]" as
918    // an invalid opcode, saying "64 bit operations are only
919    // supported in 64 bit modes." libopcodes disassembles it as
920    // "fistpll DWORD PTR [...]", which is wrong. Workaround: Output
921    // "fistpll DWORD PTR " instead, which is what libopcodes is
922    // expecting to see.
923    if (MI->getOpCode() == X86::FISTPr64) {
924      O << "fistpll DWORD PTR ";
925      printMemReference(MI, 0);
926      if (MI->getNumOperands() == 5) {
927	O << ", ";
928	printOp(MI->getOperand(4));
929      }
930      O << "\t# ";
931    }
932
933    O << TII.getName(MI->getOpCode()) << " ";
934    O << sizePtr(Desc) << " ";
935    printMemReference(MI, 0);
936    if (MI->getNumOperands() == 5) {
937      O << ", ";
938      printOp(MI->getOperand(4));
939    }
940    O << "\n";
941    return;
942  }
943
944  default:
945    O << "\tUNKNOWN FORM:\t\t-"; MI->print(O, TM); break;
946  }
947}
948
949bool Printer::doInitialization(Module &M)
950{
951  // Tell gas we are outputting Intel syntax (not AT&T syntax) assembly,
952  // with no % decorations on register names.
953  O << "\t.intel_syntax noprefix\n";
954
955  // Ripped from CWriter:
956  // Calculate which global values have names that will collide when we throw
957  // away type information.
958  {  // Scope to delete the FoundNames set when we are done with it...
959    std::set<std::string> FoundNames;
960    for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
961      if (I->hasName())                      // If the global has a name...
962        if (FoundNames.count(I->getName()))  // And the name is already used
963          MangledGlobals.insert(I);          // Mangle the name
964        else
965          FoundNames.insert(I->getName());   // Otherwise, keep track of name
966
967    for (Module::giterator I = M.gbegin(), E = M.gend(); I != E; ++I)
968      if (I->hasName())                      // If the global has a name...
969        if (FoundNames.count(I->getName()))  // And the name is already used
970          MangledGlobals.insert(I);          // Mangle the name
971        else
972          FoundNames.insert(I->getName());   // Otherwise, keep track of name
973  }
974
975  return false; // success
976}
977
978static const Function *isConstantFunctionPointerRef(const Constant *C) {
979  if (const ConstantPointerRef *R = dyn_cast<ConstantPointerRef>(C))
980    if (const Function *F = dyn_cast<Function>(R->getValue()))
981      return F;
982  return 0;
983}
984
985bool Printer::doFinalization(Module &M)
986{
987  const TargetData &TD = TM.getTargetData();
988  // Print out module-level global variables here.
989  for (Module::const_giterator I = M.gbegin(), E = M.gend(); I != E; ++I) {
990    std::string name(getValueName(I));
991    if (I->hasInitializer()) {
992      Constant *C = I->getInitializer();
993      O << "\t.data\n";
994      O << "\t.globl " << name << "\n";
995      O << "\t.type " << name << ",@object\n";
996      O << "\t.size " << name << ","
997	<< (unsigned)TD.getTypeSize(I->getType()) << "\n";
998      O << "\t.align " << (unsigned)TD.getTypeAlignment(C->getType()) << "\n";
999      O << name << ":\t\t\t\t\t#";
1000      // If this is a constant function pointer, we only print out the
1001      // name of the function in the comment (because printing the
1002      // function means calling AsmWriter to print the whole LLVM
1003      // assembly, which would corrupt the X86 assembly output.)
1004      // Otherwise we print out the whole llvm value as a comment.
1005      if (const Function *F = isConstantFunctionPointerRef (C)) {
1006	O << " %" << F->getName() << "()\n";
1007      } else {
1008	O << *C << "\n";
1009      }
1010      printConstantValueOnly (C);
1011    } else {
1012      O << "\t.globl " << name << "\n";
1013      O << "\t.comm " << name << ", "
1014        << (unsigned)TD.getTypeSize(I->getType()) << ", "
1015        << (unsigned)TD.getTypeAlignment(I->getType()) << "\n";
1016    }
1017  }
1018  MangledGlobals.clear();
1019  return false; // success
1020}
1021