X86Disassembler.cpp revision 73f50d9bc3bd46cc0abeba9bb0d46977ba1aea42
1//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is part of the X86 Disassembler.
11// It contains code to translate the data produced by the decoder into
12//  MCInsts.
13// Documentation for the disassembler can be found in X86Disassembler.h.
14//
15//===----------------------------------------------------------------------===//
16
17#include "X86Disassembler.h"
18#include "X86DisassemblerDecoder.h"
19
20#include "llvm/MC/EDInstInfo.h"
21#include "llvm/MC/MCDisassembler.h"
22#include "llvm/MC/MCDisassembler.h"
23#include "llvm/MC/MCInst.h"
24#include "llvm/Target/TargetRegistry.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/MemoryObject.h"
27#include "llvm/Support/raw_ostream.h"
28
29#define GET_REGINFO_ENUM
30#include "X86GenRegisterInfo.inc"
31#include "X86GenEDInfo.inc"
32
33using namespace llvm;
34using namespace llvm::X86Disassembler;
35
36void x86DisassemblerDebug(const char *file,
37                          unsigned line,
38                          const char *s) {
39  dbgs() << file << ":" << line << ": " << s;
40}
41
// debug - Forwards a diagnostic string to x86DisassemblerDebug(), tagged with
//   the source location of the call, through LLVM's DEBUG() wrapper.  The
//   expansion deliberately carries no trailing semicolon: the call site
//   supplies it, so "if (c) debug(...); else ..." remains a single statement.
#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s))
43
44namespace llvm {
45
46// Fill-ins to make the compiler happy.  These constants are never actually
47//   assigned; they are just filler to make an automatically-generated switch
48//   statement work.
49namespace X86 {
50  enum {
51    BX_SI = 500,
52    BX_DI = 501,
53    BP_SI = 502,
54    BP_DI = 503,
55    sib   = 504,
56    sib64 = 505
57  };
58}
59
60extern Target TheX86_32Target, TheX86_64Target;
61
62}
63
64static bool translateInstruction(MCInst &target,
65                                InternalInstruction &source);
66
67X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
68    MCDisassembler(),
69    fMode(mode) {
70}
71
72X86GenericDisassembler::~X86GenericDisassembler() {
73}
74
75EDInstInfo *X86GenericDisassembler::getEDInfo() const {
76  return instInfoX86;
77}
78
79/// regionReader - a callback function that wraps the readByte method from
80///   MemoryObject.
81///
82/// @param arg      - The generic callback parameter.  In this case, this should
83///                   be a pointer to a MemoryObject.
84/// @param byte     - A pointer to the byte to be read.
85/// @param address  - The address to be read.
86static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
87  MemoryObject* region = static_cast<MemoryObject*>(arg);
88  return region->readByte(address, byte);
89}
90
91/// logger - a callback function that wraps the operator<< method from
92///   raw_ostream.
93///
94/// @param arg      - The generic callback parameter.  This should be a pointe
95///                   to a raw_ostream.
96/// @param log      - A string to be logged.  logger() adds a newline.
97static void logger(void* arg, const char* log) {
98  if (!arg)
99    return;
100
101  raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
102  vStream << log << "\n";
103}
104
105//
106// Public interface for the disassembler
107//
108
109bool X86GenericDisassembler::getInstruction(MCInst &instr,
110                                            uint64_t &size,
111                                            const MemoryObject &region,
112                                            uint64_t address,
113                                            raw_ostream &vStream) const {
114  InternalInstruction internalInstr;
115
116  int ret = decodeInstruction(&internalInstr,
117                              regionReader,
118                              (void*)&region,
119                              logger,
120                              (void*)&vStream,
121                              address,
122                              fMode);
123
124  if (ret) {
125    size = internalInstr.readerCursor - address;
126    return false;
127  }
128  else {
129    size = internalInstr.length;
130    return !translateInstruction(instr, internalInstr);
131  }
132}
133
134//
135// Private code that translates from struct InternalInstructions to MCInsts.
136//
137
138/// translateRegister - Translates an internal register to the appropriate LLVM
139///   register, and appends it as an operand to an MCInst.
140///
141/// @param mcInst     - The MCInst to append to.
142/// @param reg        - The Reg to append.
143static void translateRegister(MCInst &mcInst, Reg reg) {
144#define ENTRY(x) X86::x,
145  uint8_t llvmRegnums[] = {
146    ALL_REGS
147    0
148  };
149#undef ENTRY
150
151  uint8_t llvmRegnum = llvmRegnums[reg];
152  mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
153}
154
155/// translateImmediate  - Appends an immediate operand to an MCInst.
156///
157/// @param mcInst       - The MCInst to append to.
158/// @param immediate    - The immediate value to append.
159/// @param operand      - The operand, as stored in the descriptor table.
160/// @param insn         - The internal instruction.
161static void translateImmediate(MCInst &mcInst, uint64_t immediate,
162                               const OperandSpecifier &operand,
163                               InternalInstruction &insn) {
164  // Sign-extend the immediate if necessary.
165
166  OperandType type = operand.type;
167
168  if (type == TYPE_RELv) {
169    switch (insn.displacementSize) {
170    default:
171      break;
172    case 1:
173      type = TYPE_MOFFS8;
174      break;
175    case 2:
176      type = TYPE_MOFFS16;
177      break;
178    case 4:
179      type = TYPE_MOFFS32;
180      break;
181    case 8:
182      type = TYPE_MOFFS64;
183      break;
184    }
185  }
186
187  switch (type) {
188  case TYPE_MOFFS8:
189  case TYPE_REL8:
190    if(immediate & 0x80)
191      immediate |= ~(0xffull);
192    break;
193  case TYPE_MOFFS16:
194    if(immediate & 0x8000)
195      immediate |= ~(0xffffull);
196    break;
197  case TYPE_MOFFS32:
198  case TYPE_REL32:
199  case TYPE_REL64:
200    if(immediate & 0x80000000)
201      immediate |= ~(0xffffffffull);
202    break;
203  case TYPE_MOFFS64:
204  default:
205    // operand is 64 bits wide.  Do nothing.
206    break;
207  }
208
209  mcInst.addOperand(MCOperand::CreateImm(immediate));
210}
211
212/// translateRMRegister - Translates a register stored in the R/M field of the
213///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
214/// @param mcInst       - The MCInst to append to.
215/// @param insn         - The internal instruction to extract the R/M field
216///                       from.
217/// @return             - 0 on success; -1 otherwise
218static bool translateRMRegister(MCInst &mcInst,
219                                InternalInstruction &insn) {
220  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
221    debug("A R/M register operand may not have a SIB byte");
222    return true;
223  }
224
225  switch (insn.eaBase) {
226  default:
227    debug("Unexpected EA base register");
228    return true;
229  case EA_BASE_NONE:
230    debug("EA_BASE_NONE for ModR/M base");
231    return true;
232#define ENTRY(x) case EA_BASE_##x:
233  ALL_EA_BASES
234#undef ENTRY
235    debug("A R/M register operand may not have a base; "
236          "the operand must be a register.");
237    return true;
238#define ENTRY(x)                                                      \
239  case EA_REG_##x:                                                    \
240    mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
241  ALL_REGS
242#undef ENTRY
243  }
244
245  return false;
246}
247
248/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
249///   fields of an internal instruction (and possibly its SIB byte) to a memory
250///   operand in LLVM's format, and appends it to an MCInst.
251///
252/// @param mcInst       - The MCInst to append to.
253/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
254///                       from.
255/// @return             - 0 on success; nonzero otherwise
256static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
257  // Addresses in an MCInst are represented as five operands:
258  //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
259  //                                SIB base
260  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
261  //                                scale amount
262  //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
263  //                                the index (which is multiplied by the
264  //                                scale amount)
265  //   4. displacement  (immediate) 0, or the displacement if there is one
266  //   5. segmentreg    (register)  x86_registerNONE for now, but could be set
267  //                                if we have segment overrides
268
269  MCOperand baseReg;
270  MCOperand scaleAmount;
271  MCOperand indexReg;
272  MCOperand displacement;
273  MCOperand segmentReg;
274
275  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
276    if (insn.sibBase != SIB_BASE_NONE) {
277      switch (insn.sibBase) {
278      default:
279        debug("Unexpected sibBase");
280        return true;
281#define ENTRY(x)                                          \
282      case SIB_BASE_##x:                                  \
283        baseReg = MCOperand::CreateReg(X86::x); break;
284      ALL_SIB_BASES
285#undef ENTRY
286      }
287    } else {
288      baseReg = MCOperand::CreateReg(0);
289    }
290
291    if (insn.sibIndex != SIB_INDEX_NONE) {
292      switch (insn.sibIndex) {
293      default:
294        debug("Unexpected sibIndex");
295        return true;
296#define ENTRY(x)                                          \
297      case SIB_INDEX_##x:                                 \
298        indexReg = MCOperand::CreateReg(X86::x); break;
299      EA_BASES_32BIT
300      EA_BASES_64BIT
301#undef ENTRY
302      }
303    } else {
304      indexReg = MCOperand::CreateReg(0);
305    }
306
307    scaleAmount = MCOperand::CreateImm(insn.sibScale);
308  } else {
309    switch (insn.eaBase) {
310    case EA_BASE_NONE:
311      if (insn.eaDisplacement == EA_DISP_NONE) {
312        debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
313        return true;
314      }
315      if (insn.mode == MODE_64BIT)
316        baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
317      else
318        baseReg = MCOperand::CreateReg(0);
319
320      indexReg = MCOperand::CreateReg(0);
321      break;
322    case EA_BASE_BX_SI:
323      baseReg = MCOperand::CreateReg(X86::BX);
324      indexReg = MCOperand::CreateReg(X86::SI);
325      break;
326    case EA_BASE_BX_DI:
327      baseReg = MCOperand::CreateReg(X86::BX);
328      indexReg = MCOperand::CreateReg(X86::DI);
329      break;
330    case EA_BASE_BP_SI:
331      baseReg = MCOperand::CreateReg(X86::BP);
332      indexReg = MCOperand::CreateReg(X86::SI);
333      break;
334    case EA_BASE_BP_DI:
335      baseReg = MCOperand::CreateReg(X86::BP);
336      indexReg = MCOperand::CreateReg(X86::DI);
337      break;
338    default:
339      indexReg = MCOperand::CreateReg(0);
340      switch (insn.eaBase) {
341      default:
342        debug("Unexpected eaBase");
343        return true;
344        // Here, we will use the fill-ins defined above.  However,
345        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
346        //   sib and sib64 were handled in the top-level if, so they're only
347        //   placeholders to keep the compiler happy.
348#define ENTRY(x)                                        \
349      case EA_BASE_##x:                                 \
350        baseReg = MCOperand::CreateReg(X86::x); break;
351      ALL_EA_BASES
352#undef ENTRY
353#define ENTRY(x) case EA_REG_##x:
354      ALL_REGS
355#undef ENTRY
356        debug("A R/M memory operand may not be a register; "
357              "the base field must be a base.");
358        return true;
359      }
360    }
361
362    scaleAmount = MCOperand::CreateImm(1);
363  }
364
365  displacement = MCOperand::CreateImm(insn.displacement);
366
367  static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
368    0,        // SEG_OVERRIDE_NONE
369    X86::CS,
370    X86::SS,
371    X86::DS,
372    X86::ES,
373    X86::FS,
374    X86::GS
375  };
376
377  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
378
379  mcInst.addOperand(baseReg);
380  mcInst.addOperand(scaleAmount);
381  mcInst.addOperand(indexReg);
382  mcInst.addOperand(displacement);
383  mcInst.addOperand(segmentReg);
384  return false;
385}
386
387/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
388///   byte of an instruction to LLVM form, and appends it to an MCInst.
389///
390/// @param mcInst       - The MCInst to append to.
391/// @param operand      - The operand, as stored in the descriptor table.
392/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
393///                       from.
394/// @return             - 0 on success; nonzero otherwise
395static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
396                        InternalInstruction &insn) {
397  switch (operand.type) {
398  default:
399    debug("Unexpected type for a R/M operand");
400    return true;
401  case TYPE_R8:
402  case TYPE_R16:
403  case TYPE_R32:
404  case TYPE_R64:
405  case TYPE_Rv:
406  case TYPE_MM:
407  case TYPE_MM32:
408  case TYPE_MM64:
409  case TYPE_XMM:
410  case TYPE_XMM32:
411  case TYPE_XMM64:
412  case TYPE_XMM128:
413  case TYPE_XMM256:
414  case TYPE_DEBUGREG:
415  case TYPE_CONTROLREG:
416    return translateRMRegister(mcInst, insn);
417  case TYPE_M:
418  case TYPE_M8:
419  case TYPE_M16:
420  case TYPE_M32:
421  case TYPE_M64:
422  case TYPE_M128:
423  case TYPE_M256:
424  case TYPE_M512:
425  case TYPE_Mv:
426  case TYPE_M32FP:
427  case TYPE_M64FP:
428  case TYPE_M80FP:
429  case TYPE_M16INT:
430  case TYPE_M32INT:
431  case TYPE_M64INT:
432  case TYPE_M1616:
433  case TYPE_M1632:
434  case TYPE_M1664:
435  case TYPE_LEA:
436    return translateRMMemory(mcInst, insn);
437  }
438}
439
440/// translateFPRegister - Translates a stack position on the FPU stack to its
441///   LLVM form, and appends it to an MCInst.
442///
443/// @param mcInst       - The MCInst to append to.
444/// @param stackPos     - The stack position to translate.
445/// @return             - 0 on success; nonzero otherwise.
446static bool translateFPRegister(MCInst &mcInst,
447                               uint8_t stackPos) {
448  if (stackPos >= 8) {
449    debug("Invalid FP stack position");
450    return true;
451  }
452
453  mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
454
455  return false;
456}
457
458/// translateOperand - Translates an operand stored in an internal instruction
459///   to LLVM's format and appends it to an MCInst.
460///
461/// @param mcInst       - The MCInst to append to.
462/// @param operand      - The operand, as stored in the descriptor table.
463/// @param insn         - The internal instruction.
464/// @return             - false on success; true otherwise.
465static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
466                             InternalInstruction &insn) {
467  switch (operand.encoding) {
468  default:
469    debug("Unhandled operand encoding during translation");
470    return true;
471  case ENCODING_REG:
472    translateRegister(mcInst, insn.reg);
473    return false;
474  case ENCODING_RM:
475    return translateRM(mcInst, operand, insn);
476  case ENCODING_CB:
477  case ENCODING_CW:
478  case ENCODING_CD:
479  case ENCODING_CP:
480  case ENCODING_CO:
481  case ENCODING_CT:
482    debug("Translation of code offsets isn't supported.");
483    return true;
484  case ENCODING_IB:
485  case ENCODING_IW:
486  case ENCODING_ID:
487  case ENCODING_IO:
488  case ENCODING_Iv:
489  case ENCODING_Ia:
490    translateImmediate(mcInst,
491                       insn.immediates[insn.numImmediatesTranslated++],
492                       operand,
493                       insn);
494    return false;
495  case ENCODING_RB:
496  case ENCODING_RW:
497  case ENCODING_RD:
498  case ENCODING_RO:
499    translateRegister(mcInst, insn.opcodeRegister);
500    return false;
501  case ENCODING_I:
502    return translateFPRegister(mcInst, insn.opcodeModifier);
503  case ENCODING_Rv:
504    translateRegister(mcInst, insn.opcodeRegister);
505    return false;
506  case ENCODING_VVVV:
507    translateRegister(mcInst, insn.vvvv);
508    return false;
509  case ENCODING_DUP:
510    return translateOperand(mcInst,
511                            insn.spec->operands[operand.type - TYPE_DUP0],
512                            insn);
513  }
514}
515
516/// translateInstruction - Translates an internal instruction and all its
517///   operands to an MCInst.
518///
519/// @param mcInst       - The MCInst to populate with the instruction's data.
520/// @param insn         - The internal instruction.
521/// @return             - false on success; true otherwise.
522static bool translateInstruction(MCInst &mcInst,
523                                InternalInstruction &insn) {
524  if (!insn.spec) {
525    debug("Instruction has no specification");
526    return true;
527  }
528
529  mcInst.setOpcode(insn.instructionID);
530
531  int index;
532
533  insn.numImmediatesTranslated = 0;
534
535  for (index = 0; index < X86_MAX_OPERANDS; ++index) {
536    if (insn.spec->operands[index].encoding != ENCODING_NONE) {
537      if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
538        return true;
539      }
540    }
541  }
542
543  return false;
544}
545
546static MCDisassembler *createX86_32Disassembler(const Target &T) {
547  return new X86Disassembler::X86_32Disassembler;
548}
549
550static MCDisassembler *createX86_64Disassembler(const Target &T) {
551  return new X86Disassembler::X86_64Disassembler;
552}
553
554extern "C" void LLVMInitializeX86Disassembler() {
555  // Register the disassembler.
556  TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
557                                         createX86_32Disassembler);
558  TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
559                                         createX86_64Disassembler);
560}
561