X86Disassembler.cpp revision a21e2eae3def2fe39caed861dcb73c76c715569b
1//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is part of the X86 Disassembler.
11// It contains code to translate the data produced by the decoder into
12//  MCInsts.
13// Documentation for the disassembler can be found in X86Disassembler.h.
14//
15//===----------------------------------------------------------------------===//
16
17#include "X86Disassembler.h"
18#include "X86DisassemblerDecoder.h"
19
20#include "llvm/MC/EDInstInfo.h"
21#include "llvm/MC/MCDisassembler.h"
22#include "llvm/MC/MCDisassembler.h"
23#include "llvm/MC/MCInst.h"
24#include "llvm/Target/TargetRegistry.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/MemoryObject.h"
27#include "llvm/Support/raw_ostream.h"
28
29#include "X86GenRegisterNames.inc"
30#include "X86GenEDInfo.inc"
31
32using namespace llvm;
33using namespace llvm::X86Disassembler;
34
35void x86DisassemblerDebug(const char *file,
36                          unsigned line,
37                          const char *s) {
38  dbgs() << file << ":" << line << ": " << s;
39}
40
41#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
42
43namespace llvm {
44
45// Fill-ins to make the compiler happy.  These constants are never actually
46//   assigned; they are just filler to make an automatically-generated switch
47//   statement work.
48namespace X86 {
49  enum {
50    BX_SI = 500,
51    BX_DI = 501,
52    BP_SI = 502,
53    BP_DI = 503,
54    sib   = 504,
55    sib64 = 505
56  };
57}
58
59extern Target TheX86_32Target, TheX86_64Target;
60
61}
62
63static bool translateInstruction(MCInst &target,
64                                InternalInstruction &source);
65
66X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
67    MCDisassembler(),
68    fMode(mode) {
69}
70
71X86GenericDisassembler::~X86GenericDisassembler() {
72}
73
74EDInstInfo *X86GenericDisassembler::getEDInfo() const {
75  return instInfoX86;
76}
77
78/// regionReader - a callback function that wraps the readByte method from
79///   MemoryObject.
80///
81/// @param arg      - The generic callback parameter.  In this case, this should
82///                   be a pointer to a MemoryObject.
83/// @param byte     - A pointer to the byte to be read.
84/// @param address  - The address to be read.
85static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
86  MemoryObject* region = static_cast<MemoryObject*>(arg);
87  return region->readByte(address, byte);
88}
89
90/// logger - a callback function that wraps the operator<< method from
91///   raw_ostream.
92///
93/// @param arg      - The generic callback parameter.  This should be a pointe
94///                   to a raw_ostream.
95/// @param log      - A string to be logged.  logger() adds a newline.
96static void logger(void* arg, const char* log) {
97  if (!arg)
98    return;
99
100  raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
101  vStream << log << "\n";
102}
103
104//
105// Public interface for the disassembler
106//
107
108bool X86GenericDisassembler::getInstruction(MCInst &instr,
109                                            uint64_t &size,
110                                            const MemoryObject &region,
111                                            uint64_t address,
112                                            raw_ostream &vStream) const {
113  InternalInstruction internalInstr;
114
115  int ret = decodeInstruction(&internalInstr,
116                              regionReader,
117                              (void*)&region,
118                              logger,
119                              (void*)&vStream,
120                              address,
121                              fMode);
122
123  if (ret) {
124    size = internalInstr.readerCursor - address;
125    return false;
126  }
127  else {
128    size = internalInstr.length;
129    return !translateInstruction(instr, internalInstr);
130  }
131}
132
133//
134// Private code that translates from struct InternalInstructions to MCInsts.
135//
136
137/// translateRegister - Translates an internal register to the appropriate LLVM
138///   register, and appends it as an operand to an MCInst.
139///
140/// @param mcInst     - The MCInst to append to.
141/// @param reg        - The Reg to append.
142static void translateRegister(MCInst &mcInst, Reg reg) {
143#define ENTRY(x) X86::x,
144  uint8_t llvmRegnums[] = {
145    ALL_REGS
146    0
147  };
148#undef ENTRY
149
150  uint8_t llvmRegnum = llvmRegnums[reg];
151  mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
152}
153
154/// translateImmediate  - Appends an immediate operand to an MCInst.
155///
156/// @param mcInst       - The MCInst to append to.
157/// @param immediate    - The immediate value to append.
158/// @param operand      - The operand, as stored in the descriptor table.
159/// @param insn         - The internal instruction.
160static void translateImmediate(MCInst &mcInst, uint64_t immediate,
161                               const OperandSpecifier &operand,
162                               InternalInstruction &insn) {
163  // Sign-extend the immediate if necessary.
164
165  OperandType type = operand.type;
166
167  if (type == TYPE_RELv) {
168    switch (insn.displacementSize) {
169    default:
170      break;
171    case 1:
172      type = TYPE_MOFFS8;
173      break;
174    case 2:
175      type = TYPE_MOFFS16;
176      break;
177    case 4:
178      type = TYPE_MOFFS32;
179      break;
180    case 8:
181      type = TYPE_MOFFS64;
182      break;
183    }
184  }
185
186  switch (type) {
187  case TYPE_MOFFS8:
188  case TYPE_REL8:
189    if(immediate & 0x80)
190      immediate |= ~(0xffull);
191    break;
192  case TYPE_MOFFS16:
193    if(immediate & 0x8000)
194      immediate |= ~(0xffffull);
195    break;
196  case TYPE_MOFFS32:
197  case TYPE_REL32:
198  case TYPE_REL64:
199    if(immediate & 0x80000000)
200      immediate |= ~(0xffffffffull);
201    break;
202  case TYPE_MOFFS64:
203  default:
204    // operand is 64 bits wide.  Do nothing.
205    break;
206  }
207
208  mcInst.addOperand(MCOperand::CreateImm(immediate));
209}
210
211/// translateRMRegister - Translates a register stored in the R/M field of the
212///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
213/// @param mcInst       - The MCInst to append to.
214/// @param insn         - The internal instruction to extract the R/M field
215///                       from.
216/// @return             - 0 on success; -1 otherwise
217static bool translateRMRegister(MCInst &mcInst,
218                                InternalInstruction &insn) {
219  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
220    debug("A R/M register operand may not have a SIB byte");
221    return true;
222  }
223
224  switch (insn.eaBase) {
225  default:
226    debug("Unexpected EA base register");
227    return true;
228  case EA_BASE_NONE:
229    debug("EA_BASE_NONE for ModR/M base");
230    return true;
231#define ENTRY(x) case EA_BASE_##x:
232  ALL_EA_BASES
233#undef ENTRY
234    debug("A R/M register operand may not have a base; "
235          "the operand must be a register.");
236    return true;
237#define ENTRY(x)                                                      \
238  case EA_REG_##x:                                                    \
239    mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
240  ALL_REGS
241#undef ENTRY
242  }
243
244  return false;
245}
246
247/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
248///   fields of an internal instruction (and possibly its SIB byte) to a memory
249///   operand in LLVM's format, and appends it to an MCInst.
250///
251/// @param mcInst       - The MCInst to append to.
252/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
253///                       from.
254/// @return             - 0 on success; nonzero otherwise
255static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
256  // Addresses in an MCInst are represented as five operands:
257  //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
258  //                                SIB base
259  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
260  //                                scale amount
261  //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
262  //                                the index (which is multiplied by the
263  //                                scale amount)
264  //   4. displacement  (immediate) 0, or the displacement if there is one
265  //   5. segmentreg    (register)  x86_registerNONE for now, but could be set
266  //                                if we have segment overrides
267
268  MCOperand baseReg;
269  MCOperand scaleAmount;
270  MCOperand indexReg;
271  MCOperand displacement;
272  MCOperand segmentReg;
273
274  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
275    if (insn.sibBase != SIB_BASE_NONE) {
276      switch (insn.sibBase) {
277      default:
278        debug("Unexpected sibBase");
279        return true;
280#define ENTRY(x)                                          \
281      case SIB_BASE_##x:                                  \
282        baseReg = MCOperand::CreateReg(X86::x); break;
283      ALL_SIB_BASES
284#undef ENTRY
285      }
286    } else {
287      baseReg = MCOperand::CreateReg(0);
288    }
289
290    if (insn.sibIndex != SIB_INDEX_NONE) {
291      switch (insn.sibIndex) {
292      default:
293        debug("Unexpected sibIndex");
294        return true;
295#define ENTRY(x)                                          \
296      case SIB_INDEX_##x:                                 \
297        indexReg = MCOperand::CreateReg(X86::x); break;
298      EA_BASES_32BIT
299      EA_BASES_64BIT
300#undef ENTRY
301      }
302    } else {
303      indexReg = MCOperand::CreateReg(0);
304    }
305
306    scaleAmount = MCOperand::CreateImm(insn.sibScale);
307  } else {
308    switch (insn.eaBase) {
309    case EA_BASE_NONE:
310      if (insn.eaDisplacement == EA_DISP_NONE) {
311        debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
312        return true;
313      }
314      if (insn.mode == MODE_64BIT)
315        baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
316      else
317        baseReg = MCOperand::CreateReg(0);
318
319      indexReg = MCOperand::CreateReg(0);
320      break;
321    case EA_BASE_BX_SI:
322      baseReg = MCOperand::CreateReg(X86::BX);
323      indexReg = MCOperand::CreateReg(X86::SI);
324      break;
325    case EA_BASE_BX_DI:
326      baseReg = MCOperand::CreateReg(X86::BX);
327      indexReg = MCOperand::CreateReg(X86::DI);
328      break;
329    case EA_BASE_BP_SI:
330      baseReg = MCOperand::CreateReg(X86::BP);
331      indexReg = MCOperand::CreateReg(X86::SI);
332      break;
333    case EA_BASE_BP_DI:
334      baseReg = MCOperand::CreateReg(X86::BP);
335      indexReg = MCOperand::CreateReg(X86::DI);
336      break;
337    default:
338      indexReg = MCOperand::CreateReg(0);
339      switch (insn.eaBase) {
340      default:
341        debug("Unexpected eaBase");
342        return true;
343        // Here, we will use the fill-ins defined above.  However,
344        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
345        //   sib and sib64 were handled in the top-level if, so they're only
346        //   placeholders to keep the compiler happy.
347#define ENTRY(x)                                        \
348      case EA_BASE_##x:                                 \
349        baseReg = MCOperand::CreateReg(X86::x); break;
350      ALL_EA_BASES
351#undef ENTRY
352#define ENTRY(x) case EA_REG_##x:
353      ALL_REGS
354#undef ENTRY
355        debug("A R/M memory operand may not be a register; "
356              "the base field must be a base.");
357        return true;
358      }
359    }
360
361    scaleAmount = MCOperand::CreateImm(1);
362  }
363
364  displacement = MCOperand::CreateImm(insn.displacement);
365
366  static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
367    0,        // SEG_OVERRIDE_NONE
368    X86::CS,
369    X86::SS,
370    X86::DS,
371    X86::ES,
372    X86::FS,
373    X86::GS
374  };
375
376  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
377
378  mcInst.addOperand(baseReg);
379  mcInst.addOperand(scaleAmount);
380  mcInst.addOperand(indexReg);
381  mcInst.addOperand(displacement);
382  mcInst.addOperand(segmentReg);
383  return false;
384}
385
386/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
387///   byte of an instruction to LLVM form, and appends it to an MCInst.
388///
389/// @param mcInst       - The MCInst to append to.
390/// @param operand      - The operand, as stored in the descriptor table.
391/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
392///                       from.
393/// @return             - 0 on success; nonzero otherwise
394static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
395                        InternalInstruction &insn) {
396  switch (operand.type) {
397  default:
398    debug("Unexpected type for a R/M operand");
399    return true;
400  case TYPE_R8:
401  case TYPE_R16:
402  case TYPE_R32:
403  case TYPE_R64:
404  case TYPE_Rv:
405  case TYPE_MM:
406  case TYPE_MM32:
407  case TYPE_MM64:
408  case TYPE_XMM:
409  case TYPE_XMM32:
410  case TYPE_XMM64:
411  case TYPE_XMM128:
412  case TYPE_XMM256:
413  case TYPE_DEBUGREG:
414  case TYPE_CONTROLREG:
415    return translateRMRegister(mcInst, insn);
416  case TYPE_M:
417  case TYPE_M8:
418  case TYPE_M16:
419  case TYPE_M32:
420  case TYPE_M64:
421  case TYPE_M128:
422  case TYPE_M256:
423  case TYPE_M512:
424  case TYPE_Mv:
425  case TYPE_M32FP:
426  case TYPE_M64FP:
427  case TYPE_M80FP:
428  case TYPE_M16INT:
429  case TYPE_M32INT:
430  case TYPE_M64INT:
431  case TYPE_M1616:
432  case TYPE_M1632:
433  case TYPE_M1664:
434  case TYPE_LEA:
435    return translateRMMemory(mcInst, insn);
436  }
437}
438
439/// translateFPRegister - Translates a stack position on the FPU stack to its
440///   LLVM form, and appends it to an MCInst.
441///
442/// @param mcInst       - The MCInst to append to.
443/// @param stackPos     - The stack position to translate.
444/// @return             - 0 on success; nonzero otherwise.
445static bool translateFPRegister(MCInst &mcInst,
446                               uint8_t stackPos) {
447  if (stackPos >= 8) {
448    debug("Invalid FP stack position");
449    return true;
450  }
451
452  mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
453
454  return false;
455}
456
457/// translateOperand - Translates an operand stored in an internal instruction
458///   to LLVM's format and appends it to an MCInst.
459///
460/// @param mcInst       - The MCInst to append to.
461/// @param operand      - The operand, as stored in the descriptor table.
462/// @param insn         - The internal instruction.
463/// @return             - false on success; true otherwise.
464static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
465                             InternalInstruction &insn) {
466  switch (operand.encoding) {
467  default:
468    debug("Unhandled operand encoding during translation");
469    return true;
470  case ENCODING_REG:
471    translateRegister(mcInst, insn.reg);
472    return false;
473  case ENCODING_RM:
474    return translateRM(mcInst, operand, insn);
475  case ENCODING_CB:
476  case ENCODING_CW:
477  case ENCODING_CD:
478  case ENCODING_CP:
479  case ENCODING_CO:
480  case ENCODING_CT:
481    debug("Translation of code offsets isn't supported.");
482    return true;
483  case ENCODING_IB:
484  case ENCODING_IW:
485  case ENCODING_ID:
486  case ENCODING_IO:
487  case ENCODING_Iv:
488  case ENCODING_Ia:
489    translateImmediate(mcInst,
490                       insn.immediates[insn.numImmediatesTranslated++],
491                       operand,
492                       insn);
493    return false;
494  case ENCODING_RB:
495  case ENCODING_RW:
496  case ENCODING_RD:
497  case ENCODING_RO:
498    translateRegister(mcInst, insn.opcodeRegister);
499    return false;
500  case ENCODING_I:
501    return translateFPRegister(mcInst, insn.opcodeModifier);
502  case ENCODING_Rv:
503    translateRegister(mcInst, insn.opcodeRegister);
504    return false;
505  case ENCODING_VVVV:
506    translateRegister(mcInst, insn.vvvv);
507    return false;
508  case ENCODING_DUP:
509    return translateOperand(mcInst,
510                            insn.spec->operands[operand.type - TYPE_DUP0],
511                            insn);
512  }
513}
514
515/// translateInstruction - Translates an internal instruction and all its
516///   operands to an MCInst.
517///
518/// @param mcInst       - The MCInst to populate with the instruction's data.
519/// @param insn         - The internal instruction.
520/// @return             - false on success; true otherwise.
521static bool translateInstruction(MCInst &mcInst,
522                                InternalInstruction &insn) {
523  if (!insn.spec) {
524    debug("Instruction has no specification");
525    return true;
526  }
527
528  mcInst.setOpcode(insn.instructionID);
529
530  int index;
531
532  insn.numImmediatesTranslated = 0;
533
534  for (index = 0; index < X86_MAX_OPERANDS; ++index) {
535    if (insn.spec->operands[index].encoding != ENCODING_NONE) {
536      if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
537        return true;
538      }
539    }
540  }
541
542  return false;
543}
544
545static MCDisassembler *createX86_32Disassembler(const Target &T) {
546  return new X86Disassembler::X86_32Disassembler;
547}
548
549static MCDisassembler *createX86_64Disassembler(const Target &T) {
550  return new X86Disassembler::X86_64Disassembler;
551}
552
553extern "C" void LLVMInitializeX86Disassembler() {
554  // Register the disassembler.
555  TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
556                                         createX86_32Disassembler);
557  TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
558                                         createX86_64Disassembler);
559}
560