1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5/*
6 * Implementation of MiniDisassembler.
7 */
8
9#include "mini_disassembler.h"
10
11namespace sidestep {
12
13MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
14                                   bool address_default_is_32_bits)
15    : operand_default_is_32_bits_(operand_default_is_32_bits),
16      address_default_is_32_bits_(address_default_is_32_bits) {
17  Initialize();
18}
19
20MiniDisassembler::MiniDisassembler()
21    : operand_default_is_32_bits_(true),
22      address_default_is_32_bits_(true) {
23  Initialize();
24}
25
26InstructionType MiniDisassembler::Disassemble(
27    unsigned char* start_byte,
28    unsigned int& instruction_bytes) {
29  // Clean up any state from previous invocations.
30  Initialize();
31
32  // Start by processing any prefixes.
33  unsigned char* current_byte = start_byte;
34  unsigned int size = 0;
35  InstructionType instruction_type = ProcessPrefixes(current_byte, size);
36
37  if (IT_UNKNOWN == instruction_type)
38    return instruction_type;
39
40  current_byte += size;
41  size = 0;
42
43  // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
44  // and address_is_32_bits_ flags are correctly set.
45
46  instruction_type = ProcessOpcode(current_byte, 0, size);
47
48  // Check for error processing instruction
49  if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
50    return IT_UNKNOWN;
51  }
52
53  current_byte += size;
54
55  // Invariant: operand_bytes_ indicates the total size of operands
56  // specified by the opcode and/or ModR/M byte and/or SIB byte.
57  // pCurrentByte points to the first byte after the ModR/M byte, or after
58  // the SIB byte if it is present (i.e. the first byte of any operands
59  // encoded in the instruction).
60
61  // We get the total length of any prefixes, the opcode, and the ModR/M and
62  // SIB bytes if present, by taking the difference of the original starting
63  // address and the current byte (which points to the first byte of the
64  // operands if present, or to the first byte of the next instruction if
65  // they are not).  Adding the count of bytes in the operands encoded in
66  // the instruction gives us the full length of the instruction in bytes.
67  instruction_bytes += operand_bytes_ + (current_byte - start_byte);
68
69  // Return the instruction type, which was set by ProcessOpcode().
70  return instruction_type_;
71}
72
73void MiniDisassembler::Initialize() {
74  operand_is_32_bits_ = operand_default_is_32_bits_;
75  address_is_32_bits_ = address_default_is_32_bits_;
76  operand_bytes_ = 0;
77  have_modrm_ = false;
78  should_decode_modrm_ = false;
79  instruction_type_ = IT_UNKNOWN;
80  got_f2_prefix_ = false;
81  got_f3_prefix_ = false;
82  got_66_prefix_ = false;
83}
84
85InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
86                                                  unsigned int& size) {
87  InstructionType instruction_type = IT_GENERIC;
88  const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
89
90  switch (opcode.type_) {
91    case IT_PREFIX_ADDRESS:
92      address_is_32_bits_ = !address_default_is_32_bits_;
93      goto nochangeoperand;
94    case IT_PREFIX_OPERAND:
95      operand_is_32_bits_ = !operand_default_is_32_bits_;
96      nochangeoperand:
97    case IT_PREFIX:
98
99      if (0xF2 == (*start_byte))
100        got_f2_prefix_ = true;
101      else if (0xF3 == (*start_byte))
102        got_f3_prefix_ = true;
103      else if (0x66 == (*start_byte))
104        got_66_prefix_ = true;
105
106      instruction_type = opcode.type_;
107      size ++;
108      // we got a prefix, so add one and check next byte
109      ProcessPrefixes(start_byte + 1, size);
110    default:
111      break;   // not a prefix byte
112  }
113
114  return instruction_type;
115}
116
117InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
118                                                unsigned int table_index,
119                                                unsigned int& size) {
120  const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
121  unsigned char current_byte = (*start_byte) >> table.shift_;
122  current_byte = current_byte & table.mask_;  // Mask out the bits we will use
123
124  // Check whether the byte we have is inside the table we have.
125  if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
126    instruction_type_ = IT_UNKNOWN;
127    return instruction_type_;
128  }
129
130  const Opcode& opcode = table.table_[current_byte];
131  if (IT_UNUSED == opcode.type_) {
132    // This instruction is not used by the IA-32 ISA, so we indicate
133    // this to the user.  Probably means that we were pointed to
134    // a byte in memory that was not the start of an instruction.
135    instruction_type_ = IT_UNUSED;
136    return instruction_type_;
137  } else if (IT_REFERENCE == opcode.type_) {
138    // We are looking at an opcode that has more bytes (or is continued
139    // in the ModR/M byte).  Recursively find the opcode definition in
140    // the table for the opcode's next byte.
141    size++;
142    ProcessOpcode(start_byte + 1, opcode.table_index_, size);
143    return instruction_type_;
144  }
145
146  const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode;
147  if (opcode.is_prefix_dependent_) {
148    if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
149      specific_opcode = &opcode.opcode_if_f2_prefix_;
150    } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
151      specific_opcode = &opcode.opcode_if_f3_prefix_;
152    } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
153      specific_opcode = &opcode.opcode_if_66_prefix_;
154    }
155  }
156
157  // Inv: The opcode type is known.
158  instruction_type_ = specific_opcode->type_;
159
160  // Let's process the operand types to see if we have any immediate
161  // operands, and/or a ModR/M byte.
162
163  ProcessOperand(specific_opcode->flag_dest_);
164  ProcessOperand(specific_opcode->flag_source_);
165  ProcessOperand(specific_opcode->flag_aux_);
166
167  // Inv: We have processed the opcode and incremented operand_bytes_
168  // by the number of bytes of any operands specified by the opcode
169  // that are stored in the instruction (not registers etc.).  Now
170  // we need to return the total number of bytes for the opcode and
171  // for the ModR/M or SIB bytes if they are present.
172
173  if (table.mask_ != 0xff) {
174    if (have_modrm_) {
175      // we're looking at a ModR/M byte so we're not going to
176      // count that into the opcode size
177      ProcessModrm(start_byte, size);
178      return IT_GENERIC;
179    } else {
180      // need to count the ModR/M byte even if it's just being
181      // used for opcode extension
182      size++;
183      return IT_GENERIC;
184    }
185  } else {
186    if (have_modrm_) {
187      // The ModR/M byte is the next byte.
188      size++;
189      ProcessModrm(start_byte + 1, size);
190      return IT_GENERIC;
191    } else {
192      size++;
193      return IT_GENERIC;
194    }
195  }
196}
197
198bool MiniDisassembler::ProcessOperand(int flag_operand) {
199  bool succeeded = true;
200  if (AM_NOT_USED == flag_operand)
201    return succeeded;
202
203  // Decide what to do based on the addressing mode.
204  switch (flag_operand & AM_MASK) {
205    // No ModR/M byte indicated by these addressing modes, and no
206    // additional (e.g. immediate) parameters.
207    case AM_A: // Direct address
208    case AM_F: // EFLAGS register
209    case AM_X: // Memory addressed by the DS:SI register pair
210    case AM_Y: // Memory addressed by the ES:DI register pair
211    case AM_IMPLICIT: // Parameter is implicit, occupies no space in
212                       // instruction
213      break;
214
215    // There is a ModR/M byte but it does not necessarily need
216    // to be decoded.
217    case AM_C: // reg field of ModR/M selects a control register
218    case AM_D: // reg field of ModR/M selects a debug register
219    case AM_G: // reg field of ModR/M selects a general register
220    case AM_P: // reg field of ModR/M selects an MMX register
221    case AM_R: // mod field of ModR/M may refer only to a general register
222    case AM_S: // reg field of ModR/M selects a segment register
223    case AM_T: // reg field of ModR/M selects a test register
224    case AM_V: // reg field of ModR/M selects a 128-bit XMM register
225      have_modrm_ = true;
226      break;
227
228    // In these addressing modes, there is a ModR/M byte and it needs to be
229    // decoded. No other (e.g. immediate) params than indicated in ModR/M.
230    case AM_E: // Operand is either a general-purpose register or memory,
231                 // specified by ModR/M byte
232    case AM_M: // ModR/M byte will refer only to memory
233    case AM_Q: // Operand is either an MMX register or memory (complex
234                 // evaluation), specified by ModR/M byte
235    case AM_W: // Operand is either a 128-bit XMM register or memory (complex
236                 // eval), specified by ModR/M byte
237      have_modrm_ = true;
238      should_decode_modrm_ = true;
239      break;
240
241    // These addressing modes specify an immediate or an offset value
242    // directly, so we need to look at the operand type to see how many
243    // bytes.
244    case AM_I: // Immediate data.
245    case AM_J: // Jump to offset.
246    case AM_O: // Operand is at offset.
247      switch (flag_operand & OT_MASK) {
248        case OT_B: // Byte regardless of operand-size attribute.
249          operand_bytes_ += OS_BYTE;
250          break;
251        case OT_C: // Byte or word, depending on operand-size attribute.
252          if (operand_is_32_bits_)
253            operand_bytes_ += OS_WORD;
254          else
255            operand_bytes_ += OS_BYTE;
256          break;
257        case OT_D: // Doubleword, regardless of operand-size attribute.
258          operand_bytes_ += OS_DOUBLE_WORD;
259          break;
260        case OT_DQ: // Double-quadword, regardless of operand-size attribute.
261          operand_bytes_ += OS_DOUBLE_QUAD_WORD;
262          break;
263        case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
264                     // attribute.
265          if (operand_is_32_bits_)
266            operand_bytes_ += OS_48_BIT_POINTER;
267          else
268            operand_bytes_ += OS_32_BIT_POINTER;
269          break;
270        case OT_PS: // 128-bit packed single-precision floating-point data.
271          operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
272          break;
273        case OT_Q: // Quadword, regardless of operand-size attribute.
274          operand_bytes_ += OS_QUAD_WORD;
275          break;
276        case OT_S: // 6-byte pseudo-descriptor.
277          operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
278          break;
279        case OT_SD: // Scalar Double-Precision Floating-Point Value
280        case OT_PD: // Unaligned packed double-precision floating point value
281          operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
282          break;
283        case OT_SS:
284          // Scalar element of a 128-bit packed single-precision
285          // floating data.
286          // We simply return enItUnknown since we don't have to support
287          // floating point
288          succeeded = false;
289          break;
290        case OT_V: // Word or doubleword, depending on operand-size attribute.
291          if (operand_is_32_bits_)
292            operand_bytes_ += OS_DOUBLE_WORD;
293          else
294            operand_bytes_ += OS_WORD;
295          break;
296        case OT_W: // Word, regardless of operand-size attribute.
297          operand_bytes_ += OS_WORD;
298          break;
299
300        // Can safely ignore these.
301        case OT_A: // Two one-word operands in memory or two double-word
302                     // operands in memory
303        case OT_PI: // Quadword MMX technology register (e.g. mm0)
304        case OT_SI: // Doubleword integer register (e.g., eax)
305          break;
306
307        default:
308          break;
309      }
310      break;
311
312    default:
313      break;
314  }
315
316  return succeeded;
317}
318
319bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
320                                    unsigned int& size) {
321  // If we don't need to decode, we just return the size of the ModR/M
322  // byte (there is never a SIB byte in this case).
323  if (!should_decode_modrm_) {
324    size++;
325    return true;
326  }
327
328  // We never care about the reg field, only the combination of the mod
329  // and r/m fields, so let's start by packing those fields together into
330  // 5 bits.
331  unsigned char modrm = (*start_byte);
332  unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
333  modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
334  mod = mod >> 3; // shift the mod field to the right place
335  modrm = mod | modrm; // combine the r/m and mod fields as discussed
336  mod = mod >> 3; // shift the mod field to bits 2..0
337
338  // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
339  // in bits 2..0, and mod contains the mod field in bits 2..0
340
341  const ModrmEntry* modrm_entry = 0;
342  if (address_is_32_bits_)
343    modrm_entry = &s_ia32_modrm_map_[modrm];
344  else
345    modrm_entry = &s_ia16_modrm_map_[modrm];
346
347  // Invariant: modrm_entry points to information that we need to decode
348  // the ModR/M byte.
349
350  // Add to the count of operand bytes, if the ModR/M byte indicates
351  // that some operands are encoded in the instruction.
352  if (modrm_entry->is_encoded_in_instruction_)
353    operand_bytes_ += modrm_entry->operand_size_;
354
355  // Process the SIB byte if necessary, and return the count
356  // of ModR/M and SIB bytes.
357  if (modrm_entry->use_sib_byte_) {
358    size++;
359    return ProcessSib(start_byte + 1, mod, size);
360  } else {
361    size++;
362    return true;
363  }
364}
365
366bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
367                                  unsigned char mod,
368                                  unsigned int& size) {
369  // get the mod field from the 2..0 bits of the SIB byte
370  unsigned char sib_base = (*start_byte) & 0x07;
371  if (0x05 == sib_base) {
372    switch (mod) {
373    case 0x00: // mod == 00
374    case 0x02: // mod == 10
375      operand_bytes_ += OS_DOUBLE_WORD;
376      break;
377    case 0x01: // mod == 01
378      operand_bytes_ += OS_BYTE;
379      break;
380    case 0x03: // mod == 11
381      // According to the IA-32 docs, there does not seem to be a disp
382      // value for this value of mod
383    default:
384      break;
385    }
386  }
387
388  size++;
389  return true;
390}
391
392};  // namespace sidestep
393