1/* Copyright (c) 2007, Google Inc.
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * ---
31 * Author: Joi Sigurdsson
32 *
33 * Implementation of MiniDisassembler.
34 */
35
36#include "mini_disassembler.h"
37
38namespace sidestep {
39
40MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
41                                   bool address_default_is_32_bits)
42    : operand_default_is_32_bits_(operand_default_is_32_bits),
43      address_default_is_32_bits_(address_default_is_32_bits) {
44  Initialize();
45}
46
47MiniDisassembler::MiniDisassembler()
48    : operand_default_is_32_bits_(true),
49      address_default_is_32_bits_(true) {
50  Initialize();
51}
52
53InstructionType MiniDisassembler::Disassemble(
54    unsigned char* start_byte,
55    unsigned int& instruction_bytes) {
56  // Clean up any state from previous invocations.
57  Initialize();
58
59  // Start by processing any prefixes.
60  unsigned char* current_byte = start_byte;
61  unsigned int size = 0;
62  InstructionType instruction_type = ProcessPrefixes(current_byte, size);
63
64  if (IT_UNKNOWN == instruction_type)
65    return instruction_type;
66
67  current_byte += size;
68  size = 0;
69
70  // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
71  // and address_is_32_bits_ flags are correctly set.
72
73  instruction_type = ProcessOpcode(current_byte, 0, size);
74
75  // Check for error processing instruction
76  if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
77    return IT_UNKNOWN;
78  }
79
80  current_byte += size;
81
82  // Invariant: operand_bytes_ indicates the total size of operands
83  // specified by the opcode and/or ModR/M byte and/or SIB byte.
84  // pCurrentByte points to the first byte after the ModR/M byte, or after
85  // the SIB byte if it is present (i.e. the first byte of any operands
86  // encoded in the instruction).
87
88  // We get the total length of any prefixes, the opcode, and the ModR/M and
89  // SIB bytes if present, by taking the difference of the original starting
90  // address and the current byte (which points to the first byte of the
91  // operands if present, or to the first byte of the next instruction if
92  // they are not).  Adding the count of bytes in the operands encoded in
93  // the instruction gives us the full length of the instruction in bytes.
94  instruction_bytes += operand_bytes_ + (current_byte - start_byte);
95
96  // Return the instruction type, which was set by ProcessOpcode().
97  return instruction_type_;
98}
99
100void MiniDisassembler::Initialize() {
101  operand_is_32_bits_ = operand_default_is_32_bits_;
102  address_is_32_bits_ = address_default_is_32_bits_;
103#ifdef _M_X64
104  operand_default_support_64_bits_ = true;
105#else
106  operand_default_support_64_bits_ = false;
107#endif
108  operand_is_64_bits_ = false;
109  operand_bytes_ = 0;
110  have_modrm_ = false;
111  should_decode_modrm_ = false;
112  instruction_type_ = IT_UNKNOWN;
113  got_f2_prefix_ = false;
114  got_f3_prefix_ = false;
115  got_66_prefix_ = false;
116}
117
118InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
119                                                  unsigned int& size) {
120  InstructionType instruction_type = IT_GENERIC;
121  const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
122
123  switch (opcode.type_) {
124    case IT_PREFIX_ADDRESS:
125      address_is_32_bits_ = !address_default_is_32_bits_;
126      goto nochangeoperand;
127    case IT_PREFIX_OPERAND:
128      operand_is_32_bits_ = !operand_default_is_32_bits_;
129      nochangeoperand:
130    case IT_PREFIX:
131
132      if (0xF2 == (*start_byte))
133        got_f2_prefix_ = true;
134      else if (0xF3 == (*start_byte))
135        got_f3_prefix_ = true;
136      else if (0x66 == (*start_byte))
137        got_66_prefix_ = true;
138      else if (operand_default_support_64_bits_ && (*start_byte) & 0x48)
139        operand_is_64_bits_ = true;
140
141      instruction_type = opcode.type_;
142      size ++;
143      // we got a prefix, so add one and check next byte
144      ProcessPrefixes(start_byte + 1, size);
145    default:
146      break;   // not a prefix byte
147  }
148
149  return instruction_type;
150}
151
152InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
153                                                unsigned int table_index,
154                                                unsigned int& size) {
155  const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
156  unsigned char current_byte = (*start_byte) >> table.shift_;
157  current_byte = current_byte & table.mask_;  // Mask out the bits we will use
158
159  // Check whether the byte we have is inside the table we have.
160  if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
161    instruction_type_ = IT_UNKNOWN;
162    return instruction_type_;
163  }
164
165  const Opcode& opcode = table.table_[current_byte];
166  if (IT_UNUSED == opcode.type_) {
167    // This instruction is not used by the IA-32 ISA, so we indicate
168    // this to the user.  Probably means that we were pointed to
169    // a byte in memory that was not the start of an instruction.
170    instruction_type_ = IT_UNUSED;
171    return instruction_type_;
172  } else if (IT_REFERENCE == opcode.type_) {
173    // We are looking at an opcode that has more bytes (or is continued
174    // in the ModR/M byte).  Recursively find the opcode definition in
175    // the table for the opcode's next byte.
176    size++;
177    ProcessOpcode(start_byte + 1, opcode.table_index_, size);
178    return instruction_type_;
179  }
180
181  const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode;
182  if (opcode.is_prefix_dependent_) {
183    if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
184      specific_opcode = &opcode.opcode_if_f2_prefix_;
185    } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
186      specific_opcode = &opcode.opcode_if_f3_prefix_;
187    } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
188      specific_opcode = &opcode.opcode_if_66_prefix_;
189    }
190  }
191
192  // Inv: The opcode type is known.
193  instruction_type_ = specific_opcode->type_;
194
195  // Let's process the operand types to see if we have any immediate
196  // operands, and/or a ModR/M byte.
197
198  ProcessOperand(specific_opcode->flag_dest_);
199  ProcessOperand(specific_opcode->flag_source_);
200  ProcessOperand(specific_opcode->flag_aux_);
201
202  // Inv: We have processed the opcode and incremented operand_bytes_
203  // by the number of bytes of any operands specified by the opcode
204  // that are stored in the instruction (not registers etc.).  Now
205  // we need to return the total number of bytes for the opcode and
206  // for the ModR/M or SIB bytes if they are present.
207
208  if (table.mask_ != 0xff) {
209    if (have_modrm_) {
210      // we're looking at a ModR/M byte so we're not going to
211      // count that into the opcode size
212      ProcessModrm(start_byte, size);
213      return IT_GENERIC;
214    } else {
215      // need to count the ModR/M byte even if it's just being
216      // used for opcode extension
217      size++;
218      return IT_GENERIC;
219    }
220  } else {
221    if (have_modrm_) {
222      // The ModR/M byte is the next byte.
223      size++;
224      ProcessModrm(start_byte + 1, size);
225      return IT_GENERIC;
226    } else {
227      size++;
228      return IT_GENERIC;
229    }
230  }
231}
232
233bool MiniDisassembler::ProcessOperand(int flag_operand) {
234  bool succeeded = true;
235  if (AM_NOT_USED == flag_operand)
236    return succeeded;
237
238  // Decide what to do based on the addressing mode.
239  switch (flag_operand & AM_MASK) {
240    // No ModR/M byte indicated by these addressing modes, and no
241    // additional (e.g. immediate) parameters.
242    case AM_A: // Direct address
243    case AM_F: // EFLAGS register
244    case AM_X: // Memory addressed by the DS:SI register pair
245    case AM_Y: // Memory addressed by the ES:DI register pair
246    case AM_IMPLICIT: // Parameter is implicit, occupies no space in
247                       // instruction
248      break;
249
250    // There is a ModR/M byte but it does not necessarily need
251    // to be decoded.
252    case AM_C: // reg field of ModR/M selects a control register
253    case AM_D: // reg field of ModR/M selects a debug register
254    case AM_G: // reg field of ModR/M selects a general register
255    case AM_P: // reg field of ModR/M selects an MMX register
256    case AM_R: // mod field of ModR/M may refer only to a general register
257    case AM_S: // reg field of ModR/M selects a segment register
258    case AM_T: // reg field of ModR/M selects a test register
259    case AM_V: // reg field of ModR/M selects a 128-bit XMM register
260      have_modrm_ = true;
261      break;
262
263    // In these addressing modes, there is a ModR/M byte and it needs to be
264    // decoded. No other (e.g. immediate) params than indicated in ModR/M.
265    case AM_E: // Operand is either a general-purpose register or memory,
266                 // specified by ModR/M byte
267    case AM_M: // ModR/M byte will refer only to memory
268    case AM_Q: // Operand is either an MMX register or memory (complex
269                 // evaluation), specified by ModR/M byte
270    case AM_W: // Operand is either a 128-bit XMM register or memory (complex
271                 // eval), specified by ModR/M byte
272      have_modrm_ = true;
273      should_decode_modrm_ = true;
274      break;
275
276    // These addressing modes specify an immediate or an offset value
277    // directly, so we need to look at the operand type to see how many
278    // bytes.
279    case AM_I: // Immediate data.
280    case AM_J: // Jump to offset.
281    case AM_O: // Operand is at offset.
282      switch (flag_operand & OT_MASK) {
283        case OT_B: // Byte regardless of operand-size attribute.
284          operand_bytes_ += OS_BYTE;
285          break;
286        case OT_C: // Byte or word, depending on operand-size attribute.
287          if (operand_is_32_bits_)
288            operand_bytes_ += OS_WORD;
289          else
290            operand_bytes_ += OS_BYTE;
291          break;
292        case OT_D: // Doubleword, regardless of operand-size attribute.
293          operand_bytes_ += OS_DOUBLE_WORD;
294          break;
295        case OT_DQ: // Double-quadword, regardless of operand-size attribute.
296          operand_bytes_ += OS_DOUBLE_QUAD_WORD;
297          break;
298        case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
299                     // attribute.
300          if (operand_is_32_bits_)
301            operand_bytes_ += OS_48_BIT_POINTER;
302          else
303            operand_bytes_ += OS_32_BIT_POINTER;
304          break;
305        case OT_PS: // 128-bit packed single-precision floating-point data.
306          operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
307          break;
308        case OT_Q: // Quadword, regardless of operand-size attribute.
309          operand_bytes_ += OS_QUAD_WORD;
310          break;
311        case OT_S: // 6-byte pseudo-descriptor.
312          operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
313          break;
314        case OT_SD: // Scalar Double-Precision Floating-Point Value
315        case OT_PD: // Unaligned packed double-precision floating point value
316          operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
317          break;
318        case OT_SS:
319          // Scalar element of a 128-bit packed single-precision
320          // floating data.
321          // We simply return enItUnknown since we don't have to support
322          // floating point
323          succeeded = false;
324          break;
325        case OT_V: // Word, doubleword or quadword, depending on operand-size
326                   // attribute.
327          if (operand_is_64_bits_ && flag_operand & AM_I &&
328              flag_operand & IOS_64)
329            operand_bytes_ += OS_QUAD_WORD;
330          else if (operand_is_32_bits_)
331            operand_bytes_ += OS_DOUBLE_WORD;
332          else
333            operand_bytes_ += OS_WORD;
334          break;
335        case OT_W: // Word, regardless of operand-size attribute.
336          operand_bytes_ += OS_WORD;
337          break;
338
339        // Can safely ignore these.
340        case OT_A: // Two one-word operands in memory or two double-word
341                     // operands in memory
342        case OT_PI: // Quadword MMX technology register (e.g. mm0)
343        case OT_SI: // Doubleword integer register (e.g., eax)
344          break;
345
346        default:
347          break;
348      }
349      break;
350
351    default:
352      break;
353  }
354
355  return succeeded;
356}
357
358bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
359                                    unsigned int& size) {
360  // If we don't need to decode, we just return the size of the ModR/M
361  // byte (there is never a SIB byte in this case).
362  if (!should_decode_modrm_) {
363    size++;
364    return true;
365  }
366
367  // We never care about the reg field, only the combination of the mod
368  // and r/m fields, so let's start by packing those fields together into
369  // 5 bits.
370  unsigned char modrm = (*start_byte);
371  unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
372  modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
373  mod = mod >> 3; // shift the mod field to the right place
374  modrm = mod | modrm; // combine the r/m and mod fields as discussed
375  mod = mod >> 3; // shift the mod field to bits 2..0
376
377  // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
378  // in bits 2..0, and mod contains the mod field in bits 2..0
379
380  const ModrmEntry* modrm_entry = 0;
381  if (address_is_32_bits_)
382    modrm_entry = &s_ia32_modrm_map_[modrm];
383  else
384    modrm_entry = &s_ia16_modrm_map_[modrm];
385
386  // Invariant: modrm_entry points to information that we need to decode
387  // the ModR/M byte.
388
389  // Add to the count of operand bytes, if the ModR/M byte indicates
390  // that some operands are encoded in the instruction.
391  if (modrm_entry->is_encoded_in_instruction_)
392    operand_bytes_ += modrm_entry->operand_size_;
393
394  // Process the SIB byte if necessary, and return the count
395  // of ModR/M and SIB bytes.
396  if (modrm_entry->use_sib_byte_) {
397    size++;
398    return ProcessSib(start_byte + 1, mod, size);
399  } else {
400    size++;
401    return true;
402  }
403}
404
405bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
406                                  unsigned char mod,
407                                  unsigned int& size) {
408  // get the mod field from the 2..0 bits of the SIB byte
409  unsigned char sib_base = (*start_byte) & 0x07;
410  if (0x05 == sib_base) {
411    switch (mod) {
412    case 0x00: // mod == 00
413    case 0x02: // mod == 10
414      operand_bytes_ += OS_DOUBLE_WORD;
415      break;
416    case 0x01: // mod == 01
417      operand_bytes_ += OS_BYTE;
418      break;
419    case 0x03: // mod == 11
420      // According to the IA-32 docs, there does not seem to be a disp
421      // value for this value of mod
422    default:
423      break;
424    }
425  }
426
427  size++;
428  return true;
429}
430
431};  // namespace sidestep
432