1// Copyright 2008, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//    * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//    * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//    * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// Implementation of MiniDisassembler.
31
32#include "sidestep/mini_disassembler.h"
33
34namespace sidestep {
35
36MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
37                                   bool address_default_is_32_bits)
38    : operand_default_is_32_bits_(operand_default_is_32_bits),
39      address_default_is_32_bits_(address_default_is_32_bits) {
40  Initialize();
41}
42
43MiniDisassembler::MiniDisassembler()
44    : operand_default_is_32_bits_(true),
45      address_default_is_32_bits_(true) {
46  Initialize();
47}
48
49InstructionType MiniDisassembler::Disassemble(
50    unsigned char* start_byte,
51    unsigned int* instruction_bytes) {
52  // Clean up any state from previous invocations.
53  Initialize();
54
55  // Start by processing any prefixes.
56  unsigned char* current_byte = start_byte;
57  unsigned int size = 0;
58  InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
59
60  if (IT_UNKNOWN == instruction_type)
61    return instruction_type;
62
63  current_byte += size;
64  size = 0;
65
66  // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
67  // and address_is_32_bits_ flags are correctly set.
68
69  instruction_type = ProcessOpcode(current_byte, 0, &size);
70
71  // Check for error processing instruction
72  if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
73    return IT_UNKNOWN;
74  }
75
76  current_byte += size;
77
78  // Invariant: operand_bytes_ indicates the total size of operands
79  // specified by the opcode and/or ModR/M byte and/or SIB byte.
80  // pCurrentByte points to the first byte after the ModR/M byte, or after
81  // the SIB byte if it is present (i.e. the first byte of any operands
82  // encoded in the instruction).
83
84  // We get the total length of any prefixes, the opcode, and the ModR/M and
85  // SIB bytes if present, by taking the difference of the original starting
86  // address and the current byte (which points to the first byte of the
87  // operands if present, or to the first byte of the next instruction if
88  // they are not).  Adding the count of bytes in the operands encoded in
89  // the instruction gives us the full length of the instruction in bytes.
90  *instruction_bytes += operand_bytes_ + (current_byte - start_byte);
91
92  // Return the instruction type, which was set by ProcessOpcode().
93  return instruction_type_;
94}
95
96void MiniDisassembler::Initialize() {
97  operand_is_32_bits_ = operand_default_is_32_bits_;
98  address_is_32_bits_ = address_default_is_32_bits_;
99  operand_bytes_ = 0;
100  have_modrm_ = false;
101  should_decode_modrm_ = false;
102  instruction_type_ = IT_UNKNOWN;
103  got_f2_prefix_ = false;
104  got_f3_prefix_ = false;
105  got_66_prefix_ = false;
106}
107
108InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
109                                                  unsigned int* size) {
110  InstructionType instruction_type = IT_GENERIC;
111  const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
112
113  switch (opcode.type_) {
114    case IT_PREFIX_ADDRESS:
115      address_is_32_bits_ = !address_default_is_32_bits_;
116      goto nochangeoperand;
117    case IT_PREFIX_OPERAND:
118      operand_is_32_bits_ = !operand_default_is_32_bits_;
119      nochangeoperand:
120    case IT_PREFIX:
121
122      if (0xF2 == (*start_byte))
123        got_f2_prefix_ = true;
124      else if (0xF3 == (*start_byte))
125        got_f3_prefix_ = true;
126      else if (0x66 == (*start_byte))
127        got_66_prefix_ = true;
128
129      instruction_type = opcode.type_;
130      (*size)++;
131      // we got a prefix, so add one and check next byte
132      ProcessPrefixes(start_byte + 1, size);
133    default:
134      break;   // not a prefix byte
135  }
136
137  return instruction_type;
138}
139
140InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
141                                                unsigned int table_index,
142                                                unsigned int* size) {
143  const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
144  unsigned char current_byte = (*start_byte) >> table.shift_;
145  current_byte = current_byte & table.mask_;  // Mask out the bits we will use
146
147  // Check whether the byte we have is inside the table we have.
148  if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
149    instruction_type_ = IT_UNKNOWN;
150    return instruction_type_;
151  }
152
153  const Opcode& opcode = table.table_[current_byte];
154  if (IT_UNUSED == opcode.type_) {
155    // This instruction is not used by the IA-32 ISA, so we indicate
156    // this to the user.  Probably means that we were pointed to
157    // a byte in memory that was not the start of an instruction.
158    instruction_type_ = IT_UNUSED;
159    return instruction_type_;
160  } else if (IT_REFERENCE == opcode.type_) {
161    // We are looking at an opcode that has more bytes (or is continued
162    // in the ModR/M byte).  Recursively find the opcode definition in
163    // the table for the opcode's next byte.
164    (*size)++;
165    ProcessOpcode(start_byte + 1, opcode.table_index_, size);
166    return instruction_type_;
167  }
168
169  const SpecificOpcode* specific_opcode = reinterpret_cast<
170                                              const SpecificOpcode*>(&opcode);
171  if (opcode.is_prefix_dependent_) {
172    if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
173      specific_opcode = &opcode.opcode_if_f2_prefix_;
174    } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
175      specific_opcode = &opcode.opcode_if_f3_prefix_;
176    } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
177      specific_opcode = &opcode.opcode_if_66_prefix_;
178    }
179  }
180
181  // Inv: The opcode type is known.
182  instruction_type_ = specific_opcode->type_;
183
184  // Let's process the operand types to see if we have any immediate
185  // operands, and/or a ModR/M byte.
186
187  ProcessOperand(specific_opcode->flag_dest_);
188  ProcessOperand(specific_opcode->flag_source_);
189  ProcessOperand(specific_opcode->flag_aux_);
190
191  // Inv: We have processed the opcode and incremented operand_bytes_
192  // by the number of bytes of any operands specified by the opcode
193  // that are stored in the instruction (not registers etc.).  Now
194  // we need to return the total number of bytes for the opcode and
195  // for the ModR/M or SIB bytes if they are present.
196
197  if (table.mask_ != 0xff) {
198    if (have_modrm_) {
199      // we're looking at a ModR/M byte so we're not going to
200      // count that into the opcode size
201      ProcessModrm(start_byte, size);
202      return IT_GENERIC;
203    } else {
204      // need to count the ModR/M byte even if it's just being
205      // used for opcode extension
206      (*size)++;
207      return IT_GENERIC;
208    }
209  } else {
210    if (have_modrm_) {
211      // The ModR/M byte is the next byte.
212      (*size)++;
213      ProcessModrm(start_byte + 1, size);
214      return IT_GENERIC;
215    } else {
216      (*size)++;
217      return IT_GENERIC;
218    }
219  }
220}
221
222bool MiniDisassembler::ProcessOperand(int flag_operand) {
223  bool succeeded = true;
224  if (AM_NOT_USED == flag_operand)
225    return succeeded;
226
227  // Decide what to do based on the addressing mode.
228  switch (flag_operand & AM_MASK) {
229    // No ModR/M byte indicated by these addressing modes, and no
230    // additional (e.g. immediate) parameters.
231    case AM_A:  // Direct address
232    case AM_F:  // EFLAGS register
233    case AM_X:  // Memory addressed by the DS:SI register pair
234    case AM_Y:  // Memory addressed by the ES:DI register pair
235    case AM_IMPLICIT:  // Parameter is implicit, occupies no space in
236                       // instruction
237      break;
238
239    // There is a ModR/M byte but it does not necessarily need
240    // to be decoded.
241    case AM_C:  // reg field of ModR/M selects a control register
242    case AM_D:  // reg field of ModR/M selects a debug register
243    case AM_G:  // reg field of ModR/M selects a general register
244    case AM_P:  // reg field of ModR/M selects an MMX register
245    case AM_R:  // mod field of ModR/M may refer only to a general register
246    case AM_S:  // reg field of ModR/M selects a segment register
247    case AM_T:  // reg field of ModR/M selects a test register
248    case AM_V:  // reg field of ModR/M selects a 128-bit XMM register
249      have_modrm_ = true;
250      break;
251
252    // In these addressing modes, there is a ModR/M byte and it needs to be
253    // decoded. No other (e.g. immediate) params than indicated in ModR/M.
254    case AM_E:  // Operand is either a general-purpose register or memory,
255                // specified by ModR/M byte
256    case AM_M:  // ModR/M byte will refer only to memory
257    case AM_Q:  // Operand is either an MMX register or memory (complex
258                // evaluation), specified by ModR/M byte
259    case AM_W:  // Operand is either a 128-bit XMM register or memory (complex
260                // eval), specified by ModR/M byte
261      have_modrm_ = true;
262      should_decode_modrm_ = true;
263      break;
264
265    // These addressing modes specify an immediate or an offset value
266    // directly, so we need to look at the operand type to see how many
267    // bytes.
268    case AM_I:  // Immediate data.
269    case AM_J:  // Jump to offset.
270    case AM_O:  // Operand is at offset.
271      switch (flag_operand & OT_MASK) {
272        case OT_B:  // Byte regardless of operand-size attribute.
273          operand_bytes_ += OS_BYTE;
274          break;
275        case OT_C:  // Byte or word, depending on operand-size attribute.
276          if (operand_is_32_bits_)
277            operand_bytes_ += OS_WORD;
278          else
279            operand_bytes_ += OS_BYTE;
280          break;
281        case OT_D:  // Doubleword, regardless of operand-size attribute.
282          operand_bytes_ += OS_DOUBLE_WORD;
283          break;
284        case OT_DQ:  // Double-quadword, regardless of operand-size attribute.
285          operand_bytes_ += OS_DOUBLE_QUAD_WORD;
286          break;
287        case OT_P:  // 32-bit or 48-bit pointer, depending on operand-size
288                    // attribute.
289          if (operand_is_32_bits_)
290            operand_bytes_ += OS_48_BIT_POINTER;
291          else
292            operand_bytes_ += OS_32_BIT_POINTER;
293          break;
294        case OT_PS:  // 128-bit packed single-precision floating-point data.
295          operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
296          break;
297        case OT_Q:  // Quadword, regardless of operand-size attribute.
298          operand_bytes_ += OS_QUAD_WORD;
299          break;
300        case OT_S:  // 6-byte pseudo-descriptor.
301          operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
302          break;
303        case OT_SD:  // Scalar Double-Precision Floating-Point Value
304        case OT_PD:  // Unaligned packed double-precision floating point value
305          operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
306          break;
307        case OT_SS:
308          // Scalar element of a 128-bit packed single-precision
309          // floating data.
310          // We simply return enItUnknown since we don't have to support
311          // floating point
312          succeeded = false;
313          break;
314        case OT_V:  // Word or doubleword, depending on operand-size attribute.
315          if (operand_is_32_bits_)
316            operand_bytes_ += OS_DOUBLE_WORD;
317          else
318            operand_bytes_ += OS_WORD;
319          break;
320        case OT_W:  // Word, regardless of operand-size attribute.
321          operand_bytes_ += OS_WORD;
322          break;
323
324        // Can safely ignore these.
325        case OT_A:  // Two one-word operands in memory or two double-word
326                    // operands in memory
327        case OT_PI:  // Quadword MMX technology register (e.g. mm0)
328        case OT_SI:  // Doubleword integer register (e.g., eax)
329          break;
330
331        default:
332          break;
333      }
334      break;
335
336    default:
337      break;
338  }
339
340  return succeeded;
341}
342
343bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
344                                    unsigned int* size) {
345  // If we don't need to decode, we just return the size of the ModR/M
346  // byte (there is never a SIB byte in this case).
347  if (!should_decode_modrm_) {
348    (*size)++;
349    return true;
350  }
351
352  // We never care about the reg field, only the combination of the mod
353  // and r/m fields, so let's start by packing those fields together into
354  // 5 bits.
355  unsigned char modrm = (*start_byte);
356  unsigned char mod = modrm & 0xC0;  // mask out top two bits to get mod field
357  modrm = modrm & 0x07;  // mask out bottom 3 bits to get r/m field
358  mod = mod >> 3;  // shift the mod field to the right place
359  modrm = mod | modrm;  // combine the r/m and mod fields as discussed
360  mod = mod >> 3;  // shift the mod field to bits 2..0
361
362  // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
363  // in bits 2..0, and mod contains the mod field in bits 2..0
364
365  const ModrmEntry* modrm_entry = 0;
366  if (address_is_32_bits_)
367    modrm_entry = &s_ia32_modrm_map_[modrm];
368  else
369    modrm_entry = &s_ia16_modrm_map_[modrm];
370
371  // Invariant: modrm_entry points to information that we need to decode
372  // the ModR/M byte.
373
374  // Add to the count of operand bytes, if the ModR/M byte indicates
375  // that some operands are encoded in the instruction.
376  if (modrm_entry->is_encoded_in_instruction_)
377    operand_bytes_ += modrm_entry->operand_size_;
378
379  // Process the SIB byte if necessary, and return the count
380  // of ModR/M and SIB bytes.
381  if (modrm_entry->use_sib_byte_) {
382    (*size)++;
383    return ProcessSib(start_byte + 1, mod, size);
384  } else {
385    (*size)++;
386    return true;
387  }
388}
389
390bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
391                                  unsigned char mod,
392                                  unsigned int* size) {
393  // get the mod field from the 2..0 bits of the SIB byte
394  unsigned char sib_base = (*start_byte) & 0x07;
395  if (0x05 == sib_base) {
396    switch (mod) {
397      case 0x00:  // mod == 00
398      case 0x02:  // mod == 10
399        operand_bytes_ += OS_DOUBLE_WORD;
400        break;
401      case 0x01:  // mod == 01
402        operand_bytes_ += OS_BYTE;
403        break;
404      case 0x03:  // mod == 11
405        // According to the IA-32 docs, there does not seem to be a disp
406        // value for this value of mod
407      default:
408        break;
409    }
410  }
411
412  (*size)++;
413  return true;
414}
415
416};  // namespace sidestep
417