1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Implementation of MiniDisassembler.
6
7#ifdef _WIN64
8#error The code in this file should not be used on 64-bit Windows.
9#endif
10
11#include "sandbox/win/src/sidestep/mini_disassembler.h"
12
13namespace sidestep {
14
15MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
16                                   bool address_default_is_32_bits)
17    : operand_default_is_32_bits_(operand_default_is_32_bits),
18      address_default_is_32_bits_(address_default_is_32_bits) {
19  Initialize();
20}
21
22MiniDisassembler::MiniDisassembler()
23    : operand_default_is_32_bits_(true),
24      address_default_is_32_bits_(true) {
25  Initialize();
26}
27
28InstructionType MiniDisassembler::Disassemble(
29    unsigned char* start_byte,
30    unsigned int* instruction_bytes) {
31  // Clean up any state from previous invocations.
32  Initialize();
33
34  // Start by processing any prefixes.
35  unsigned char* current_byte = start_byte;
36  unsigned int size = 0;
37  InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
38
39  if (IT_UNKNOWN == instruction_type)
40    return instruction_type;
41
42  current_byte += size;
43  size = 0;
44
45  // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
46  // and address_is_32_bits_ flags are correctly set.
47
48  instruction_type = ProcessOpcode(current_byte, 0, &size);
49
50  // Check for error processing instruction
51  if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
52    return IT_UNKNOWN;
53  }
54
55  current_byte += size;
56
57  // Invariant: operand_bytes_ indicates the total size of operands
58  // specified by the opcode and/or ModR/M byte and/or SIB byte.
59  // pCurrentByte points to the first byte after the ModR/M byte, or after
60  // the SIB byte if it is present (i.e. the first byte of any operands
61  // encoded in the instruction).
62
63  // We get the total length of any prefixes, the opcode, and the ModR/M and
64  // SIB bytes if present, by taking the difference of the original starting
65  // address and the current byte (which points to the first byte of the
66  // operands if present, or to the first byte of the next instruction if
67  // they are not).  Adding the count of bytes in the operands encoded in
68  // the instruction gives us the full length of the instruction in bytes.
69  *instruction_bytes += operand_bytes_ + (current_byte - start_byte);
70
71  // Return the instruction type, which was set by ProcessOpcode().
72  return instruction_type_;
73}
74
75void MiniDisassembler::Initialize() {
76  operand_is_32_bits_ = operand_default_is_32_bits_;
77  address_is_32_bits_ = address_default_is_32_bits_;
78  operand_bytes_ = 0;
79  have_modrm_ = false;
80  should_decode_modrm_ = false;
81  instruction_type_ = IT_UNKNOWN;
82  got_f2_prefix_ = false;
83  got_f3_prefix_ = false;
84  got_66_prefix_ = false;
85}
86
87InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
88                                                  unsigned int* size) {
89  InstructionType instruction_type = IT_GENERIC;
90  const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
91
92  switch (opcode.type_) {
93    case IT_PREFIX_ADDRESS:
94      address_is_32_bits_ = !address_default_is_32_bits_;
95      goto nochangeoperand;
96    case IT_PREFIX_OPERAND:
97      operand_is_32_bits_ = !operand_default_is_32_bits_;
98      nochangeoperand:
99    case IT_PREFIX:
100
101      if (0xF2 == (*start_byte))
102        got_f2_prefix_ = true;
103      else if (0xF3 == (*start_byte))
104        got_f3_prefix_ = true;
105      else if (0x66 == (*start_byte))
106        got_66_prefix_ = true;
107
108      instruction_type = opcode.type_;
109      (*size)++;
110      // we got a prefix, so add one and check next byte
111      ProcessPrefixes(start_byte + 1, size);
112    default:
113      break;   // not a prefix byte
114  }
115
116  return instruction_type;
117}
118
119InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
120                                                unsigned int table_index,
121                                                unsigned int* size) {
122  const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
123  unsigned char current_byte = (*start_byte) >> table.shift_;
124  current_byte = current_byte & table.mask_;  // Mask out the bits we will use
125
126  // Check whether the byte we have is inside the table we have.
127  if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
128    instruction_type_ = IT_UNKNOWN;
129    return instruction_type_;
130  }
131
132  const Opcode& opcode = table.table_[current_byte];
133  if (IT_UNUSED == opcode.type_) {
134    // This instruction is not used by the IA-32 ISA, so we indicate
135    // this to the user.  Probably means that we were pointed to
136    // a byte in memory that was not the start of an instruction.
137    instruction_type_ = IT_UNUSED;
138    return instruction_type_;
139  } else if (IT_REFERENCE == opcode.type_) {
140    // We are looking at an opcode that has more bytes (or is continued
141    // in the ModR/M byte).  Recursively find the opcode definition in
142    // the table for the opcode's next byte.
143    (*size)++;
144    ProcessOpcode(start_byte + 1, opcode.table_index_, size);
145    return instruction_type_;
146  }
147
148  const SpecificOpcode* specific_opcode = reinterpret_cast<
149                                              const SpecificOpcode*>(&opcode);
150  if (opcode.is_prefix_dependent_) {
151    if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
152      specific_opcode = &opcode.opcode_if_f2_prefix_;
153    } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
154      specific_opcode = &opcode.opcode_if_f3_prefix_;
155    } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
156      specific_opcode = &opcode.opcode_if_66_prefix_;
157    }
158  }
159
160  // Inv: The opcode type is known.
161  instruction_type_ = specific_opcode->type_;
162
163  // Let's process the operand types to see if we have any immediate
164  // operands, and/or a ModR/M byte.
165
166  ProcessOperand(specific_opcode->flag_dest_);
167  ProcessOperand(specific_opcode->flag_source_);
168  ProcessOperand(specific_opcode->flag_aux_);
169
170  // Inv: We have processed the opcode and incremented operand_bytes_
171  // by the number of bytes of any operands specified by the opcode
172  // that are stored in the instruction (not registers etc.).  Now
173  // we need to return the total number of bytes for the opcode and
174  // for the ModR/M or SIB bytes if they are present.
175
176  if (table.mask_ != 0xff) {
177    if (have_modrm_) {
178      // we're looking at a ModR/M byte so we're not going to
179      // count that into the opcode size
180      ProcessModrm(start_byte, size);
181      return IT_GENERIC;
182    } else {
183      // need to count the ModR/M byte even if it's just being
184      // used for opcode extension
185      (*size)++;
186      return IT_GENERIC;
187    }
188  } else {
189    if (have_modrm_) {
190      // The ModR/M byte is the next byte.
191      (*size)++;
192      ProcessModrm(start_byte + 1, size);
193      return IT_GENERIC;
194    } else {
195      (*size)++;
196      return IT_GENERIC;
197    }
198  }
199}
200
201bool MiniDisassembler::ProcessOperand(int flag_operand) {
202  bool succeeded = true;
203  if (AM_NOT_USED == flag_operand)
204    return succeeded;
205
206  // Decide what to do based on the addressing mode.
207  switch (flag_operand & AM_MASK) {
208    // No ModR/M byte indicated by these addressing modes, and no
209    // additional (e.g. immediate) parameters.
210    case AM_A:  // Direct address
211    case AM_F:  // EFLAGS register
212    case AM_X:  // Memory addressed by the DS:SI register pair
213    case AM_Y:  // Memory addressed by the ES:DI register pair
214    case AM_IMPLICIT:  // Parameter is implicit, occupies no space in
215                       // instruction
216      break;
217
218    // There is a ModR/M byte but it does not necessarily need
219    // to be decoded.
220    case AM_C:  // reg field of ModR/M selects a control register
221    case AM_D:  // reg field of ModR/M selects a debug register
222    case AM_G:  // reg field of ModR/M selects a general register
223    case AM_P:  // reg field of ModR/M selects an MMX register
224    case AM_R:  // mod field of ModR/M may refer only to a general register
225    case AM_S:  // reg field of ModR/M selects a segment register
226    case AM_T:  // reg field of ModR/M selects a test register
227    case AM_V:  // reg field of ModR/M selects a 128-bit XMM register
228      have_modrm_ = true;
229      break;
230
231    // In these addressing modes, there is a ModR/M byte and it needs to be
232    // decoded. No other (e.g. immediate) params than indicated in ModR/M.
233    case AM_E:  // Operand is either a general-purpose register or memory,
234                // specified by ModR/M byte
235    case AM_M:  // ModR/M byte will refer only to memory
236    case AM_Q:  // Operand is either an MMX register or memory (complex
237                // evaluation), specified by ModR/M byte
238    case AM_W:  // Operand is either a 128-bit XMM register or memory (complex
239                // eval), specified by ModR/M byte
240      have_modrm_ = true;
241      should_decode_modrm_ = true;
242      break;
243
244    // These addressing modes specify an immediate or an offset value
245    // directly, so we need to look at the operand type to see how many
246    // bytes.
247    case AM_I:  // Immediate data.
248    case AM_J:  // Jump to offset.
249    case AM_O:  // Operand is at offset.
250      switch (flag_operand & OT_MASK) {
251        case OT_B:  // Byte regardless of operand-size attribute.
252          operand_bytes_ += OS_BYTE;
253          break;
254        case OT_C:  // Byte or word, depending on operand-size attribute.
255          if (operand_is_32_bits_)
256            operand_bytes_ += OS_WORD;
257          else
258            operand_bytes_ += OS_BYTE;
259          break;
260        case OT_D:  // Doubleword, regardless of operand-size attribute.
261          operand_bytes_ += OS_DOUBLE_WORD;
262          break;
263        case OT_DQ:  // Double-quadword, regardless of operand-size attribute.
264          operand_bytes_ += OS_DOUBLE_QUAD_WORD;
265          break;
266        case OT_P:  // 32-bit or 48-bit pointer, depending on operand-size
267                    // attribute.
268          if (operand_is_32_bits_)
269            operand_bytes_ += OS_48_BIT_POINTER;
270          else
271            operand_bytes_ += OS_32_BIT_POINTER;
272          break;
273        case OT_PS:  // 128-bit packed single-precision floating-point data.
274          operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
275          break;
276        case OT_Q:  // Quadword, regardless of operand-size attribute.
277          operand_bytes_ += OS_QUAD_WORD;
278          break;
279        case OT_S:  // 6-byte pseudo-descriptor.
280          operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
281          break;
282        case OT_SD:  // Scalar Double-Precision Floating-Point Value
283        case OT_PD:  // Unaligned packed double-precision floating point value
284          operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
285          break;
286        case OT_SS:
287          // Scalar element of a 128-bit packed single-precision
288          // floating data.
289          // We simply return enItUnknown since we don't have to support
290          // floating point
291          succeeded = false;
292          break;
293        case OT_V:  // Word or doubleword, depending on operand-size attribute.
294          if (operand_is_32_bits_)
295            operand_bytes_ += OS_DOUBLE_WORD;
296          else
297            operand_bytes_ += OS_WORD;
298          break;
299        case OT_W:  // Word, regardless of operand-size attribute.
300          operand_bytes_ += OS_WORD;
301          break;
302
303        // Can safely ignore these.
304        case OT_A:  // Two one-word operands in memory or two double-word
305                    // operands in memory
306        case OT_PI:  // Quadword MMX technology register (e.g. mm0)
307        case OT_SI:  // Doubleword integer register (e.g., eax)
308          break;
309
310        default:
311          break;
312      }
313      break;
314
315    default:
316      break;
317  }
318
319  return succeeded;
320}
321
322bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
323                                    unsigned int* size) {
324  // If we don't need to decode, we just return the size of the ModR/M
325  // byte (there is never a SIB byte in this case).
326  if (!should_decode_modrm_) {
327    (*size)++;
328    return true;
329  }
330
331  // We never care about the reg field, only the combination of the mod
332  // and r/m fields, so let's start by packing those fields together into
333  // 5 bits.
334  unsigned char modrm = (*start_byte);
335  unsigned char mod = modrm & 0xC0;  // mask out top two bits to get mod field
336  modrm = modrm & 0x07;  // mask out bottom 3 bits to get r/m field
337  mod = mod >> 3;  // shift the mod field to the right place
338  modrm = mod | modrm;  // combine the r/m and mod fields as discussed
339  mod = mod >> 3;  // shift the mod field to bits 2..0
340
341  // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
342  // in bits 2..0, and mod contains the mod field in bits 2..0
343
344  const ModrmEntry* modrm_entry = 0;
345  if (address_is_32_bits_)
346    modrm_entry = &s_ia32_modrm_map_[modrm];
347  else
348    modrm_entry = &s_ia16_modrm_map_[modrm];
349
350  // Invariant: modrm_entry points to information that we need to decode
351  // the ModR/M byte.
352
353  // Add to the count of operand bytes, if the ModR/M byte indicates
354  // that some operands are encoded in the instruction.
355  if (modrm_entry->is_encoded_in_instruction_)
356    operand_bytes_ += modrm_entry->operand_size_;
357
358  // Process the SIB byte if necessary, and return the count
359  // of ModR/M and SIB bytes.
360  if (modrm_entry->use_sib_byte_) {
361    (*size)++;
362    return ProcessSib(start_byte + 1, mod, size);
363  } else {
364    (*size)++;
365    return true;
366  }
367}
368
369bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
370                                  unsigned char mod,
371                                  unsigned int* size) {
372  // get the mod field from the 2..0 bits of the SIB byte
373  unsigned char sib_base = (*start_byte) & 0x07;
374  if (0x05 == sib_base) {
375    switch (mod) {
376      case 0x00:  // mod == 00
377      case 0x02:  // mod == 10
378        operand_bytes_ += OS_DOUBLE_WORD;
379        break;
380      case 0x01:  // mod == 01
381        operand_bytes_ += OS_BYTE;
382        break;
383      case 0x03:  // mod == 11
384        // According to the IA-32 docs, there does not seem to be a disp
385        // value for this value of mod
386      default:
387        break;
388    }
389  }
390
391  (*size)++;
392  return true;
393}
394
395};  // namespace sidestep
396