// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Implementation of MiniDisassembler.
31 32#include "sidestep/mini_disassembler.h" 33 34namespace sidestep { 35 36MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, 37 bool address_default_is_32_bits) 38 : operand_default_is_32_bits_(operand_default_is_32_bits), 39 address_default_is_32_bits_(address_default_is_32_bits) { 40 Initialize(); 41} 42 43MiniDisassembler::MiniDisassembler() 44 : operand_default_is_32_bits_(true), 45 address_default_is_32_bits_(true) { 46 Initialize(); 47} 48 49InstructionType MiniDisassembler::Disassemble( 50 unsigned char* start_byte, 51 unsigned int* instruction_bytes) { 52 // Clean up any state from previous invocations. 53 Initialize(); 54 55 // Start by processing any prefixes. 56 unsigned char* current_byte = start_byte; 57 unsigned int size = 0; 58 InstructionType instruction_type = ProcessPrefixes(current_byte, &size); 59 60 if (IT_UNKNOWN == instruction_type) 61 return instruction_type; 62 63 current_byte += size; 64 size = 0; 65 66 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ 67 // and address_is_32_bits_ flags are correctly set. 68 69 instruction_type = ProcessOpcode(current_byte, 0, &size); 70 71 // Check for error processing instruction 72 if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { 73 return IT_UNKNOWN; 74 } 75 76 current_byte += size; 77 78 // Invariant: operand_bytes_ indicates the total size of operands 79 // specified by the opcode and/or ModR/M byte and/or SIB byte. 80 // pCurrentByte points to the first byte after the ModR/M byte, or after 81 // the SIB byte if it is present (i.e. the first byte of any operands 82 // encoded in the instruction). 83 84 // We get the total length of any prefixes, the opcode, and the ModR/M and 85 // SIB bytes if present, by taking the difference of the original starting 86 // address and the current byte (which points to the first byte of the 87 // operands if present, or to the first byte of the next instruction if 88 // they are not). 
Adding the count of bytes in the operands encoded in 89 // the instruction gives us the full length of the instruction in bytes. 90 *instruction_bytes += operand_bytes_ + (current_byte - start_byte); 91 92 // Return the instruction type, which was set by ProcessOpcode(). 93 return instruction_type_; 94} 95 96void MiniDisassembler::Initialize() { 97 operand_is_32_bits_ = operand_default_is_32_bits_; 98 address_is_32_bits_ = address_default_is_32_bits_; 99 operand_bytes_ = 0; 100 have_modrm_ = false; 101 should_decode_modrm_ = false; 102 instruction_type_ = IT_UNKNOWN; 103 got_f2_prefix_ = false; 104 got_f3_prefix_ = false; 105 got_66_prefix_ = false; 106} 107 108InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, 109 unsigned int* size) { 110 InstructionType instruction_type = IT_GENERIC; 111 const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; 112 113 switch (opcode.type_) { 114 case IT_PREFIX_ADDRESS: 115 address_is_32_bits_ = !address_default_is_32_bits_; 116 goto nochangeoperand; 117 case IT_PREFIX_OPERAND: 118 operand_is_32_bits_ = !operand_default_is_32_bits_; 119 nochangeoperand: 120 case IT_PREFIX: 121 122 if (0xF2 == (*start_byte)) 123 got_f2_prefix_ = true; 124 else if (0xF3 == (*start_byte)) 125 got_f3_prefix_ = true; 126 else if (0x66 == (*start_byte)) 127 got_66_prefix_ = true; 128 129 instruction_type = opcode.type_; 130 (*size)++; 131 // we got a prefix, so add one and check next byte 132 ProcessPrefixes(start_byte + 1, size); 133 default: 134 break; // not a prefix byte 135 } 136 137 return instruction_type; 138} 139 140InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, 141 unsigned int table_index, 142 unsigned int* size) { 143 const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table 144 unsigned char current_byte = (*start_byte) >> table.shift_; 145 current_byte = current_byte & table.mask_; // Mask out the bits we will use 146 147 // Check whether the byte we have is 
inside the table we have. 148 if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { 149 instruction_type_ = IT_UNKNOWN; 150 return instruction_type_; 151 } 152 153 const Opcode& opcode = table.table_[current_byte]; 154 if (IT_UNUSED == opcode.type_) { 155 // This instruction is not used by the IA-32 ISA, so we indicate 156 // this to the user. Probably means that we were pointed to 157 // a byte in memory that was not the start of an instruction. 158 instruction_type_ = IT_UNUSED; 159 return instruction_type_; 160 } else if (IT_REFERENCE == opcode.type_) { 161 // We are looking at an opcode that has more bytes (or is continued 162 // in the ModR/M byte). Recursively find the opcode definition in 163 // the table for the opcode's next byte. 164 (*size)++; 165 ProcessOpcode(start_byte + 1, opcode.table_index_, size); 166 return instruction_type_; 167 } 168 169 const SpecificOpcode* specific_opcode = reinterpret_cast< 170 const SpecificOpcode*>(&opcode); 171 if (opcode.is_prefix_dependent_) { 172 if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { 173 specific_opcode = &opcode.opcode_if_f2_prefix_; 174 } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { 175 specific_opcode = &opcode.opcode_if_f3_prefix_; 176 } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { 177 specific_opcode = &opcode.opcode_if_66_prefix_; 178 } 179 } 180 181 // Inv: The opcode type is known. 182 instruction_type_ = specific_opcode->type_; 183 184 // Let's process the operand types to see if we have any immediate 185 // operands, and/or a ModR/M byte. 186 187 ProcessOperand(specific_opcode->flag_dest_); 188 ProcessOperand(specific_opcode->flag_source_); 189 ProcessOperand(specific_opcode->flag_aux_); 190 191 // Inv: We have processed the opcode and incremented operand_bytes_ 192 // by the number of bytes of any operands specified by the opcode 193 // that are stored in the instruction (not registers etc.). 
Now 194 // we need to return the total number of bytes for the opcode and 195 // for the ModR/M or SIB bytes if they are present. 196 197 if (table.mask_ != 0xff) { 198 if (have_modrm_) { 199 // we're looking at a ModR/M byte so we're not going to 200 // count that into the opcode size 201 ProcessModrm(start_byte, size); 202 return IT_GENERIC; 203 } else { 204 // need to count the ModR/M byte even if it's just being 205 // used for opcode extension 206 (*size)++; 207 return IT_GENERIC; 208 } 209 } else { 210 if (have_modrm_) { 211 // The ModR/M byte is the next byte. 212 (*size)++; 213 ProcessModrm(start_byte + 1, size); 214 return IT_GENERIC; 215 } else { 216 (*size)++; 217 return IT_GENERIC; 218 } 219 } 220} 221 222bool MiniDisassembler::ProcessOperand(int flag_operand) { 223 bool succeeded = true; 224 if (AM_NOT_USED == flag_operand) 225 return succeeded; 226 227 // Decide what to do based on the addressing mode. 228 switch (flag_operand & AM_MASK) { 229 // No ModR/M byte indicated by these addressing modes, and no 230 // additional (e.g. immediate) parameters. 231 case AM_A: // Direct address 232 case AM_F: // EFLAGS register 233 case AM_X: // Memory addressed by the DS:SI register pair 234 case AM_Y: // Memory addressed by the ES:DI register pair 235 case AM_IMPLICIT: // Parameter is implicit, occupies no space in 236 // instruction 237 break; 238 239 // There is a ModR/M byte but it does not necessarily need 240 // to be decoded. 
241 case AM_C: // reg field of ModR/M selects a control register 242 case AM_D: // reg field of ModR/M selects a debug register 243 case AM_G: // reg field of ModR/M selects a general register 244 case AM_P: // reg field of ModR/M selects an MMX register 245 case AM_R: // mod field of ModR/M may refer only to a general register 246 case AM_S: // reg field of ModR/M selects a segment register 247 case AM_T: // reg field of ModR/M selects a test register 248 case AM_V: // reg field of ModR/M selects a 128-bit XMM register 249 have_modrm_ = true; 250 break; 251 252 // In these addressing modes, there is a ModR/M byte and it needs to be 253 // decoded. No other (e.g. immediate) params than indicated in ModR/M. 254 case AM_E: // Operand is either a general-purpose register or memory, 255 // specified by ModR/M byte 256 case AM_M: // ModR/M byte will refer only to memory 257 case AM_Q: // Operand is either an MMX register or memory (complex 258 // evaluation), specified by ModR/M byte 259 case AM_W: // Operand is either a 128-bit XMM register or memory (complex 260 // eval), specified by ModR/M byte 261 have_modrm_ = true; 262 should_decode_modrm_ = true; 263 break; 264 265 // These addressing modes specify an immediate or an offset value 266 // directly, so we need to look at the operand type to see how many 267 // bytes. 268 case AM_I: // Immediate data. 269 case AM_J: // Jump to offset. 270 case AM_O: // Operand is at offset. 271 switch (flag_operand & OT_MASK) { 272 case OT_B: // Byte regardless of operand-size attribute. 273 operand_bytes_ += OS_BYTE; 274 break; 275 case OT_C: // Byte or word, depending on operand-size attribute. 276 if (operand_is_32_bits_) 277 operand_bytes_ += OS_WORD; 278 else 279 operand_bytes_ += OS_BYTE; 280 break; 281 case OT_D: // Doubleword, regardless of operand-size attribute. 282 operand_bytes_ += OS_DOUBLE_WORD; 283 break; 284 case OT_DQ: // Double-quadword, regardless of operand-size attribute. 
285 operand_bytes_ += OS_DOUBLE_QUAD_WORD; 286 break; 287 case OT_P: // 32-bit or 48-bit pointer, depending on operand-size 288 // attribute. 289 if (operand_is_32_bits_) 290 operand_bytes_ += OS_48_BIT_POINTER; 291 else 292 operand_bytes_ += OS_32_BIT_POINTER; 293 break; 294 case OT_PS: // 128-bit packed single-precision floating-point data. 295 operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; 296 break; 297 case OT_Q: // Quadword, regardless of operand-size attribute. 298 operand_bytes_ += OS_QUAD_WORD; 299 break; 300 case OT_S: // 6-byte pseudo-descriptor. 301 operand_bytes_ += OS_PSEUDO_DESCRIPTOR; 302 break; 303 case OT_SD: // Scalar Double-Precision Floating-Point Value 304 case OT_PD: // Unaligned packed double-precision floating point value 305 operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; 306 break; 307 case OT_SS: 308 // Scalar element of a 128-bit packed single-precision 309 // floating data. 310 // We simply return enItUnknown since we don't have to support 311 // floating point 312 succeeded = false; 313 break; 314 case OT_V: // Word or doubleword, depending on operand-size attribute. 315 if (operand_is_32_bits_) 316 operand_bytes_ += OS_DOUBLE_WORD; 317 else 318 operand_bytes_ += OS_WORD; 319 break; 320 case OT_W: // Word, regardless of operand-size attribute. 321 operand_bytes_ += OS_WORD; 322 break; 323 324 // Can safely ignore these. 325 case OT_A: // Two one-word operands in memory or two double-word 326 // operands in memory 327 case OT_PI: // Quadword MMX technology register (e.g. mm0) 328 case OT_SI: // Doubleword integer register (e.g., eax) 329 break; 330 331 default: 332 break; 333 } 334 break; 335 336 default: 337 break; 338 } 339 340 return succeeded; 341} 342 343bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, 344 unsigned int* size) { 345 // If we don't need to decode, we just return the size of the ModR/M 346 // byte (there is never a SIB byte in this case). 
347 if (!should_decode_modrm_) { 348 (*size)++; 349 return true; 350 } 351 352 // We never care about the reg field, only the combination of the mod 353 // and r/m fields, so let's start by packing those fields together into 354 // 5 bits. 355 unsigned char modrm = (*start_byte); 356 unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field 357 modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field 358 mod = mod >> 3; // shift the mod field to the right place 359 modrm = mod | modrm; // combine the r/m and mod fields as discussed 360 mod = mod >> 3; // shift the mod field to bits 2..0 361 362 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field 363 // in bits 2..0, and mod contains the mod field in bits 2..0 364 365 const ModrmEntry* modrm_entry = 0; 366 if (address_is_32_bits_) 367 modrm_entry = &s_ia32_modrm_map_[modrm]; 368 else 369 modrm_entry = &s_ia16_modrm_map_[modrm]; 370 371 // Invariant: modrm_entry points to information that we need to decode 372 // the ModR/M byte. 373 374 // Add to the count of operand bytes, if the ModR/M byte indicates 375 // that some operands are encoded in the instruction. 376 if (modrm_entry->is_encoded_in_instruction_) 377 operand_bytes_ += modrm_entry->operand_size_; 378 379 // Process the SIB byte if necessary, and return the count 380 // of ModR/M and SIB bytes. 
381 if (modrm_entry->use_sib_byte_) { 382 (*size)++; 383 return ProcessSib(start_byte + 1, mod, size); 384 } else { 385 (*size)++; 386 return true; 387 } 388} 389 390bool MiniDisassembler::ProcessSib(unsigned char* start_byte, 391 unsigned char mod, 392 unsigned int* size) { 393 // get the mod field from the 2..0 bits of the SIB byte 394 unsigned char sib_base = (*start_byte) & 0x07; 395 if (0x05 == sib_base) { 396 switch (mod) { 397 case 0x00: // mod == 00 398 case 0x02: // mod == 10 399 operand_bytes_ += OS_DOUBLE_WORD; 400 break; 401 case 0x01: // mod == 01 402 operand_bytes_ += OS_BYTE; 403 break; 404 case 0x03: // mod == 11 405 // According to the IA-32 docs, there does not seem to be a disp 406 // value for this value of mod 407 default: 408 break; 409 } 410 } 411 412 (*size)++; 413 return true; 414} 415 416}; // namespace sidestep 417