// disassembler_x86.cc revision 706a10ea53a32455c6b3ffc5e5e0e1f6f191ec2a
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "disassembler_x86.h" 18 19#include "stringprintf.h" 20 21#include <stdint.h> 22#include <iostream> 23 24namespace art { 25namespace x86 { 26 27DisassemblerX86::DisassemblerX86() { 28} 29 30void DisassemblerX86::Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) { 31 size_t length = 0; 32 for (const uint8_t* cur = begin; cur < end; cur += length) { 33 length = DumpInstruction(os, cur); 34 } 35} 36 37static const char* gReg8Names[] = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" }; 38static const char* gReg16Names[] = { "ax", "cx", "dx", "bx", "sp", "bp", "di", "si" }; 39static const char* gReg32Names[] = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "edi", "esi" }; 40 41static void DumpReg0(std::ostream& os, uint8_t /*rex*/, size_t reg, 42 bool byte_operand, uint8_t size_override) { 43 DCHECK_LT(reg, 8u); 44 // TODO: combine rex into size 45 size_t size = byte_operand ? 1 : (size_override == 0x66 ? 
2 : 4); 46 switch (size) { 47 case 1: os << gReg8Names[reg]; break; 48 case 2: os << gReg16Names[reg]; break; 49 case 4: os << gReg32Names[reg]; break; 50 default: LOG(FATAL) << "unexpected size " << size; 51 } 52} 53 54static void DumpReg(std::ostream& os, uint8_t rex, uint8_t reg, 55 bool byte_operand, uint8_t size_override) { 56 size_t reg_num = reg; // TODO: combine with REX.R on 64bit 57 DumpReg0(os, rex, reg_num, byte_operand, size_override); 58} 59 60static void DumpBaseReg(std::ostream& os, uint8_t rex, uint8_t reg, 61 bool byte_operand, uint8_t size_override) { 62 size_t reg_num = reg; // TODO: combine with REX.B on 64bit 63 DumpReg0(os, rex, reg_num, byte_operand, size_override); 64} 65 66static void DumpIndexReg(std::ostream& os, uint8_t rex, uint8_t reg, 67 bool byte_operand, uint8_t size_override) { 68 int reg_num = reg; // TODO: combine with REX.X on 64bit 69 DumpReg0(os, rex, reg_num, byte_operand, size_override); 70} 71 72static void DumpSegmentOverride(std::ostream& os, uint8_t segment_prefix) { 73 switch (segment_prefix) { 74 case 0x2E: os << "cs:"; break; 75 case 0x36: os << "ss:"; break; 76 case 0x3E: os << "ds:"; break; 77 case 0x26: os << "es:"; break; 78 case 0x64: os << "fs:"; break; 79 case 0x65: os << "gs:"; break; 80 default: break; 81 } 82} 83 84size_t DisassemblerX86::DumpInstruction(std::ostream& os, const uint8_t* instr) { 85 const uint8_t* begin_instr = instr; 86 bool have_prefixes = true; 87 uint8_t prefix[4] = {0, 0, 0, 0}; 88 const char** modrm_opcodes = NULL; 89 do { 90 switch (*instr) { 91 // Group 1 - lock and repeat prefixes: 92 case 0xF0: 93 case 0xF2: 94 case 0xF3: 95 prefix[0] = *instr; 96 break; 97 // Group 2 - segment override prefixes: 98 case 0x2E: 99 case 0x36: 100 case 0x3E: 101 case 0x26: 102 case 0x64: 103 case 0x65: 104 prefix[1] = *instr; 105 break; 106 // Group 3 - operand size override: 107 case 0x66: 108 prefix[2] = *instr; 109 break; 110 // Group 4 - address size override: 111 case 0x67: 112 prefix[3] = 
*instr; 113 break; 114 default: 115 have_prefixes = false; 116 break; 117 } 118 if (have_prefixes) { 119 instr++; 120 } 121 } while (have_prefixes); 122 uint8_t rex = (*instr >= 0x40 && *instr <= 0x4F) ? *instr : 0; 123 bool has_modrm = false; 124 bool reg_is_opcode = false; 125 size_t immediate_bytes = 0; 126 size_t branch_bytes = 0; 127 std::ostringstream opcode; 128 bool store = false; // stores to memory (ie rm is on the left) 129 bool load = false; // loads from memory (ie rm is on the right) 130 bool byte_operand = false; 131 bool ax = false; // implicit use of ax 132 bool reg_in_opcode = false; // low 3-bits of opcode encode register parameter 133 switch (*instr) { 134#define DISASSEMBLER_ENTRY(opname, \ 135 rm8_r8, rm32_r32, \ 136 r8_rm8, r32_rm32, \ 137 ax8_i8, ax32_i32) \ 138 case rm8_r8: opcode << #opname; store = true; has_modrm = true; byte_operand = true; break; \ 139 case rm32_r32: opcode << #opname; store = true; has_modrm = true; break; \ 140 case r8_rm8: opcode << #opname; load = true; has_modrm = true; byte_operand = true; break; \ 141 case r32_rm32: opcode << #opname; load = true; has_modrm = true; break; \ 142 case ax8_i8: opcode << #opname; ax = true; immediate_bytes = 1; byte_operand = true; break; \ 143 case ax32_i32: opcode << #opname; ax = true; immediate_bytes = 4; break; 144 145DISASSEMBLER_ENTRY(add, 146 0x00 /* RegMem8/Reg8 */, 0x01 /* RegMem32/Reg32 */, 147 0x02 /* Reg8/RegMem8 */, 0x03 /* Reg32/RegMem32 */, 148 0x04 /* Rax8/imm8 opcode */, 0x05 /* Rax32/imm32 */) 149DISASSEMBLER_ENTRY(or, 150 0x08 /* RegMem8/Reg8 */, 0x09 /* RegMem32/Reg32 */, 151 0x0A /* Reg8/RegMem8 */, 0x0B /* Reg32/RegMem32 */, 152 0x0C /* Rax8/imm8 opcode */, 0x0D /* Rax32/imm32 */) 153DISASSEMBLER_ENTRY(adc, 154 0x10 /* RegMem8/Reg8 */, 0x11 /* RegMem32/Reg32 */, 155 0x12 /* Reg8/RegMem8 */, 0x13 /* Reg32/RegMem32 */, 156 0x14 /* Rax8/imm8 opcode */, 0x15 /* Rax32/imm32 */) 157DISASSEMBLER_ENTRY(sbb, 158 0x18 /* RegMem8/Reg8 */, 0x19 /* RegMem32/Reg32 */, 159 
0x1A /* Reg8/RegMem8 */, 0x1B /* Reg32/RegMem32 */, 160 0x1C /* Rax8/imm8 opcode */, 0x1D /* Rax32/imm32 */) 161DISASSEMBLER_ENTRY(and, 162 0x20 /* RegMem8/Reg8 */, 0x21 /* RegMem32/Reg32 */, 163 0x22 /* Reg8/RegMem8 */, 0x23 /* Reg32/RegMem32 */, 164 0x24 /* Rax8/imm8 opcode */, 0x25 /* Rax32/imm32 */) 165DISASSEMBLER_ENTRY(sub, 166 0x28 /* RegMem8/Reg8 */, 0x29 /* RegMem32/Reg32 */, 167 0x2A /* Reg8/RegMem8 */, 0x2B /* Reg32/RegMem32 */, 168 0x2C /* Rax8/imm8 opcode */, 0x2D /* Rax32/imm32 */) 169DISASSEMBLER_ENTRY(xor, 170 0x30 /* RegMem8/Reg8 */, 0x31 /* RegMem32/Reg32 */, 171 0x32 /* Reg8/RegMem8 */, 0x33 /* Reg32/RegMem32 */, 172 0x34 /* Rax8/imm8 opcode */, 0x35 /* Rax32/imm32 */) 173DISASSEMBLER_ENTRY(cmp, 174 0x38 /* RegMem8/Reg8 */, 0x39 /* RegMem32/Reg32 */, 175 0x3A /* Reg8/RegMem8 */, 0x3B /* Reg32/RegMem32 */, 176 0x3C /* Rax8/imm8 opcode */, 0x3D /* Rax32/imm32 */) 177 178#undef DISASSEMBLER_ENTRY 179 case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: 180 opcode << "push"; 181 reg_in_opcode = true; 182 break; 183 case 0x58: case 0x59: case 0x5A: case 0x5B: case 0x5C: case 0x5D: case 0x5E: case 0x5F: 184 opcode << "pop"; 185 reg_in_opcode = true; 186 break; 187 case 0x68: opcode << "push"; immediate_bytes = 4; break; 188 case 0x6A: opcode << "push"; immediate_bytes = 1; break; 189 case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: 190 case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: case 0x7F: 191 static const char* condition_codes[] = 192 {"o", "no", "b/nae/c", "nb/ae/nc", "z/eq", "nz/ne", "be/na", "nbe/a", 193 "s", "ns", "p/pe", "np/po", "l/nge","nl/ge", "le/ng", "nle/g" 194 }; 195 opcode << "j" << condition_codes[*instr & 0xF]; 196 branch_bytes = 1; 197 break; 198 case 0x88: opcode << "mov"; store = true; has_modrm = true; byte_operand = true; break; 199 case 0x89: opcode << "mov"; store = true; has_modrm = true; break; 200 case 0x8A: opcode << 
"mov"; load = true; has_modrm = true; byte_operand = true; break; 201 case 0x8B: opcode << "mov"; load = true; has_modrm = true; break; 202 203 case 0x0F: // 2 byte extended opcode 204 instr++; 205 switch (*instr) { 206 case 0x38: // 3 byte extended opcode 207 opcode << StringPrintf("unknown opcode '0F 38 %02X'", *instr); 208 break; 209 case 0x3A: // 3 byte extended opcode 210 opcode << StringPrintf("unknown opcode '0F 3A %02X'", *instr); 211 break; 212 case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: case 0x87: 213 case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E: case 0x8F: 214 opcode << "j" << condition_codes[*instr & 0xF]; 215 branch_bytes = 4; 216 break; 217 default: 218 opcode << StringPrintf("unknown opcode '0F %02X'", *instr); 219 break; 220 } 221 break; 222 case 0x80: case 0x81: case 0x82: case 0x83: 223 static const char* x80_opcodes[] = {"add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"}; 224 modrm_opcodes = x80_opcodes; 225 has_modrm = true; 226 reg_is_opcode = true; 227 store = true; 228 byte_operand = (*instr & 1) == 0; 229 immediate_bytes = *instr == 0x81 ? 
4 : 1; 230 break; 231 case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: 232 opcode << "mov"; 233 immediate_bytes = 1; 234 reg_in_opcode = true; 235 break; 236 case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF: 237 opcode << "mov"; 238 immediate_bytes = 4; 239 reg_in_opcode = true; 240 break; 241 case 0xC3: opcode << "ret"; break; 242 case 0xE9: opcode << "jmp"; branch_bytes = 4; break; 243 case 0xEB: opcode << "jmp"; branch_bytes = 1; break; 244 case 0xFF: 245 static const char* ff_opcodes[] = {"inc", "dec", "call", "call", "jmp", "jmp", "push", "unknown-ff"}; 246 modrm_opcodes = ff_opcodes; 247 has_modrm = true; 248 reg_is_opcode = true; 249 load = true; 250 break; 251 default: 252 opcode << StringPrintf("unknown opcode '%02X'", *instr); 253 break; 254 } 255 std::ostringstream args; 256 if (reg_in_opcode) { 257 DCHECK(!has_modrm); 258 DumpReg(args, rex, *instr & 0x7, false, prefix[2]); 259 } 260 instr++; 261 if (has_modrm) { 262 uint8_t modrm = *instr; 263 instr++; 264 uint8_t mod = modrm >> 6; 265 uint8_t reg_or_opcode = (modrm >> 3) & 7; 266 uint8_t rm = modrm & 7; 267 std::ostringstream address; 268 if (mod == 0 && rm == 5) { // fixed address 269 address << StringPrintf("[0x%X]", *reinterpret_cast<const uint32_t*>(instr)); 270 instr += 4; 271 } else if (rm == 4 && mod != 3) { // SIB 272 uint8_t sib = *instr; 273 instr++; 274 uint8_t ss = (sib >> 6) & 3; 275 uint8_t index = (sib >> 3) & 7; 276 uint8_t base = sib & 7; 277 address << "["; 278 if (base != 5 || mod != 0) { 279 DumpBaseReg(address, rex, base, byte_operand, prefix[2]); 280 if (index != 4) { 281 address << " + "; 282 } 283 } 284 if (index != 4) { 285 DumpIndexReg(address, rex, index, byte_operand, prefix[2]); 286 if (ss != 0) { 287 address << StringPrintf(" * %d", 1 << ss); 288 } 289 } 290 if (mod == 1) { 291 address << StringPrintf(" + %d", *reinterpret_cast<const int8_t*>(instr)); 292 instr++; 293 } else if (mod == 2) { 294 
address << StringPrintf(" + %d", *reinterpret_cast<const int32_t*>(instr)); 295 instr += 4; 296 } 297 address << "]"; 298 } else { 299 if (mod != 3) { 300 address << "["; 301 } 302 DumpBaseReg(address, rex, rm, byte_operand, prefix[2]); 303 if (mod == 1) { 304 address << StringPrintf(" + %d", *reinterpret_cast<const int8_t*>(instr)); 305 instr++; 306 } else if (mod == 2) { 307 address << StringPrintf(" + %d", *reinterpret_cast<const int32_t*>(instr)); 308 instr += 4; 309 } 310 if (mod != 3) { 311 address << "]"; 312 } 313 } 314 315 if (reg_is_opcode) { 316 opcode << modrm_opcodes[reg_or_opcode]; 317 } 318 if (load) { 319 if (!reg_is_opcode) { 320 DumpReg(args, rex, reg_or_opcode, byte_operand, prefix[2]); 321 args << ", "; 322 } 323 DumpSegmentOverride(args, prefix[1]); 324 args << address.str(); 325 } else { 326 DCHECK(store); 327 DumpSegmentOverride(args, prefix[1]); 328 args << address.str(); 329 if (!reg_is_opcode) { 330 args << ", "; 331 DumpReg(args, rex, reg_or_opcode, byte_operand, prefix[2]); 332 } 333 } 334 } 335 if (ax) { 336 DumpReg(args, rex, 0 /* EAX */, byte_operand, prefix[2]); 337 } 338 if (immediate_bytes > 0) { 339 if (has_modrm || reg_in_opcode || ax) { 340 args << ", "; 341 } 342 if (immediate_bytes == 1) { 343 args << StringPrintf("%d", *reinterpret_cast<const int8_t*>(instr)); 344 instr++; 345 } else { 346 CHECK_EQ(immediate_bytes, 4u); 347 args << StringPrintf("%d", *reinterpret_cast<const int32_t*>(instr)); 348 instr += 4; 349 } 350 } else if (branch_bytes > 0) { 351 DCHECK(!has_modrm); 352 int32_t displacement; 353 if (branch_bytes == 1) { 354 displacement = *reinterpret_cast<const int8_t*>(instr); 355 instr++; 356 } else { 357 CHECK_EQ(branch_bytes, 4u); 358 displacement = *reinterpret_cast<const int32_t*>(instr); 359 instr += 4; 360 } 361 args << StringPrintf("%d (%p)", displacement, instr + displacement); 362 } 363 os << StringPrintf("\t\t\t%p: ", begin_instr); 364 for (size_t i = 0; begin_instr + i < instr; ++i) { 365 os << 
StringPrintf("%02X", begin_instr[i]); 366 } 367 os << StringPrintf("\t%-7s ", opcode.str().c_str()) << args.str() << std::endl; 368 return instr - begin_instr; 369} 370 371} // namespace x86 372} // namespace art 373