disassembler_x86.cc revision a33720c7370d1c9e0d6569d7126bb06f2083c614
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "disassembler_x86.h" 18 19#include <iostream> 20 21#include "base/logging.h" 22#include "base/stringprintf.h" 23#include "thread.h" 24 25namespace art { 26namespace x86 { 27 28size_t DisassemblerX86::Dump(std::ostream& os, const uint8_t* begin) { 29 return DumpInstruction(os, begin); 30} 31 32void DisassemblerX86::Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) { 33 size_t length = 0; 34 for (const uint8_t* cur = begin; cur < end; cur += length) { 35 length = DumpInstruction(os, cur); 36 } 37} 38 39static const char* gReg8Names[] = { 40 "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" 41}; 42static const char* gExtReg8Names[] = { 43 "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil", 44 "r8l", "r9l", "r10l", "r11l", "r12l", "r13l", "r14l", "r15l" 45}; 46static const char* gReg16Names[] = { 47 "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", 48 "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" 49}; 50static const char* gReg32Names[] = { 51 "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", 52 "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" 53}; 54static const char* gReg64Names[] = { 55 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", 56 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" 57}; 58 59// 64-bit opcode REX modifier. 60constexpr uint8_t REX_W = 0b1000; 61constexpr uint8_t REX_R = 0b0100; 62constexpr uint8_t REX_X = 0b0010; 63constexpr uint8_t REX_B = 0b0001; 64 65static void DumpReg0(std::ostream& os, uint8_t rex, size_t reg, 66 bool byte_operand, uint8_t size_override) { 67 DCHECK_LT(reg, (rex == 0) ? 8u : 16u); 68 bool rex_w = (rex & REX_W) != 0; 69 if (byte_operand) { 70 os << ((rex == 0) ? gReg8Names[reg] : gExtReg8Names[reg]); 71 } else if (rex_w) { 72 os << gReg64Names[reg]; 73 } else if (size_override == 0x66) { 74 os << gReg16Names[reg]; 75 } else { 76 os << gReg32Names[reg]; 77 } 78} 79 80enum RegFile { GPR, MMX, SSE }; 81 82static void DumpAnyReg(std::ostream& os, uint8_t rex, size_t reg, 83 bool byte_operand, uint8_t size_override, RegFile reg_file) { 84 if (reg_file == GPR) { 85 DumpReg0(os, rex, reg, byte_operand, size_override); 86 } else if (reg_file == SSE) { 87 os << "xmm" << reg; 88 } else { 89 os << "mm" << reg; 90 } 91} 92 93static void DumpReg(std::ostream& os, uint8_t rex, uint8_t reg, 94 bool byte_operand, uint8_t size_override, RegFile reg_file) { 95 bool rex_r = (rex & REX_R) != 0; 96 size_t reg_num = rex_r ? (reg + 8) : reg; 97 DumpAnyReg(os, rex, reg_num, byte_operand, size_override, reg_file); 98} 99 100static void DumpRmReg(std::ostream& os, uint8_t rex, uint8_t reg, 101 bool byte_operand, uint8_t size_override, RegFile reg_file) { 102 bool rex_b = (rex & REX_B) != 0; 103 size_t reg_num = rex_b ? (reg + 8) : reg; 104 DumpAnyReg(os, rex, reg_num, byte_operand, size_override, reg_file); 105} 106 107static void DumpAddrReg(std::ostream& os, uint8_t rex, uint8_t reg) { 108 if (rex != 0) { 109 os << gReg64Names[reg]; 110 } else { 111 os << gReg32Names[reg]; 112 } 113} 114 115static void DumpBaseReg(std::ostream& os, uint8_t rex, uint8_t reg) { 116 bool rex_b = (rex & REX_B) != 0; 117 size_t reg_num = rex_b ? (reg + 8) : reg; 118 DumpAddrReg(os, rex, reg_num); 119} 120 121static void DumpIndexReg(std::ostream& os, uint8_t rex, uint8_t reg) { 122 bool rex_x = (rex & REX_X) != 0; 123 uint8_t reg_num = rex_x ? (reg + 8) : reg; 124 DumpAddrReg(os, rex, reg_num); 125} 126 127static void DumpOpcodeReg(std::ostream& os, uint8_t rex, uint8_t reg) { 128 bool rex_b = (rex & REX_B) != 0; 129 size_t reg_num = rex_b ? (reg + 8) : reg; 130 DumpReg0(os, rex, reg_num, false, 0); 131} 132 133enum SegmentPrefix { 134 kCs = 0x2e, 135 kSs = 0x36, 136 kDs = 0x3e, 137 kEs = 0x26, 138 kFs = 0x64, 139 kGs = 0x65, 140}; 141 142static void DumpSegmentOverride(std::ostream& os, uint8_t segment_prefix) { 143 switch (segment_prefix) { 144 case kCs: os << "cs:"; break; 145 case kSs: os << "ss:"; break; 146 case kDs: os << "ds:"; break; 147 case kEs: os << "es:"; break; 148 case kFs: os << "fs:"; break; 149 case kGs: os << "gs:"; break; 150 default: break; 151 } 152} 153 154size_t DisassemblerX86::DumpInstruction(std::ostream& os, const uint8_t* instr) { 155 const uint8_t* begin_instr = instr; 156 bool have_prefixes = true; 157 uint8_t prefix[4] = {0, 0, 0, 0}; 158 const char** modrm_opcodes = NULL; 159 do { 160 switch (*instr) { 161 // Group 1 - lock and repeat prefixes: 162 case 0xF0: 163 case 0xF2: 164 case 0xF3: 165 prefix[0] = *instr; 166 break; 167 // Group 2 - segment override prefixes: 168 case kCs: 169 case kSs: 170 case kDs: 171 case kEs: 172 case kFs: 173 case kGs: 174 prefix[1] = *instr; 175 break; 176 // Group 3 - operand size override: 177 case 0x66: 178 prefix[2] = *instr; 179 break; 180 // Group 4 - address size override: 181 case 0x67: 182 prefix[3] = *instr; 183 break; 184 default: 185 have_prefixes = false; 186 break; 187 } 188 if (have_prefixes) { 189 instr++; 190 } 191 } while (have_prefixes); 192 uint8_t rex = (supports_rex_ && (*instr >= 0x40) && (*instr <= 0x4F)) ? *instr : 0; 193 if (rex != 0) { 194 instr++; 195 } 196 bool has_modrm = false; 197 bool reg_is_opcode = false; 198 size_t immediate_bytes = 0; 199 size_t branch_bytes = 0; 200 std::ostringstream opcode; 201 bool store = false; // stores to memory (ie rm is on the left) 202 bool load = false; // loads from memory (ie rm is on the right) 203 bool byte_operand = false; 204 bool target_specific = false; // register name depends on target (64 vs 32 bits). 205 bool ax = false; // implicit use of ax 206 bool cx = false; // implicit use of cx 207 bool reg_in_opcode = false; // low 3-bits of opcode encode register parameter 208 bool no_ops = false; 209 RegFile src_reg_file = GPR; 210 RegFile dst_reg_file = GPR; 211 switch (*instr) { 212#define DISASSEMBLER_ENTRY(opname, \ 213 rm8_r8, rm32_r32, \ 214 r8_rm8, r32_rm32, \ 215 ax8_i8, ax32_i32) \ 216 case rm8_r8: opcode << #opname; store = true; has_modrm = true; byte_operand = true; break; \ 217 case rm32_r32: opcode << #opname; store = true; has_modrm = true; break; \ 218 case r8_rm8: opcode << #opname; load = true; has_modrm = true; byte_operand = true; break; \ 219 case r32_rm32: opcode << #opname; load = true; has_modrm = true; break; \ 220 case ax8_i8: opcode << #opname; ax = true; immediate_bytes = 1; byte_operand = true; break; \ 221 case ax32_i32: opcode << #opname; ax = true; immediate_bytes = 4; break; 222 223DISASSEMBLER_ENTRY(add, 224 0x00 /* RegMem8/Reg8 */, 0x01 /* RegMem32/Reg32 */, 225 0x02 /* Reg8/RegMem8 */, 0x03 /* Reg32/RegMem32 */, 226 0x04 /* Rax8/imm8 opcode */, 0x05 /* Rax32/imm32 */) 227DISASSEMBLER_ENTRY(or, 228 0x08 /* RegMem8/Reg8 */, 0x09 /* RegMem32/Reg32 */, 229 0x0A /* Reg8/RegMem8 */, 0x0B /* Reg32/RegMem32 */, 230 0x0C /* Rax8/imm8 opcode */, 0x0D /* Rax32/imm32 */) 231DISASSEMBLER_ENTRY(adc, 232 0x10 /* RegMem8/Reg8 */, 0x11 /* RegMem32/Reg32 */, 233 0x12 /* Reg8/RegMem8 */, 0x13 /* Reg32/RegMem32 */, 234 0x14 /* Rax8/imm8 opcode */, 0x15 /* Rax32/imm32 */) 235DISASSEMBLER_ENTRY(sbb, 236 0x18 /* RegMem8/Reg8 */, 0x19 /* RegMem32/Reg32 */, 237 0x1A /* Reg8/RegMem8 */, 0x1B /* Reg32/RegMem32 */, 238 0x1C /* Rax8/imm8 opcode */, 0x1D /* Rax32/imm32 */) 239DISASSEMBLER_ENTRY(and, 240 0x20 /* RegMem8/Reg8 */, 0x21 /* RegMem32/Reg32 */, 241 0x22 /* Reg8/RegMem8 */, 0x23 /* Reg32/RegMem32 */, 242 0x24 /* Rax8/imm8 opcode */, 0x25 /* Rax32/imm32 */) 243DISASSEMBLER_ENTRY(sub, 244 0x28 /* RegMem8/Reg8 */, 0x29 /* RegMem32/Reg32 */, 245 0x2A /* Reg8/RegMem8 */, 0x2B /* Reg32/RegMem32 */, 246 0x2C /* Rax8/imm8 opcode */, 0x2D /* Rax32/imm32 */) 247DISASSEMBLER_ENTRY(xor, 248 0x30 /* RegMem8/Reg8 */, 0x31 /* RegMem32/Reg32 */, 249 0x32 /* Reg8/RegMem8 */, 0x33 /* Reg32/RegMem32 */, 250 0x34 /* Rax8/imm8 opcode */, 0x35 /* Rax32/imm32 */) 251DISASSEMBLER_ENTRY(cmp, 252 0x38 /* RegMem8/Reg8 */, 0x39 /* RegMem32/Reg32 */, 253 0x3A /* Reg8/RegMem8 */, 0x3B /* Reg32/RegMem32 */, 254 0x3C /* Rax8/imm8 opcode */, 0x3D /* Rax32/imm32 */) 255 256#undef DISASSEMBLER_ENTRY 257 case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: 258 opcode << "push"; 259 reg_in_opcode = true; 260 target_specific = true; 261 break; 262 case 0x58: case 0x59: case 0x5A: case 0x5B: case 0x5C: case 0x5D: case 0x5E: case 0x5F: 263 opcode << "pop"; 264 reg_in_opcode = true; 265 target_specific = true; 266 break; 267 case 0x63: 268 if (rex == 0x48) { 269 opcode << "movsxd"; 270 has_modrm = true; 271 load = true; 272 } else { 273 // In 32-bit mode (!supports_rex_) this is ARPL, with no REX prefix the functionality is the 274 // same as 'mov' but the use of the instruction is discouraged. 275 opcode << StringPrintf("unknown opcode '%02X'", *instr); 276 } 277 break; 278 case 0x68: opcode << "push"; immediate_bytes = 4; break; 279 case 0x69: opcode << "imul"; load = true; has_modrm = true; immediate_bytes = 4; break; 280 case 0x6A: opcode << "push"; immediate_bytes = 1; break; 281 case 0x6B: opcode << "imul"; load = true; has_modrm = true; immediate_bytes = 1; break; 282 case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: 283 case 0x78: case 0x79: case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: case 0x7F: 284 static const char* condition_codes[] = 285 {"o", "no", "b/nae/c", "nb/ae/nc", "z/eq", "nz/ne", "be/na", "nbe/a", 286 "s", "ns", "p/pe", "np/po", "l/nge", "nl/ge", "le/ng", "nle/g" 287 }; 288 opcode << "j" << condition_codes[*instr & 0xF]; 289 branch_bytes = 1; 290 break; 291 case 0x86: case 0x87: 292 opcode << "xchg"; 293 store = true; 294 has_modrm = true; 295 byte_operand = (*instr == 0x86); 296 break; 297 case 0x88: opcode << "mov"; store = true; has_modrm = true; byte_operand = true; break; 298 case 0x89: opcode << "mov"; store = true; has_modrm = true; break; 299 case 0x8A: opcode << "mov"; load = true; has_modrm = true; byte_operand = true; break; 300 case 0x8B: opcode << "mov"; load = true; has_modrm = true; break; 301 302 case 0x0F: // 2 byte extended opcode 303 instr++; 304 switch (*instr) { 305 case 0x10: case 0x11: 306 if (prefix[0] == 0xF2) { 307 opcode << "movsd"; 308 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 309 } else if (prefix[0] == 0xF3) { 310 opcode << "movss"; 311 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 312 } else if (prefix[2] == 0x66) { 313 opcode << "movupd"; 314 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 315 } else { 316 opcode << "movups"; 317 } 318 has_modrm = true; 319 src_reg_file = dst_reg_file = SSE; 320 load = *instr == 0x10; 321 store = !load; 322 break; 323 case 0x12: case 0x13: 324 if (prefix[2] == 0x66) { 325 opcode << "movlpd"; 326 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 327 } else if (prefix[0] == 0) { 328 opcode << "movlps"; 329 } 330 has_modrm = true; 331 src_reg_file = dst_reg_file = SSE; 332 load = *instr == 0x12; 333 store = !load; 334 break; 335 case 0x16: case 0x17: 336 if (prefix[2] == 0x66) { 337 opcode << "movhpd"; 338 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 339 } else if (prefix[0] == 0) { 340 opcode << "movhps"; 341 } 342 has_modrm = true; 343 src_reg_file = dst_reg_file = SSE; 344 load = *instr == 0x16; 345 store = !load; 346 break; 347 case 0x28: case 0x29: 348 if (prefix[2] == 0x66) { 349 opcode << "movapd"; 350 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 351 } else if (prefix[0] == 0) { 352 opcode << "movaps"; 353 } 354 has_modrm = true; 355 src_reg_file = dst_reg_file = SSE; 356 load = *instr == 0x28; 357 store = !load; 358 break; 359 case 0x2A: 360 if (prefix[2] == 0x66) { 361 opcode << "cvtpi2pd"; 362 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 363 } else if (prefix[0] == 0xF2) { 364 opcode << "cvtsi2sd"; 365 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 366 } else if (prefix[0] == 0xF3) { 367 opcode << "cvtsi2ss"; 368 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 369 } else { 370 opcode << "cvtpi2ps"; 371 } 372 load = true; 373 has_modrm = true; 374 dst_reg_file = SSE; 375 break; 376 case 0x2C: 377 if (prefix[2] == 0x66) { 378 opcode << "cvttpd2pi"; 379 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 380 } else if (prefix[0] == 0xF2) { 381 opcode << "cvttsd2si"; 382 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 383 } else if (prefix[0] == 0xF3) { 384 opcode << "cvttss2si"; 385 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 386 } else { 387 opcode << "cvttps2pi"; 388 } 389 load = true; 390 has_modrm = true; 391 src_reg_file = SSE; 392 break; 393 case 0x2D: 394 if (prefix[2] == 0x66) { 395 opcode << "cvtpd2pi"; 396 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 397 } else if (prefix[0] == 0xF2) { 398 opcode << "cvtsd2si"; 399 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 400 } else if (prefix[0] == 0xF3) { 401 opcode << "cvtss2si"; 402 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 403 } else { 404 opcode << "cvtps2pi"; 405 } 406 load = true; 407 has_modrm = true; 408 src_reg_file = SSE; 409 break; 410 case 0x2E: 411 opcode << "u"; 412 // FALLTHROUGH 413 case 0x2F: 414 if (prefix[2] == 0x66) { 415 opcode << "comisd"; 416 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 417 } else { 418 opcode << "comiss"; 419 } 420 has_modrm = true; 421 load = true; 422 src_reg_file = dst_reg_file = SSE; 423 break; 424 case 0x38: // 3 byte extended opcode 425 instr++; 426 if (prefix[2] == 0x66) { 427 switch (*instr) { 428 case 0x40: 429 opcode << "pmulld"; 430 prefix[2] = 0; 431 has_modrm = true; 432 load = true; 433 src_reg_file = dst_reg_file = SSE; 434 break; 435 default: 436 opcode << StringPrintf("unknown opcode '0F 38 %02X'", *instr); 437 } 438 } else { 439 opcode << StringPrintf("unknown opcode '0F 38 %02X'", *instr); 440 } 441 break; 442 case 0x3A: // 3 byte extended opcode 443 instr++; 444 if (prefix[2] == 0x66) { 445 switch (*instr) { 446 case 0x14: 447 opcode << "pextrb"; 448 prefix[2] = 0; 449 has_modrm = true; 450 store = true; 451 dst_reg_file = SSE; 452 immediate_bytes = 1; 453 break; 454 case 0x16: 455 opcode << "pextrd"; 456 prefix[2] = 0; 457 has_modrm = true; 458 store = true; 459 dst_reg_file = SSE; 460 immediate_bytes = 1; 461 break; 462 default: 463 opcode << StringPrintf("unknown opcode '0F 3A %02X'", *instr); 464 } 465 } else { 466 opcode << StringPrintf("unknown opcode '0F 3A %02X'", *instr); 467 } 468 break; 469 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: 470 case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: 471 opcode << "cmov" << condition_codes[*instr & 0xF]; 472 has_modrm = true; 473 load = true; 474 break; 475 case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: 476 case 0x58: case 0x59: case 0x5C: case 0x5D: case 0x5E: case 0x5F: { 477 switch (*instr) { 478 case 0x50: opcode << "movmsk"; break; 479 case 0x51: opcode << "sqrt"; break; 480 case 0x52: opcode << "rsqrt"; break; 481 case 0x53: opcode << "rcp"; break; 482 case 0x54: opcode << "and"; break; 483 case 0x55: opcode << "andn"; break; 484 case 0x56: opcode << "or"; break; 485 case 0x57: opcode << "xor"; break; 486 case 0x58: opcode << "add"; break; 487 case 0x59: opcode << "mul"; break; 488 case 0x5C: opcode << "sub"; break; 489 case 0x5D: opcode << "min"; break; 490 case 0x5E: opcode << "div"; break; 491 case 0x5F: opcode << "max"; break; 492 default: LOG(FATAL) << "Unreachable"; 493 } 494 if (prefix[2] == 0x66) { 495 opcode << "pd"; 496 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 497 } else if (prefix[0] == 0xF2) { 498 opcode << "sd"; 499 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 500 } else if (prefix[0] == 0xF3) { 501 opcode << "ss"; 502 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 503 } else { 504 opcode << "ps"; 505 } 506 load = true; 507 has_modrm = true; 508 src_reg_file = dst_reg_file = SSE; 509 break; 510 } 511 case 0x5A: 512 if (prefix[2] == 0x66) { 513 opcode << "cvtpd2ps"; 514 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 515 } else if (prefix[0] == 0xF2) { 516 opcode << "cvtsd2ss"; 517 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 518 } else if (prefix[0] == 0xF3) { 519 opcode << "cvtss2sd"; 520 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 521 } else { 522 opcode << "cvtps2pd"; 523 } 524 load = true; 525 has_modrm = true; 526 src_reg_file = dst_reg_file = SSE; 527 break; 528 case 0x5B: 529 if (prefix[2] == 0x66) { 530 opcode << "cvtps2dq"; 531 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 532 } else if (prefix[0] == 0xF2) { 533 opcode << "bad opcode F2 0F 5B"; 534 } else if (prefix[0] == 0xF3) { 535 opcode << "cvttps2dq"; 536 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 537 } else { 538 opcode << "cvtdq2ps"; 539 } 540 load = true; 541 has_modrm = true; 542 src_reg_file = dst_reg_file = SSE; 543 break; 544 case 0x62: 545 if (prefix[2] == 0x66) { 546 src_reg_file = dst_reg_file = SSE; 547 prefix[2] = 0; // Clear prefix now. It has served its purpose as part of the opcode. 548 } else { 549 src_reg_file = dst_reg_file = MMX; 550 } 551 opcode << "punpckldq"; 552 load = true; 553 has_modrm = true; 554 break; 555 case 0x6E: 556 if (prefix[2] == 0x66) { 557 dst_reg_file = SSE; 558 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 559 } else { 560 dst_reg_file = MMX; 561 } 562 opcode << "movd"; 563 load = true; 564 has_modrm = true; 565 break; 566 case 0x6F: 567 if (prefix[2] == 0x66) { 568 src_reg_file = dst_reg_file = SSE; 569 opcode << "movdqa"; 570 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 571 } else if (prefix[0] == 0xF3) { 572 src_reg_file = dst_reg_file = SSE; 573 opcode << "movdqu"; 574 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 575 } else { 576 dst_reg_file = MMX; 577 opcode << "movq"; 578 } 579 load = true; 580 has_modrm = true; 581 break; 582 case 0x70: 583 if (prefix[2] == 0x66) { 584 opcode << "pshufd"; 585 prefix[2] = 0; 586 has_modrm = true; 587 store = true; 588 src_reg_file = dst_reg_file = SSE; 589 immediate_bytes = 1; 590 } else if (prefix[0] == 0xF2) { 591 opcode << "pshuflw"; 592 prefix[0] = 0; 593 has_modrm = true; 594 store = true; 595 src_reg_file = dst_reg_file = SSE; 596 immediate_bytes = 1; 597 } else { 598 opcode << StringPrintf("unknown opcode '0F %02X'", *instr); 599 } 600 break; 601 case 0x71: 602 if (prefix[2] == 0x66) { 603 dst_reg_file = SSE; 604 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 605 } else { 606 dst_reg_file = MMX; 607 } 608 static const char* x71_opcodes[] = {"unknown-71", "unknown-71", "psrlw", "unknown-71", "psraw", "unknown-71", "psllw", "unknown-71"}; 609 modrm_opcodes = x71_opcodes; 610 reg_is_opcode = true; 611 has_modrm = true; 612 store = true; 613 immediate_bytes = 1; 614 break; 615 case 0x72: 616 if (prefix[2] == 0x66) { 617 dst_reg_file = SSE; 618 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 619 } else { 620 dst_reg_file = MMX; 621 } 622 static const char* x72_opcodes[] = {"unknown-72", "unknown-72", "psrld", "unknown-72", "psrad", "unknown-72", "pslld", "unknown-72"}; 623 modrm_opcodes = x72_opcodes; 624 reg_is_opcode = true; 625 has_modrm = true; 626 store = true; 627 immediate_bytes = 1; 628 break; 629 case 0x73: 630 if (prefix[2] == 0x66) { 631 dst_reg_file = SSE; 632 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 633 } else { 634 dst_reg_file = MMX; 635 } 636 static const char* x73_opcodes[] = {"unknown-73", "unknown-73", "psrlq", "unknown-73", "unknown-73", "unknown-73", "psllq", "unknown-73"}; 637 modrm_opcodes = x73_opcodes; 638 reg_is_opcode = true; 639 has_modrm = true; 640 store = true; 641 immediate_bytes = 1; 642 break; 643 case 0x7E: 644 if (prefix[2] == 0x66) { 645 src_reg_file = SSE; 646 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 647 } else { 648 src_reg_file = MMX; 649 } 650 opcode << "movd"; 651 has_modrm = true; 652 store = true; 653 break; 654 case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: case 0x87: 655 case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E: case 0x8F: 656 opcode << "j" << condition_codes[*instr & 0xF]; 657 branch_bytes = 4; 658 break; 659 case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: case 0x95: case 0x96: case 0x97: 660 case 0x98: case 0x99: case 0x9A: case 0x9B: case 0x9C: case 0x9D: case 0x9E: case 0x9F: 661 opcode << "set" << condition_codes[*instr & 0xF]; 662 modrm_opcodes = NULL; 663 reg_is_opcode = true; 664 has_modrm = true; 665 store = true; 666 break; 667 case 0xA4: 668 opcode << "shld"; 669 has_modrm = true; 670 load = true; 671 immediate_bytes = 1; 672 break; 673 case 0xAC: 674 opcode << "shrd"; 675 has_modrm = true; 676 load = true; 677 immediate_bytes = 1; 678 break; 679 case 0xAE: 680 if (prefix[0] == 0xF3) { 681 prefix[0] = 0; // clear prefix now it's served its purpose as part of the opcode 682 static const char* xAE_opcodes[] = {"rdfsbase", "rdgsbase", "wrfsbase", "wrgsbase", "unknown-AE", "unknown-AE", "unknown-AE", "unknown-AE"}; 683 modrm_opcodes = xAE_opcodes; 684 reg_is_opcode = true; 685 has_modrm = true; 686 uint8_t reg_or_opcode = (instr[1] >> 3) & 7; 687 switch (reg_or_opcode) { 688 case 0: 689 prefix[1] = kFs; 690 load = true; 691 break; 692 case 1: 693 prefix[1] = kGs; 694 load = true; 695 break; 696 case 2: 697 prefix[1] = kFs; 698 store = true; 699 break; 700 case 3: 701 prefix[1] = kGs; 702 store = true; 703 break; 704 default: 705 load = true; 706 break; 707 } 708 } else { 709 static const char* xAE_opcodes[] = {"unknown-AE", "unknown-AE", "unknown-AE", "unknown-AE", "unknown-AE", "lfence", "mfence", "sfence"}; 710 modrm_opcodes = xAE_opcodes; 711 reg_is_opcode = true; 712 has_modrm = true; 713 load = true; 714 no_ops = true; 715 } 716 break; 717 case 0xAF: opcode << "imul"; has_modrm = true; load = true; break; 718 case 0xB1: opcode << "cmpxchg"; has_modrm = true; store = true; break; 719 case 0xB6: opcode << "movzxb"; has_modrm = true; load = true; break; 720 case 0xB7: opcode << "movzxw"; has_modrm = true; load = true; break; 721 case 0xBE: opcode << "movsxb"; has_modrm = true; load = true; break; 722 case 0xBF: opcode << "movsxw"; has_modrm = true; load = true; break; 723 case 0xC5: 724 if (prefix[2] == 0x66) { 725 opcode << "pextrw"; 726 prefix[2] = 0; 727 has_modrm = true; 728 store = true; 729 src_reg_file = dst_reg_file = SSE; 730 immediate_bytes = 1; 731 } else { 732 opcode << StringPrintf("unknown opcode '0F %02X'", *instr); 733 } 734 break; 735 case 0xC7: 736 static const char* x0FxC7_opcodes[] = { "unknown-0f-c7", "cmpxchg8b", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7", "unknown-0f-c7" }; 737 modrm_opcodes = x0FxC7_opcodes; 738 has_modrm = true; 739 reg_is_opcode = true; 740 store = true; 741 break; 742 case 0xC8: case 0xC9: case 0xCA: case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF: 743 opcode << "bswap"; 744 reg_in_opcode = true; 745 break; 746 case 0xDB: 747 if (prefix[2] == 0x66) { 748 src_reg_file = dst_reg_file = SSE; 749 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 750 } else { 751 src_reg_file = dst_reg_file = MMX; 752 } 753 opcode << "pand"; 754 prefix[2] = 0; 755 has_modrm = true; 756 load = true; 757 break; 758 case 0xD5: 759 if (prefix[2] == 0x66) { 760 opcode << "pmullw"; 761 prefix[2] = 0; 762 has_modrm = true; 763 load = true; 764 src_reg_file = dst_reg_file = SSE; 765 } else { 766 opcode << StringPrintf("unknown opcode '0F %02X'", *instr); 767 } 768 break; 769 case 0xEB: 770 if (prefix[2] == 0x66) { 771 src_reg_file = dst_reg_file = SSE; 772 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 773 } else { 774 src_reg_file = dst_reg_file = MMX; 775 } 776 opcode << "por"; 777 prefix[2] = 0; 778 has_modrm = true; 779 load = true; 780 break; 781 case 0xEF: 782 if (prefix[2] == 0x66) { 783 src_reg_file = dst_reg_file = SSE; 784 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 785 } else { 786 src_reg_file = dst_reg_file = MMX; 787 } 788 opcode << "pxor"; 789 prefix[2] = 0; 790 has_modrm = true; 791 load = true; 792 break; 793 case 0xF8: 794 if (prefix[2] == 0x66) { 795 src_reg_file = dst_reg_file = SSE; 796 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 797 } else { 798 src_reg_file = dst_reg_file = MMX; 799 } 800 opcode << "psubb"; 801 prefix[2] = 0; 802 has_modrm = true; 803 load = true; 804 break; 805 case 0xF9: 806 if (prefix[2] == 0x66) { 807 src_reg_file = dst_reg_file = SSE; 808 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 809 } else { 810 src_reg_file = dst_reg_file = MMX; 811 } 812 opcode << "psubw"; 813 prefix[2] = 0; 814 has_modrm = true; 815 load = true; 816 break; 817 case 0xFA: 818 if (prefix[2] == 0x66) { 819 src_reg_file = dst_reg_file = SSE; 820 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 821 } else { 822 src_reg_file = dst_reg_file = MMX; 823 } 824 opcode << "psubd"; 825 prefix[2] = 0; 826 has_modrm = true; 827 load = true; 828 break; 829 case 0xFC: 830 if (prefix[2] == 0x66) { 831 src_reg_file = dst_reg_file = SSE; 832 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 833 } else { 834 src_reg_file = dst_reg_file = MMX; 835 } 836 opcode << "paddb"; 837 prefix[2] = 0; 838 has_modrm = true; 839 load = true; 840 break; 841 case 0xFD: 842 if (prefix[2] == 0x66) { 843 src_reg_file = dst_reg_file = SSE; 844 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 845 } else { 846 src_reg_file = dst_reg_file = MMX; 847 } 848 opcode << "paddw"; 849 prefix[2] = 0; 850 has_modrm = true; 851 load = true; 852 break; 853 case 0xFE: 854 if (prefix[2] == 0x66) { 855 src_reg_file = dst_reg_file = SSE; 856 prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode 857 } else { 858 src_reg_file = dst_reg_file = MMX; 859 } 860 opcode << "paddd"; 861 prefix[2] = 0; 862 has_modrm = true; 863 load = true; 864 break; 865 default: 866 opcode << StringPrintf("unknown opcode '0F %02X'", *instr); 867 break; 868 } 869 break; 870 case 0x80: case 0x81: case 0x82: case 0x83: 871 static const char* x80_opcodes[] = {"add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"}; 872 modrm_opcodes = x80_opcodes; 873 has_modrm = true; 874 reg_is_opcode = true; 875 store = true; 876 byte_operand = (*instr & 1) == 0; 877 immediate_bytes = *instr == 0x81 ? 4 : 1; 878 break; 879 case 0x84: case 0x85: 880 opcode << "test"; 881 has_modrm = true; 882 load = true; 883 byte_operand = (*instr & 1) == 0; 884 break; 885 case 0x8D: 886 opcode << "lea"; 887 has_modrm = true; 888 load = true; 889 break; 890 case 0x8F: 891 opcode << "pop"; 892 has_modrm = true; 893 reg_is_opcode = true; 894 store = true; 895 break; 896 case 0x99: 897 opcode << "cdq"; 898 break; 899 case 0xAF: 900 opcode << (prefix[2] == 0x66 ? "scasw" : "scasl"); 901 break; 902 case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7: 903 opcode << "mov"; 904 immediate_bytes = 1; 905 byte_operand = true; 906 reg_in_opcode = true; 907 break; 908 case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF: 909 opcode << "mov"; 910 immediate_bytes = 4; 911 reg_in_opcode = true; 912 break; 913 case 0xC0: case 0xC1: 914 case 0xD0: case 0xD1: case 0xD2: case 0xD3: 915 static const char* shift_opcodes[] = 916 {"rol", "ror", "rcl", "rcr", "shl", "shr", "unknown-shift", "sar"}; 917 modrm_opcodes = shift_opcodes; 918 has_modrm = true; 919 reg_is_opcode = true; 920 store = true; 921 immediate_bytes = ((*instr & 0xf0) == 0xc0) ? 1 : 0; 922 cx = (*instr == 0xD2) || (*instr == 0xD3); 923 byte_operand = (*instr == 0xC0); 924 break; 925 case 0xC3: opcode << "ret"; break; 926 case 0xC6: 927 static const char* c6_opcodes[] = {"mov", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6", "unknown-c6"}; 928 modrm_opcodes = c6_opcodes; 929 store = true; 930 immediate_bytes = 1; 931 has_modrm = true; 932 reg_is_opcode = true; 933 byte_operand = true; 934 break; 935 case 0xC7: 936 static const char* c7_opcodes[] = {"mov", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7", "unknown-c7"}; 937 modrm_opcodes = c7_opcodes; 938 store = true; 939 immediate_bytes = 4; 940 has_modrm = true; 941 reg_is_opcode = true; 942 break; 943 case 0xCC: opcode << "int 3"; break; 944 case 0xD9: 945 static const char* d9_opcodes[] = {"flds", "unknown-d9", "fsts", "fstps", "fldenv", "fldcw", "fnstenv", "fnstcw"}; 946 modrm_opcodes = d9_opcodes; 947 store = true; 948 has_modrm = true; 949 reg_is_opcode = true; 950 break; 951 case 0xDB: 952 static const char* db_opcodes[] = {"fildl", "unknown-db", "unknown-db", "unknown-db", "unknown-db", "unknown-db", "unknown-db", "unknown-db"}; 953 modrm_opcodes = db_opcodes; 954 load = true; 955 has_modrm = true; 956 reg_is_opcode = true; 957 break; 958 case 0xDD: 959 static const char* dd_opcodes[] = {"fldl", "fisttp", "fstl", "fstpl", "frstor", "unknown-dd", "fnsave", "fnstsw"}; 960 modrm_opcodes = dd_opcodes; 961 store = true; 962 has_modrm = true; 963 reg_is_opcode = true; 964 break; 965 case 0xDF: 966 static const char* df_opcodes[] = {"fild", "unknown-df", "unknown-df", "unknown-df", "unknown-df", "fildll", "unknown-df", "unknown-df"}; 967 modrm_opcodes = df_opcodes; 968 load = true; 969 has_modrm = true; 970 reg_is_opcode = true; 971 break; 972 case 0xE3: opcode << "jecxz"; branch_bytes = 1; break; 973 case 0xE8: opcode << "call"; branch_bytes = 4; break; 974 case 0xE9: opcode << "jmp"; branch_bytes = 4; break; 975 case 0xEB: opcode << "jmp"; branch_bytes = 1; break; 976 case 0xF5: opcode << "cmc"; break; 977 case 0xF6: case 0xF7: 978 static const char* f7_opcodes[] = {"test", "unknown-f7", "not", "neg", "mul edx:eax, eax *", "imul edx:eax, eax *", "div edx:eax, edx:eax /", "idiv edx:eax, edx:eax /"}; 979 modrm_opcodes = f7_opcodes; 980 has_modrm = true; 981 reg_is_opcode = true; 982 store = true; 983 immediate_bytes = ((instr[1] & 0x38) == 0) ? 1 : 0; 984 break; 985 case 0xFF: 986 static const char* ff_opcodes[] = {"inc", "dec", "call", "call", "jmp", "jmp", "push", "unknown-ff"}; 987 modrm_opcodes = ff_opcodes; 988 has_modrm = true; 989 reg_is_opcode = true; 990 load = true; 991 break; 992 default: 993 opcode << StringPrintf("unknown opcode '%02X'", *instr); 994 break; 995 } 996 std::ostringstream args; 997 // We force the REX prefix to be available for 64-bit target 998 // in order to dump addr (base/index) registers correctly. 999 uint8_t rex64 = supports_rex_ ? (rex | 0x40) : rex; 1000 if (reg_in_opcode) { 1001 DCHECK(!has_modrm); 1002 // REX.W should be forced for 64-target and target-specific instructions (i.e., push or pop). 1003 uint8_t rex_w = (supports_rex_ && target_specific) ? (rex | 0x48) : rex; 1004 DumpOpcodeReg(args, rex_w, *instr & 0x7); 1005 } 1006 instr++; 1007 uint32_t address_bits = 0; 1008 if (has_modrm) { 1009 uint8_t modrm = *instr; 1010 instr++; 1011 uint8_t mod = modrm >> 6; 1012 uint8_t reg_or_opcode = (modrm >> 3) & 7; 1013 uint8_t rm = modrm & 7; 1014 std::ostringstream address; 1015 if (mod == 0 && rm == 5) { 1016 if (!supports_rex_) { // Absolute address. 1017 address_bits = *reinterpret_cast<const uint32_t*>(instr); 1018 address << StringPrintf("[0x%x]", address_bits); 1019 } else { // 64-bit RIP relative addressing. 1020 address << StringPrintf("[RIP + 0x%x]", *reinterpret_cast<const uint32_t*>(instr)); 1021 } 1022 instr += 4; 1023 } else if (rm == 4 && mod != 3) { // SIB 1024 uint8_t sib = *instr; 1025 instr++; 1026 uint8_t scale = (sib >> 6) & 3; 1027 uint8_t index = (sib >> 3) & 7; 1028 uint8_t base = sib & 7; 1029 address << "["; 1030 if (base != 5 || mod != 0) { 1031 DumpBaseReg(address, rex64, base); 1032 if (index != 4) { 1033 address << " + "; 1034 } 1035 } 1036 if (index != 4) { 1037 DumpIndexReg(address, rex64, index); 1038 if (scale != 0) { 1039 address << StringPrintf(" * %d", 1 << scale); 1040 } 1041 } 1042 if (mod == 0) { 1043 if (base == 5) { 1044 if (index != 4) { 1045 address << StringPrintf(" + %d", *reinterpret_cast<const int32_t*>(instr)); 1046 } else { 1047 // 64-bit low 32-bit absolute address, redundant absolute address encoding on 32-bit. 1048 address_bits = *reinterpret_cast<const uint32_t*>(instr); 1049 address << StringPrintf("%d", address_bits); 1050 } 1051 instr += 4; 1052 } 1053 } else if (mod == 1) { 1054 address << StringPrintf(" + %d", *reinterpret_cast<const int8_t*>(instr)); 1055 instr++; 1056 } else if (mod == 2) { 1057 address << StringPrintf(" + %d", *reinterpret_cast<const int32_t*>(instr)); 1058 instr += 4; 1059 } 1060 address << "]"; 1061 } else { 1062 if (mod == 3) { 1063 if (!no_ops) { 1064 DumpRmReg(address, rex, rm, byte_operand, prefix[2], load ? src_reg_file : dst_reg_file); 1065 } 1066 } else { 1067 address << "["; 1068 DumpBaseReg(address, rex64, rm); 1069 if (mod == 1) { 1070 address << StringPrintf(" + %d", *reinterpret_cast<const int8_t*>(instr)); 1071 instr++; 1072 } else if (mod == 2) { 1073 address << StringPrintf(" + %d", *reinterpret_cast<const int32_t*>(instr)); 1074 instr += 4; 1075 } 1076 address << "]"; 1077 } 1078 } 1079 1080 if (reg_is_opcode && modrm_opcodes != NULL) { 1081 opcode << modrm_opcodes[reg_or_opcode]; 1082 } 1083 1084 // Add opcode suffixes to indicate size. 1085 if (byte_operand) { 1086 opcode << 'b'; 1087 } else if ((rex & REX_W) != 0) { 1088 opcode << 'q'; 1089 } else if (prefix[2] == 0x66) { 1090 opcode << 'w'; 1091 } 1092 1093 if (load) { 1094 if (!reg_is_opcode) { 1095 DumpReg(args, rex, reg_or_opcode, byte_operand, prefix[2], dst_reg_file); 1096 args << ", "; 1097 } 1098 DumpSegmentOverride(args, prefix[1]); 1099 args << address.str(); 1100 } else { 1101 DCHECK(store); 1102 DumpSegmentOverride(args, prefix[1]); 1103 args << address.str(); 1104 if (!reg_is_opcode) { 1105 args << ", "; 1106 DumpReg(args, rex, reg_or_opcode, byte_operand, prefix[2], src_reg_file); 1107 } 1108 } 1109 } 1110 if (ax) { 1111 // If this opcode implicitly uses ax, ax is always the first arg. 1112 DumpReg(args, rex, 0 /* EAX */, byte_operand, prefix[2], GPR); 1113 } 1114 if (cx) { 1115 args << ", "; 1116 DumpReg(args, rex, 1 /* ECX */, true, prefix[2], GPR); 1117 } 1118 if (immediate_bytes > 0) { 1119 if (has_modrm || reg_in_opcode || ax || cx) { 1120 args << ", "; 1121 } 1122 if (immediate_bytes == 1) { 1123 args << StringPrintf("%d", *reinterpret_cast<const int8_t*>(instr)); 1124 instr++; 1125 } else { 1126 CHECK_EQ(immediate_bytes, 4u); 1127 if (prefix[2] == 0x66) { // Operand size override from 32-bit to 16-bit. 1128 args << StringPrintf("%d", *reinterpret_cast<const int16_t*>(instr)); 1129 instr += 2; 1130 } else { 1131 args << StringPrintf("%d", *reinterpret_cast<const int32_t*>(instr)); 1132 instr += 4; 1133 } 1134 } 1135 } else if (branch_bytes > 0) { 1136 DCHECK(!has_modrm); 1137 int32_t displacement; 1138 if (branch_bytes == 1) { 1139 displacement = *reinterpret_cast<const int8_t*>(instr); 1140 instr++; 1141 } else { 1142 CHECK_EQ(branch_bytes, 4u); 1143 displacement = *reinterpret_cast<const int32_t*>(instr); 1144 instr += 4; 1145 } 1146 args << StringPrintf("%+d (%p)", displacement, instr + displacement); 1147 } 1148 if (prefix[1] == kFs && !supports_rex_) { 1149 args << " ; "; 1150 Thread::DumpThreadOffset<4>(args, address_bits); 1151 } 1152 if (prefix[1] == kGs && supports_rex_) { 1153 args << " ; "; 1154 Thread::DumpThreadOffset<8>(args, address_bits); 1155 } 1156 std::stringstream hex; 1157 for (size_t i = 0; begin_instr + i < instr; ++i) { 1158 hex << StringPrintf("%02X", begin_instr[i]); 1159 } 1160 std::stringstream prefixed_opcode; 1161 switch (prefix[0]) { 1162 case 0xF0: prefixed_opcode << "lock "; break; 1163 case 0xF2: prefixed_opcode << "repne "; break; 1164 case 0xF3: prefixed_opcode << "repe "; break; 1165 case 0: break; 1166 default: LOG(FATAL) << "Unreachable"; 1167 } 1168 prefixed_opcode << opcode.str(); 1169 os << StringPrintf("%p: %22s \t%-7s ", begin_instr, hex.str().c_str(), 1170 prefixed_opcode.str().c_str()) 1171 << args.str() << '\n'; 1172 return instr - begin_instr; 1173} // NOLINT(readability/fn_size) 1174 1175} // namespace x86 1176} // namespace art 1177