/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_x86_defs.h"


/* --------- Registers. --------- */

void ppHRegX86 ( HReg reg )
{
   Int r;
   static HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%s", ireg32_names[r]);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 6);
         vex_printf("%%fake%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 8);
         vex_printf("%%xmm%d", r);
         return;
      default:
         vpanic("ppHRegX86");
   }
}

HReg hregX86_EAX ( void ) { return mkHReg(0, HRcInt32, False); }
HReg hregX86_ECX ( void ) { return mkHReg(1, HRcInt32, False); }
HReg hregX86_EDX ( void ) { return mkHReg(2, HRcInt32, False); }
HReg hregX86_EBX ( void ) { return mkHReg(3, HRcInt32, False); }
HReg hregX86_ESP ( void ) { return mkHReg(4, HRcInt32, False); }
HReg hregX86_EBP ( void ) { return mkHReg(5, HRcInt32, False); }
HReg hregX86_ESI ( void ) { return mkHReg(6, HRcInt32, False); }
HReg hregX86_EDI ( void ) { return mkHReg(7, HRcInt32, False); }

HReg hregX86_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
HReg hregX86_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
HReg hregX86_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
HReg hregX86_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
HReg hregX86_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
HReg hregX86_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }

HReg hregX86_XMM0 ( void ) { return mkHReg(0, HRcVec128, False); }
HReg hregX86_XMM1 ( void ) { return mkHReg(1, HRcVec128, False); }
HReg hregX86_XMM2 ( void ) { return mkHReg(2, HRcVec128, False); }
HReg hregX86_XMM3 ( void ) { return mkHReg(3, HRcVec128, False); }
HReg hregX86_XMM4 ( void ) { return mkHReg(4, HRcVec128, False); }
HReg hregX86_XMM5 ( void ) { return mkHReg(5, HRcVec128, False); }
HReg hregX86_XMM6 ( void ) { return mkHReg(6, HRcVec128, False); }
HReg hregX86_XMM7 ( void ) { return mkHReg(7, HRcVec128, False); }


void getAllocableRegs_X86 ( Int* nregs, HReg** arr )
{
   *nregs = 20;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   (*arr)[0]  = hregX86_EAX();
   (*arr)[1]  = hregX86_EBX();
   (*arr)[2]  = hregX86_ECX();
   (*arr)[3]  = hregX86_EDX();
   (*arr)[4]  = hregX86_ESI();
   (*arr)[5]  = hregX86_EDI();
   (*arr)[6]  = hregX86_FAKE0();
   (*arr)[7]  = hregX86_FAKE1();
   (*arr)[8]  = hregX86_FAKE2();
   (*arr)[9]  = hregX86_FAKE3();
   (*arr)[10] = hregX86_FAKE4();
   (*arr)[11] = hregX86_FAKE5();
   (*arr)[12] = hregX86_XMM0();
   (*arr)[13] = hregX86_XMM1();
   (*arr)[14] = hregX86_XMM2();
   (*arr)[15] = hregX86_XMM3();
   (*arr)[16] = hregX86_XMM4();
   (*arr)[17] = hregX86_XMM5();
   (*arr)[18] = hregX86_XMM6();
   (*arr)[19] = hregX86_XMM7();
}


/* --------- Condition codes, Intel encoding. --------- */

HChar* showX86CondCode ( X86CondCode cond )
{
   switch (cond) {
      case Xcc_O:      return "o";
      case Xcc_NO:     return "no";
      case Xcc_B:      return "b";
      case Xcc_NB:     return "nb";
      case Xcc_Z:      return "z";
      case Xcc_NZ:     return "nz";
      case Xcc_BE:     return "be";
      case Xcc_NBE:    return "nbe";
      case Xcc_S:      return "s";
      case Xcc_NS:     return "ns";
      case Xcc_P:      return "p";
      case Xcc_NP:     return "np";
      case Xcc_L:      return "l";
      case Xcc_NL:     return "nl";
      case Xcc_LE:     return "le";
      case Xcc_NLE:    return "nle";
      case Xcc_ALWAYS: return "ALWAYS";
      default: vpanic("ppX86CondCode");
   }
}


/* --------- X86AMode: memory address expressions. --------- */
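
/* Note added for clarity (not in the original source): Xam_IR denotes
   an "imm32(reg)" address, e.g. 4(%ebp), whilst Xam_IRRS denotes an
   "imm32(base,index,1<<shift)" address, e.g. 0x10(%esi,%edi,4) when
   shift == 2. */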

X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IR;
   am->Xam.IR.imm = imm32;
   am->Xam.IR.reg = reg;
   return am;
}
X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   X86AMode* am = LibVEX_Alloc(sizeof(X86AMode));
   am->tag = Xam_IRRS;
   am->Xam.IRRS.imm = imm32;
   am->Xam.IRRS.base = base;
   am->Xam.IRRS.index = indEx;
   am->Xam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}

X86AMode* dopyX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
      case Xam_IRRS:
         return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
                               am->Xam.IRRS.index, am->Xam.IRRS.shift );
      default:
         vpanic("dopyX86AMode");
   }
}

void ppX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         if (am->Xam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Xam.IR.imm);
         ppHRegX86(am->Xam.IR.reg);
         vex_printf(")");
         return;
      case Xam_IRRS:
         vex_printf("0x%x(", am->Xam.IRRS.imm);
         ppHRegX86(am->Xam.IRRS.base);
         vex_printf(",");
         ppHRegX86(am->Xam.IRRS.index);
         vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
         return;
      default:
         vpanic("ppX86AMode");
   }
}

static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         addHRegUse(u, HRmRead, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         addHRegUse(u, HRmRead, am->Xam.IRRS.base);
         addHRegUse(u, HRmRead, am->Xam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_X86AMode");
   }
}

static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
         am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_X86AMode");
   }
}

/* --------- Operand, which can be reg, immediate or memory. --------- */

X86RMI* X86RMI_Imm ( UInt imm32 ) {
   X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
   op->tag = Xrmi_Imm;
   op->Xrmi.Imm.imm32 = imm32;
   return op;
}
X86RMI* X86RMI_Reg ( HReg reg ) {
   X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
   op->tag = Xrmi_Reg;
   op->Xrmi.Reg.reg = reg;
   return op;
}
X86RMI* X86RMI_Mem ( X86AMode* am ) {
   X86RMI* op = LibVEX_Alloc(sizeof(X86RMI));
   op->tag = Xrmi_Mem;
   op->Xrmi.Mem.am = am;
   return op;
}

void ppX86RMI ( X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         vex_printf("$0x%x", op->Xrmi.Imm.imm32);
         return;
      case Xrmi_Reg:
         ppHRegX86(op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         ppX86AMode(op->Xrmi.Mem.am);
         return;
      default:
         vpanic("ppX86RMI");
   }
}
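
/* Note added for clarity (not in the original source): RMI is the most
   general operand form, covering e.g. "addl $4,%eax" (Imm),
   "addl %ebx,%eax" (Reg) and "addl 4(%esi),%eax" (Mem).  The RI and RM
   forms further below are the restricted variants for contexts where a
   memory operand or an immediate, respectively, cannot be encoded. */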

/* An X86RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_X86RMI");
   }
}

static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         mapRegs_X86AMode(m, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("mapRegs_X86RMI");
   }
}


/* --------- Operand, which can be reg or immediate only. --------- */

X86RI* X86RI_Imm ( UInt imm32 ) {
   X86RI* op = LibVEX_Alloc(sizeof(X86RI));
   op->tag = Xri_Imm;
   op->Xri.Imm.imm32 = imm32;
   return op;
}
X86RI* X86RI_Reg ( HReg reg ) {
   X86RI* op = LibVEX_Alloc(sizeof(X86RI));
   op->tag = Xri_Reg;
   op->Xri.Reg.reg = reg;
   return op;
}

void ppX86RI ( X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         vex_printf("$0x%x", op->Xri.Imm.imm32);
         return;
      case Xri_Reg:
         ppHRegX86(op->Xri.Reg.reg);
         return;
      default:
         vpanic("ppX86RI");
   }
}

/* An X86RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         addHRegUse(u, HRmRead, op->Xri.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RI");
   }
}

static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RI");
   }
}


/* --------- Operand, which can be reg or memory only. --------- */

X86RM* X86RM_Reg ( HReg reg ) {
   X86RM* op = LibVEX_Alloc(sizeof(X86RM));
   op->tag = Xrm_Reg;
   op->Xrm.Reg.reg = reg;
   return op;
}
X86RM* X86RM_Mem ( X86AMode* am ) {
   X86RM* op = LibVEX_Alloc(sizeof(X86RM));
   op->tag = Xrm_Mem;
   op->Xrm.Mem.am = am;
   return op;
}

void ppX86RM ( X86RM* op ) {
   switch (op->tag) {
      case Xrm_Mem:
         ppX86AMode(op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         ppHRegX86(op->Xrm.Reg.reg);
         return;
      default:
         vpanic("ppX86RM");
   }
}

/* Because an X86RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Xrm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_X86AMode(u, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RM");
   }
}

static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
{
   switch (op->tag) {
      case Xrm_Mem:
         mapRegs_X86AMode(m, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RM");
   }
}


/* --------- Instructions. --------- */

HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV: return "mov";
      case Xalu_CMP: return "cmp";
      case Xalu_ADD: return "add";
      case Xalu_SUB: return "sub";
      case Xalu_ADC: return "adc";
      case Xalu_SBB: return "sbb";
      case Xalu_AND: return "and";
      case Xalu_OR:  return "or";
      case Xalu_XOR: return "xor";
      case Xalu_MUL: return "mul";
      default: vpanic("showX86AluOp");
   }
}

HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}

HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
      case Xsse_AVG16U:   return "pavgw";
      case Xsse_MAX16S:   return "pmaxw";
      case Xsse_MAX8U:    return "pmaxub";
      case Xsse_MIN16S:   return "pminw";
      case Xsse_MIN8U:    return "pminub";
      case Xsse_CMPEQ8:   return "pcmpeqb";
      case Xsse_CMPEQ16:  return "pcmpeqw";
      case Xsse_CMPEQ32:  return "pcmpeqd";
      case Xsse_CMPGT8S:  return "pcmpgtb";
      case Xsse_CMPGT16S: return "pcmpgtw";
      case Xsse_CMPGT32S: return "pcmpgtd";
      case Xsse_SHL16:    return "psllw";
      case Xsse_SHL32:    return "pslld";
      case Xsse_SHL64:    return "psllq";
      case Xsse_SHR16:    return "psrlw";
      case Xsse_SHR32:    return "psrld";
      case Xsse_SHR64:    return "psrlq";
      case Xsse_SAR16:    return "psraw";
      case Xsse_SAR32:    return "psrad";
      case Xsse_PACKSSD:  return "packssdw";
      case Xsse_PACKSSW:  return "packsswb";
      case Xsse_PACKUSW:  return "packuswb";
      case Xsse_UNPCKHB:  return "punpckhb";
      case Xsse_UNPCKHW:  return "punpckhw";
      case Xsse_UNPCKHD:  return "punpckhd";
      case Xsse_UNPCKHQ:  return "punpckhq";
      case Xsse_UNPCKLB:  return "punpcklb";
      case Xsse_UNPCKLW:  return "punpcklw";
      case Xsse_UNPCKLD:  return "punpckld";
      case Xsse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showX86SseOp");
   }
}

X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Alu32R;
   i->Xin.Alu32R.op = op;
   i->Xin.Alu32R.src = src;
   i->Xin.Alu32R.dst = dst;
   return i;
}
X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Alu32M;
   i->Xin.Alu32M.op = op;
   i->Xin.Alu32M.src = src;
   i->Xin.Alu32M.dst = dst;
   vassert(op != Xalu_MUL);
   return i;
}
X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Sh32;
   i->Xin.Sh32.op = op;
   i->Xin.Sh32.src = src;
   i->Xin.Sh32.dst = dst;
   return i;
}
X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Test32;
   i->Xin.Test32.imm32 = imm32;
   i->Xin.Test32.dst = dst;
   return i;
}
X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Unary32;
   i->Xin.Unary32.op = op;
   i->Xin.Unary32.dst = dst;
   return i;
}
X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Lea32;
   i->Xin.Lea32.am = am;
   i->Xin.Lea32.dst = dst;
   return i;
}
X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_MulL;
   i->Xin.MulL.syned = syned;
   i->Xin.MulL.src = src;
   return i;
}
X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Div;
   i->Xin.Div.syned = syned;
   i->Xin.Div.src = src;
   return i;
}
X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Sh3232;
   i->Xin.Sh3232.op = op;
   i->Xin.Sh3232.amt = amt;
   i->Xin.Sh3232.src = src;
   i->Xin.Sh3232.dst = dst;
   vassert(op == Xsh_SHL || op == Xsh_SHR);
   return i;
}
X86Instr* X86Instr_Push ( X86RMI* src ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Push;
   i->Xin.Push.src = src;
   return i;
}
X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Call;
   i->Xin.Call.cond = cond;
   i->Xin.Call.target = target;
   i->Xin.Call.regparms = regparms;
   vassert(regparms >= 0 && regparms <= 3);
   return i;
}
X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
                             X86CondCode cond, Bool toFastEP ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_XDirect;
   i->Xin.XDirect.dstGA = dstGA;
   i->Xin.XDirect.amEIP = amEIP;
   i->Xin.XDirect.cond = cond;
   i->Xin.XDirect.toFastEP = toFastEP;
   return i;
}
X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
                            X86CondCode cond ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_XIndir;
   i->Xin.XIndir.dstGA = dstGA;
   i->Xin.XIndir.amEIP = amEIP;
   i->Xin.XIndir.cond = cond;
   return i;
}
X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
                               X86CondCode cond, IRJumpKind jk ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_XAssisted;
   i->Xin.XAssisted.dstGA = dstGA;
   i->Xin.XAssisted.amEIP = amEIP;
   i->Xin.XAssisted.cond = cond;
   i->Xin.XAssisted.jk = jk;
   return i;
}
X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_CMov32;
   i->Xin.CMov32.cond = cond;
   i->Xin.CMov32.src = src;
   i->Xin.CMov32.dst = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
                            X86AMode* src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_LoadEX;
   i->Xin.LoadEX.szSmall = szSmall;
   i->Xin.LoadEX.syned = syned;
   i->Xin.LoadEX.src = src;
   i->Xin.LoadEX.dst = dst;
   vassert(szSmall == 1 || szSmall == 2);
   return i;
}
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Store;
   i->Xin.Store.sz = sz;
   i->Xin.Store.src = src;
   i->Xin.Store.dst = dst;
   vassert(sz == 1 || sz == 2);
   return i;
}
X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Set32;
   i->Xin.Set32.cond = cond;
   i->Xin.Set32.dst = dst;
   return i;
}
X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Bsfr32;
   i->Xin.Bsfr32.isFwds = isFwds;
   i->Xin.Bsfr32.src = src;
   i->Xin.Bsfr32.dst = dst;
   return i;
}
X86Instr* X86Instr_MFence ( UInt hwcaps ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_MFence;
   i->Xin.MFence.hwcaps = hwcaps;
   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
                            |VEX_HWCAPS_X86_SSE2
                            |VEX_HWCAPS_X86_SSE3
                            |VEX_HWCAPS_X86_LZCNT)));
   return i;
}
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_ACAS;
   i->Xin.ACAS.addr = addr;
   i->Xin.ACAS.sz = sz;
   vassert(sz == 4 || sz == 2 || sz == 1);
   return i;
}
X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_DACAS;
   i->Xin.DACAS.addr = addr;
   return i;
}
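
/* Note added for clarity (not in the original source): the FP
   constructors below operate on the six flat registers %fake0..%fake5
   printed by ppHRegX86 above; how these map onto the real x87
   register stack is a concern of the instruction emitter, not of
   these constructors. */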

X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpUnary;
   i->Xin.FpUnary.op = op;
   i->Xin.FpUnary.src = src;
   i->Xin.FpUnary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpBinary;
   i->Xin.FpBinary.op = op;
   i->Xin.FpBinary.srcL = srcL;
   i->Xin.FpBinary.srcR = srcR;
   i->Xin.FpBinary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpLdSt;
   i->Xin.FpLdSt.isLoad = isLoad;
   i->Xin.FpLdSt.sz = sz;
   i->Xin.FpLdSt.reg = reg;
   i->Xin.FpLdSt.addr = addr;
   vassert(sz == 4 || sz == 8 || sz == 10);
   return i;
}
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
                             HReg reg, X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpLdStI;
   i->Xin.FpLdStI.isLoad = isLoad;
   i->Xin.FpLdStI.sz = sz;
   i->Xin.FpLdStI.reg = reg;
   i->Xin.FpLdStI.addr = addr;
   vassert(sz == 2 || sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Fp64to32;
   i->Xin.Fp64to32.src = src;
   i->Xin.Fp64to32.dst = dst;
   return i;
}
X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpCMov;
   i->Xin.FpCMov.cond = cond;
   i->Xin.FpCMov.src = src;
   i->Xin.FpCMov.dst = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpLdCW;
   i->Xin.FpLdCW.addr = addr;
   return i;
}
X86Instr* X86Instr_FpStSW_AX ( void ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpStSW_AX;
   return i;
}
X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_FpCmp;
   i->Xin.FpCmp.srcL = srcL;
   i->Xin.FpCmp.srcR = srcR;
   i->Xin.FpCmp.dst = dst;
   return i;
}
X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseConst;
   i->Xin.SseConst.con = con;
   i->Xin.SseConst.dst = dst;
   vassert(hregClass(dst) == HRcVec128);
   return i;
}
X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseLdSt;
   i->Xin.SseLdSt.isLoad = isLoad;
   i->Xin.SseLdSt.reg = reg;
   i->Xin.SseLdSt.addr = addr;
   return i;
}
X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
{
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseLdzLO;
   i->Xin.SseLdzLO.sz = toUChar(sz);
   i->Xin.SseLdzLO.reg = reg;
   i->Xin.SseLdzLO.addr = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Sse32Fx4;
   i->Xin.Sse32Fx4.op = op;
   i->Xin.Sse32Fx4.src = src;
   i->Xin.Sse32Fx4.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Sse32FLo;
   i->Xin.Sse32FLo.op = op;
   i->Xin.Sse32FLo.src = src;
   i->Xin.Sse32FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Sse64Fx2;
   i->Xin.Sse64Fx2.op = op;
   i->Xin.Sse64Fx2.src = src;
   i->Xin.Sse64Fx2.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_Sse64FLo;
   i->Xin.Sse64FLo.op = op;
   i->Xin.Sse64FLo.src = src;
   i->Xin.Sse64FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseReRg;
   i->Xin.SseReRg.op = op;
   i->Xin.SseReRg.src = re;
   i->Xin.SseReRg.dst = rg;
   return i;
}
X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseCMov;
   i->Xin.SseCMov.cond = cond;
   i->Xin.SseCMov.src = src;
   i->Xin.SseCMov.dst = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_SseShuf;
   i->Xin.SseShuf.order = order;
   i->Xin.SseShuf.src = src;
   i->Xin.SseShuf.dst = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
                             X86AMode* amFailAddr ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_EvCheck;
   i->Xin.EvCheck.amCounter = amCounter;
   i->Xin.EvCheck.amFailAddr = amFailAddr;
   return i;
}
X86Instr* X86Instr_ProfInc ( void ) {
   X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
   i->tag = Xin_ProfInc;
   return i;
}

void ppX86Instr ( X86Instr* i, Bool mode64 ) {
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
         ppX86RMI(i->Xin.Alu32R.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
         ppX86RI(i->Xin.Alu32M.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
         if (i->Xin.Sh32.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Xin.Sh32.src);
         ppHRegX86(i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
         ppX86RM(i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
         ppHRegX86(i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         vex_printf("leal ");
         ppX86AMode(i->Xin.Lea32.am);
         vex_printf(",");
         ppHRegX86(i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
         ppX86RM(i->Xin.MulL.src);
         return;
      case Xin_Div:
         vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
         ppX86RM(i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
         if (i->Xin.Sh3232.amt == 0)
            vex_printf(" %%cl,");
         else
            vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
         ppHRegX86(i->Xin.Sh3232.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         vex_printf("pushl ");
         ppX86RMI(i->Xin.Push.src);
         return;
      case Xin_Call:
         vex_printf("call%s[%d] ",
                    i->Xin.Call.cond==Xcc_ALWAYS
                       ? "" : showX86CondCode(i->Xin.Call.cond),
                    i->Xin.Call.regparms);
         vex_printf("0x%x", i->Xin.Call.target);
         break;
      case Xin_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XDirect.cond));
         vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
         ppX86AMode(i->Xin.XDirect.amEIP);
         vex_printf("; ");
         vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
                    i->Xin.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Xin_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%eflags.%s) { movl ",
                    showX86CondCode(i->Xin.XIndir.cond));
         ppHRegX86(i->Xin.XIndir.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XIndir.amEIP);
         vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
         return;
      case Xin_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XAssisted.cond));
         vex_printf("movl ");
         ppHRegX86(i->Xin.XAssisted.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XAssisted.amEIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
                    (Int)i->Xin.XAssisted.jk);
         vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
         return;
      case Xin_CMov32:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
         ppX86RM(i->Xin.CMov32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         vex_printf("mov%c%cl ",
                    i->Xin.LoadEX.syned ? 's' : 'z',
                    i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
         ppX86AMode(i->Xin.LoadEX.src);
         vex_printf(",");
         ppHRegX86(i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
         ppHRegX86(i->Xin.Store.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Store.dst);
         return;
      case Xin_Set32:
         vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
         ppHRegX86(i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
         ppHRegX86(i->Xin.Bsfr32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         vex_printf("mfence(%s)",
                    LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
         return;
      case Xin_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Xin.ACAS.sz==1 ? 'b'
                                      : i->Xin.ACAS.sz==2 ? 'w' : 'l');
         vex_printf("{%%eax->%%ebx},");
         ppX86AMode(i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
         ppX86AMode(i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
         ppHRegX86(i->Xin.FpUnary.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpUnary.dst);
         break;
      case Xin_FpBinary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
         ppHRegX86(i->Xin.FpBinary.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.dst);
         break;
      case Xin_FpLdSt:
         if (i->Xin.FpLdSt.isLoad) {
            vex_printf("gld%c ", i->Xin.FpLdSt.sz==10 ? 'T'
                                 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppX86AMode(i->Xin.FpLdSt.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdSt.reg);
         } else {
            vex_printf("gst%c ", i->Xin.FpLdSt.sz==10 ? 'T'
                                 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppHRegX86(i->Xin.FpLdSt.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdSt.addr);
         }
         return;
      case Xin_FpLdStI:
         if (i->Xin.FpLdStI.isLoad) {
            vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppX86AMode(i->Xin.FpLdStI.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdStI.reg);
         } else {
            vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppHRegX86(i->Xin.FpLdStI.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdStI.addr);
         }
         return;
      case Xin_Fp64to32:
         vex_printf("gdtof ");
         ppHRegX86(i->Xin.Fp64to32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
         ppHRegX86(i->Xin.FpCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         vex_printf("fldcw ");
         ppX86AMode(i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         vex_printf("fstsw %%ax");
         return;
      case Xin_FpCmp:
         vex_printf("gcmp ");
         ppHRegX86(i->Xin.FpCmp.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.dst);
         break;
      case Xin_SseConst:
         vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
         ppHRegX86(i->Xin.SseConst.dst);
         break;
      case Xin_SseLdSt:
         vex_printf("movups ");
         if (i->Xin.SseLdSt.isLoad) {
            ppX86AMode(i->Xin.SseLdSt.addr);
            vex_printf(",");
            ppHRegX86(i->Xin.SseLdSt.reg);
         } else {
            ppHRegX86(i->Xin.SseLdSt.reg);
            vex_printf(",");
            ppX86AMode(i->Xin.SseLdSt.addr);
         }
         return;
      case Xin_SseLdzLO:
         vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
         ppX86AMode(i->Xin.SseLdzLO.addr);
         vex_printf(",");
         ppHRegX86(i->Xin.SseLdzLO.reg);
         return;
      case Xin_Sse32Fx4:
         vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
         ppHRegX86(i->Xin.Sse32Fx4.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
         ppHRegX86(i->Xin.Sse32FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
         ppHRegX86(i->Xin.Sse64Fx2.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
         ppHRegX86(i->Xin.Sse64FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
         ppHRegX86(i->Xin.SseReRg.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
         ppHRegX86(i->Xin.SseCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         vex_printf("pshufd $0x%x,", i->Xin.SseShuf.order);
         ppHRegX86(i->Xin.SseShuf.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         vex_printf("(evCheck) decl ");
         ppX86AMode(i->Xin.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppX86AMode(i->Xin.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Xin_ProfInc:
         vex_printf("(profInc) addl $1,NotKnownYet; "
                    "adcl $0,NotKnownYet+4");
         return;
      default:
         vpanic("ppX86Instr");
   }
}

/* --------- Helpers for register allocation. --------- */

void getRegUsage_X86Instr (HRegUsage* u, X86Instr* i, Bool mode64)
{
   Bool unary;
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case Xin_Alu32R:
         addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
         if (i->Xin.Alu32R.op == Xalu_MOV) {
            addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
            return;
         }
         if (i->Xin.Alu32R.op == Xalu_CMP) {
            addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
            return;
         }
         addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         addRegUsage_X86RI(u, i->Xin.Alu32M.src);
         addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
         if (i->Xin.Sh32.src == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Test32:
         addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
         return;
      case Xin_Unary32:
         addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         addRegUsage_X86AMode(u, i->Xin.Lea32.am);
         addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         return;
      case Xin_Div:
         addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
         addHRegUse(u, HRmModify, hregX86_EAX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         return;
      case Xin_Sh3232:
         addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
         addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
         if (i->Xin.Sh3232.amt == 0)
            addHRegUse(u, HRmRead, hregX86_ECX());
         return;
      case Xin_Push:
         addRegUsage_X86RMI(u, i->Xin.Push.src);
         addHRegUse(u, HRmModify, hregX86_ESP());
         return;
      case Xin_Call:
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be %eax %ecx %edx and all the xmm
            registers. */
         addHRegUse(u, HRmWrite, hregX86_EAX());
         addHRegUse(u, HRmWrite, hregX86_ECX());
         addHRegUse(u, HRmWrite, hregX86_EDX());
         addHRegUse(u, HRmWrite, hregX86_XMM0());
         addHRegUse(u, HRmWrite, hregX86_XMM1());
         addHRegUse(u, HRmWrite, hregX86_XMM2());
         addHRegUse(u, HRmWrite, hregX86_XMM3());
         addHRegUse(u, HRmWrite, hregX86_XMM4());
         addHRegUse(u, HRmWrite, hregX86_XMM5());
         addHRegUse(u, HRmWrite, hregX86_XMM6());
         addHRegUse(u, HRmWrite, hregX86_XMM7());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on the regparmness. */
         switch (i->Xin.Call.regparms) {
            case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
            case 0: break;
            default: vpanic("getRegUsage_X86Instr:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the 0/1/2
            regparm case, we can use EAX, EDX and ECX respectively, so
            this does not cause any further damage.  For the 3-regparm
            case, we'll have to choose another register arbitrarily --
            since A, D and C are used for parameters -- and so we might
            as well choose EDI. */
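         /* Illustrative example (added; not in the original source):
            for a 2-regparm call the two arguments are read in %eax
            and %edx, and by the convention described here the emitter
            is assumed to use %ecx as the temporary holding the call
            target -- a register already listed above as trashed. */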
         if (i->Xin.Call.regparms == 3)
            addHRegUse(u, HRmWrite, hregX86_EDI());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on the regparmness: 0==EAX,
            1==EDX, 2==ECX, 3==EDI. */
         return;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case Xin_XDirect:
         addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
         return;
      case Xin_XIndir:
         addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
         return;
      case Xin_XAssisted:
         addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
         addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
         return;
      case Xin_CMov32:
         addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
         addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
         addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         addHRegUse(u, HRmRead, i->Xin.Store.src);
         addRegUsage_X86AMode(u, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
         addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_DACAS:
         addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
         addHRegUse(u, HRmRead, hregX86_ECX());
         addHRegUse(u, HRmRead, hregX86_EBX());
         addHRegUse(u, HRmModify, hregX86_EDX());
         addHRegUse(u, HRmModify, hregX86_EAX());
         return;
      case Xin_FpUnary:
         addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
         addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
         addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
                    i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
         addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
                    i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
         addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
         addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_FpCmp:
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
         addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
         addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
         addHRegUse(u, HRmWrite, hregX86_EAX());
         return;
      case Xin_SseLdSt:
         addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
         addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
                    i->Xin.SseLdSt.reg);
         return;
      case Xin_SseLdzLO:
         addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
         addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
         return;
      case Xin_SseConst:
         addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
         return;
      case Xin_Sse32Fx4:
         vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
                         || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
                         || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                    i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
                         || i->Xin.Sse32FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse32FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                    i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
                         || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
                         || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                    i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
         unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
                         || i->Xin.Sse64FLo.op == Xsse_RSQRTF
                         || i->Xin.Sse64FLo.op == Xsse_SQRTF );
         addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
         addHRegUse(u, unary ? HRmWrite : HRmModify,
                    i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         if (i->Xin.SseReRg.op == Xsse_XOR
             && i->Xin.SseReRg.src == i->Xin.SseReRg.dst) {
            /* reg-alloc needs to understand 'xor r,r' as a write of r */
            /* (as opposed to a rite of passage :-) */
            addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
         } else {
            addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
            addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
                          ? HRmWrite : HRmModify,
                       i->Xin.SseReRg.dst);
         }
         return;
      case Xin_SseCMov:
         addHRegUse(u, HRmRead, i->Xin.SseCMov.src);
         addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         addHRegUse(u, HRmRead, i->Xin.SseShuf.src);
         addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         /* We expect both amodes only to mention %ebp, so this is in
            fact pointless, since %ebp isn't allocatable, but anyway.. */
         addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
         addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
         return;
      case Xin_ProfInc:
         /* does not use any registers. */
         return;
      default:
         ppX86Instr(i, False);
         vpanic("getRegUsage_X86Instr");
   }
}

/* local helper */
static void mapReg( HRegRemap* m, HReg* r )
{
   *r = lookupHRegRemap(m, *r);
}

void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
{
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         mapRegs_X86RMI(m, i->Xin.Alu32R.src);
         mapReg(m, &i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         mapRegs_X86RI(m, i->Xin.Alu32M.src);
         mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         mapReg(m, &i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         mapRegs_X86RM(m, i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         mapReg(m, &i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         mapRegs_X86AMode(m, i->Xin.Lea32.am);
         mapReg(m, &i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         mapRegs_X86RM(m, i->Xin.MulL.src);
         return;
      case Xin_Div:
         mapRegs_X86RM(m, i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         mapReg(m, &i->Xin.Sh3232.src);
         mapReg(m, &i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         mapRegs_X86RMI(m, i->Xin.Push.src);
         return;
      case Xin_Call:
         return;
      case Xin_XDirect:
         mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
         return;
      case Xin_XIndir:
         mapReg(m, &i->Xin.XIndir.dstGA);
         mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
         return;
      case Xin_XAssisted:
         mapReg(m, &i->Xin.XAssisted.dstGA);
         mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
         return;
      case Xin_CMov32:
         mapRegs_X86RM(m, i->Xin.CMov32.src);
         mapReg(m, &i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         mapRegs_X86AMode(m, i->Xin.LoadEX.src);
         mapReg(m, &i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         mapReg(m, &i->Xin.Store.src);
         mapRegs_X86AMode(m, i->Xin.Store.dst);
         return;
      case Xin_Set32:
         mapReg(m, &i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         mapReg(m, &i->Xin.Bsfr32.src);
         mapReg(m, &i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         return;
      case Xin_ACAS:
         mapRegs_X86AMode(m, i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         mapRegs_X86AMode(m, i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         mapReg(m, &i->Xin.FpUnary.src);
         mapReg(m, &i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         mapReg(m, &i->Xin.FpBinary.srcL);
         mapReg(m, &i->Xin.FpBinary.srcR);
         mapReg(m, &i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
         mapReg(m, &i->Xin.FpLdSt.reg);
         return;
      case Xin_FpLdStI:
         mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
         mapReg(m, &i->Xin.FpLdStI.reg);
         return;
      case Xin_Fp64to32:
         mapReg(m, &i->Xin.Fp64to32.src);
         mapReg(m, &i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         mapReg(m, &i->Xin.FpCMov.src);
         mapReg(m, &i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         return;
      case Xin_FpCmp:
         mapReg(m, &i->Xin.FpCmp.srcL);
         mapReg(m, &i->Xin.FpCmp.srcR);
         mapReg(m, &i->Xin.FpCmp.dst);
         return;
      case Xin_SseConst:
         mapReg(m, &i->Xin.SseConst.dst);
         return;
      case Xin_SseLdSt:
         mapReg(m, &i->Xin.SseLdSt.reg);
         mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
         break;
      case Xin_SseLdzLO:
         mapReg(m, &i->Xin.SseLdzLO.reg);
         mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
         break;
      case Xin_Sse32Fx4:
         mapReg(m, &i->Xin.Sse32Fx4.src);
         mapReg(m, &i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         mapReg(m, &i->Xin.Sse32FLo.src);
         mapReg(m, &i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         mapReg(m, &i->Xin.Sse64Fx2.src);
         mapReg(m, &i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         mapReg(m, &i->Xin.Sse64FLo.src);
         mapReg(m, &i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         mapReg(m, &i->Xin.SseReRg.src);
         mapReg(m, &i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         mapReg(m, &i->Xin.SseCMov.src);
         mapReg(m, &i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         mapReg(m, &i->Xin.SseShuf.src);
         mapReg(m, &i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         /* We expect both amodes only to mention %ebp, so this is in
            fact pointless, since %ebp isn't allocatable, but anyway.. */
         mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
         mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
         return;
      case Xin_ProfInc:
         /* does not use any registers. */
         return;

      default:
         ppX86Instr(i, mode64);
         vpanic("mapRegs_X86Instr");
   }
}

/* Figure out if i represents a reg-reg move, and if so assign the
   source and destination to *src and *dst.  If in doubt say No.  Used
   by the register allocator to do move coalescing.
*/
Bool isMove_X86Instr ( X86Instr* i, HReg* src, HReg* dst )
{
   /* Moves between integer regs */
   if (i->tag == Xin_Alu32R) {
      if (i->Xin.Alu32R.op != Xalu_MOV)
         return False;
      if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
         return False;
      *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
      *dst = i->Xin.Alu32R.dst;
      return True;
   }
   /* Moves between FP regs */
   if (i->tag == Xin_FpUnary) {
      if (i->Xin.FpUnary.op != Xfp_MOV)
         return False;
      *src = i->Xin.FpUnary.src;
      *dst = i->Xin.FpUnary.dst;
      return True;
   }
   if (i->tag == Xin_SseReRg) {
      if (i->Xin.SseReRg.op != Xsse_MOV)
         return False;
      *src = i->Xin.SseReRg.src;
      *dst = i->Xin.SseReRg.dst;
      return True;
   }
   return False;
}


/* Generate x86 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                    HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genSpill_X86: unimplemented regclass");
   }
}

void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                     HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genReload_X86: unimplemented regclass");
   }
}

/* The given instruction reads the specified vreg exactly once, and
   that vreg is currently located at the given spill offset.  If
   possible, return a variant of the instruction which instead
   references the spill slot directly. */

X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
{
   vassert(spill_off >= 0 && spill_off < 10000); /* let's say */

   /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
      Convert to: src=RMI_Mem, dst=Reg
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
           || i->Xin.Alu32R.op == Xalu_XOR)
       && i->Xin.Alu32R.src->tag == Xrmi_Reg
       && i->Xin.Alu32R.src->Xrmi.Reg.reg == vreg) {
      vassert(i->Xin.Alu32R.dst != vreg);
      return X86Instr_Alu32R(
                i->Xin.Alu32R.op,
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
                i->Xin.Alu32R.dst
             );
   }

   /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
      Convert to: src=RI_Imm, dst=Mem
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_CMP)
       && i->Xin.Alu32R.src->tag == Xrmi_Imm
       && i->Xin.Alu32R.dst == vreg) {
      return X86Instr_Alu32M(
                i->Xin.Alu32R.op,
                X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
                X86AMode_IR( spill_off, hregX86_EBP())
             );
   }

   /* Deal with form: Push(RMI_Reg)
      Convert to: Push(RMI_Mem)
   */
   if (i->tag == Xin_Push
       && i->Xin.Push.src->tag == Xrmi_Reg
       && i->Xin.Push.src->Xrmi.Reg.reg == vreg) {
      return X86Instr_Push(
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
             );
   }

   /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
      Convert to CMov32(RM_Mem, dst) */
   if (i->tag == Xin_CMov32
       && i->Xin.CMov32.src->tag == Xrm_Reg
       && i->Xin.CMov32.src->Xrm.Reg.reg == vreg) {
      vassert(i->Xin.CMov32.dst != vreg);
      return X86Instr_CMov32(
                i->Xin.CMov32.cond,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
                i->Xin.CMov32.dst
             );
   }

   /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
   if (i->tag == Xin_Test32
       && i->Xin.Test32.dst->tag == Xrm_Reg
       && i->Xin.Test32.dst->Xrm.Reg.reg == vreg) {
      return X86Instr_Test32(
                i->Xin.Test32.imm32,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
             );
   }

   return NULL;
}


/* --------- The x86 assembler (bleh.) --------- */

static UChar iregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt32);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 7);
   return toUChar(n);
}

static UInt fregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 5);
   return n;
}

static UInt vregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 7);
   return n;
}

static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
{
   return toUChar( ((mod & 3) << 6)
                   | ((reg & 7) << 3)
                   | (regmem & 7) );
}

static UChar mkSIB ( Int shift, Int regindex, Int regbase )
{
   return toUChar( ((shift & 3) << 6)
                   | ((regindex & 7) << 3)
                   | (regbase & 7) );
}

static UChar* emit32 ( UChar* p, UInt w32 )
{
   *p++ = toUChar( w32        & 0x000000FF);
   *p++ = toUChar((w32 >>  8) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   return p;
}

/* Does a sign-extend of the lowest 8 bits give
   the original number? */
static Bool fits8bits ( UInt w32 )
{
   Int i32 = (Int)w32;
   return toBool(i32 == ((i32 << 24) >> 24));
}
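
/* Worked example (added; not in the original source): fits8bits(4)
   and fits8bits(0xFFFFFF80) are True, since sign-extending the low
   bytes 0x04 and 0x80 reproduces 4 and -128 respectively; but
   fits8bits(0x80) is False, because sign-extending 0x80 gives
   0xFFFFFF80, not 0x00000080.  Such values get the short "d8"
   encodings in doAMode_M below. */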
/* Forming mod-reg-rm bytes and scale-index-base bytes.

     greg,  0(ereg)    |  ereg != ESP && ereg != EBP
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg != ESP
                       =  01 greg ereg, d8

     greg,  d32(ereg)  |  ereg != ESP
                       =  10 greg ereg, d32

     greg,  d8(%esp)   =  01 greg 100, 0x24, d8

     -----------------------------------------------

     greg,  d8(base,index,scale)
               |  index != ESP
               =  01 greg 100, scale index base, d8

     greg,  d32(base,index,scale)
               |  index != ESP
               =  10 greg 100, scale index base, d32
*/
static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
{
   if (am->tag == Xam_IR) {
      if (am->Xam.IR.imm == 0
          && am->Xam.IR.reg != hregX86_ESP()
          && am->Xam.IR.reg != hregX86_EBP() ) {
         *p++ = mkModRegRM(0, iregNo(greg), iregNo(am->Xam.IR.reg));
         return p;
      }
      if (fits8bits(am->Xam.IR.imm)
          && am->Xam.IR.reg != hregX86_ESP()) {
         *p++ = mkModRegRM(1, iregNo(greg), iregNo(am->Xam.IR.reg));
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      if (am->Xam.IR.reg != hregX86_ESP()) {
         *p++ = mkModRegRM(2, iregNo(greg), iregNo(am->Xam.IR.reg));
         p = emit32(p, am->Xam.IR.imm);
         return p;
      }
      if (am->Xam.IR.reg == hregX86_ESP()
          && fits8bits(am->Xam.IR.imm)) {
         *p++ = mkModRegRM(1, iregNo(greg), 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Xam_IRRS) {
      if (fits8bits(am->Xam.IRRS.imm)
          && am->Xam.IRRS.index != hregX86_ESP()) {
         *p++ = mkModRegRM(1, iregNo(greg), 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
                                          am->Xam.IRRS.base);
         *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
         return p;
      }
      if (am->Xam.IRRS.index != hregX86_ESP()) {
         *p++ = mkModRegRM(2, iregNo(greg), 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, am->Xam.IRRS.index,
                                          am->Xam.IRRS.base);
         p = emit32(p, am->Xam.IRRS.imm);
         return p;
      }
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}
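
/* Illustrative sketch (added; not in the original; kept disabled): a
   concrete doAMode_M encoding.  With greg = %eax and the amode
   8(%ebx), the middle Xam_IR case above fires and produces the two
   bytes 43 08, i.e. mod=01, reg=000 (%eax), rm=011 (%ebx), followed
   by the 8-bit displacement.  Prefixed with opcode 0x89 this would
   spell "movl %eax, 8(%ebx)". */
#if 0
static void example_doAMode_M ( void )
{
   UChar     buf[8];
   X86AMode* am = X86AMode_IR(8, hregX86_EBX());
   UChar*    p  = doAMode_M(buf, hregX86_EAX(), am);
   vassert(p - buf == 2);
   vassert(buf[0] == 0x43 && buf[1] == 0x08);
}
#endif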

/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
{
   *p++ = mkModRegRM(3, iregNo(greg), iregNo(ereg));
   return p;
}


/* Emit ffree %st(7) */
static UChar* do_ffree_st7 ( UChar* p )
{
   *p++ = 0xDD;
   *p++ = 0xC7;
   return p;
}

/* Emit fstp %st(i), 1 <= i <= 7 */
static UChar* do_fstp_st ( UChar* p, Int i )
{
   vassert(1 <= i && i <= 7);
   *p++ = 0xDD;
   *p++ = toUChar(0xD8+i);
   return p;
}

/* Emit fld %st(i), 0 <= i <= 6 */
static UChar* do_fld_st ( UChar* p, Int i )
{
   vassert(0 <= i && i <= 6);
   *p++ = 0xD9;
   *p++ = toUChar(0xC0+i);
   return p;
}

/* Emit f<op> %st(0) */
static UChar* do_fop1_st ( UChar* p, X86FpOp op )
{
   switch (op) {
      case Xfp_NEG:    *p++ = 0xD9; *p++ = 0xE0; break;
      case Xfp_ABS:    *p++ = 0xD9; *p++ = 0xE1; break;
      case Xfp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
      case Xfp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
      case Xfp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
      case Xfp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
      case Xfp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
      case Xfp_MOV:    break;
      case Xfp_TAN:    p = do_ffree_st7(p); /* since fptan pushes 1.0 */
                       *p++ = 0xD9; *p++ = 0xF2; /* fptan */
                       *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
                       break;
      default: vpanic("do_fop1_st: unknown op");
   }
   return p;
}

/* Emit f<op> %st(i), 1 <= i <= 5 */
static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
{
#  define fake(_n) mkHReg((_n), HRcInt32, False)
   Int subopc;
   switch (op) {
      case Xfp_ADD: subopc = 0; break;
      case Xfp_SUB: subopc = 4; break;
      case Xfp_MUL: subopc = 1; break;
      case Xfp_DIV: subopc = 6; break;
      default: vpanic("do_fop2_st: unknown op");
   }
   *p++ = 0xD8;
   p    = doAMode_R(p, fake(subopc), fake(i));
   return p;
#  undef fake
}

/* Push a 32-bit word on the stack.  The word depends on tags[3:0];
   each byte is either 0x00 or 0xFF depending on the corresponding
   bit in tags[]. */
static UChar* push_word_from_tags ( UChar* p, UShort tags )
{
   UInt w;
   vassert(0 == (tags & ~0xF));
   if (tags == 0) {
      /* pushl $0x00000000 */
      *p++ = 0x6A;
      *p++ = 0x00;
   }
   else
   /* pushl $0xFFFFFFFF */
   if (tags == 0xF) {
      *p++ = 0x6A;
      *p++ = 0xFF;
   } else {
      vassert(0); /* awaiting test case */
      w = 0;
      if (tags & 1) w |= 0x000000FF;
      if (tags & 2) w |= 0x0000FF00;
      if (tags & 4) w |= 0x00FF0000;
      if (tags & 8) w |= 0xFF000000;
      *p++ = 0x68;
      p = emit32(p, w);
   }
   return p;
}
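
/* Illustrative sketch (added; not in the original; kept disabled):
   the x87 helpers above just emit two-byte stack-register forms.
   For example do_fld_st(p,3) produces D9 C3 ("fld %st(3)") and
   do_fstp_st(p,1) produces DD D9 ("fstp %st(1)").  These are the
   building blocks the Xin_Fp* cases in the emitter below string
   together. */
#if 0
static void example_x87_helpers ( void )
{
   UChar  buf[4];
   UChar* p = do_fld_st(buf, 3);   /* fld %st(3)  -> D9 C3 */
   p        = do_fstp_st(p, 1);    /* fstp %st(1) -> DD D9 */
   vassert(p - buf == 4);
   vassert(buf[0] == 0xD9 && buf[1] == 0xC3);
   vassert(buf[2] == 0xDD && buf[3] == 0xD9);
}
#endif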

/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code.  If the emitted
   instruction was a profiler inc, set *is_profInc to True, else
   leave it unchanged. */

Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
                    UChar* buf, Int nbuf, X86Instr* i,
                    Bool mode64,
                    void* disp_cp_chain_me_to_slowEP,
                    void* disp_cp_chain_me_to_fastEP,
                    void* disp_cp_xindir,
                    void* disp_cp_xassisted )
{
   UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;

   UInt   xtra;
   UChar* p = &buf[0];
   UChar* ptmp;
   vassert(nbuf >= 32);
   vassert(mode64 == False);

   /* Wrap an integer as an int register, for use assembling
      GrpN insns, in which the greg field is used as a sub-opcode
      and does not really contain a register. */
#  define fake(_n) mkHReg((_n), HRcInt32, False)

   /* vex_printf("asm  ");ppX86Instr(i, mode64); vex_printf("\n"); */

   switch (i->tag) {

   case Xin_Alu32R:
      /* Deal specially with MOV */
      if (i->Xin.Alu32R.op == Xalu_MOV) {
         switch (i->Xin.Alu32R.src->tag) {
            case Xrmi_Imm:
               *p++ = toUChar(0xB8 + iregNo(i->Xin.Alu32R.dst));
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               goto done;
            case Xrmi_Reg:
               *p++ = 0x89;
               p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
                                i->Xin.Alu32R.dst);
               goto done;
            case Xrmi_Mem:
               *p++ = 0x8B;
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               goto done;
            default:
               goto bad;
         }
      }
      /* MUL */
      if (i->Xin.Alu32R.op == Xalu_MUL) {
         switch (i->Xin.Alu32R.src->tag) {
            case Xrmi_Reg:
               *p++ = 0x0F;
               *p++ = 0xAF;
               p = doAMode_R(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Reg.reg);
               goto done;
            case Xrmi_Mem:
               *p++ = 0x0F;
               *p++ = 0xAF;
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               goto done;
            case Xrmi_Imm:
               if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
                  *p++ = 0x6B;
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               } else {
                  *p++ = 0x69;
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               }
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
      opc = opc_rr = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32R.op) {
         case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
                        subopc_imm = 2; opc_imma = 0x15; break;
         case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
                        subopc_imm = 0; opc_imma = 0x05; break;
         case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
                        subopc_imm = 5; opc_imma = 0x2D; break;
         case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
                        subopc_imm = 3; opc_imma = 0x1D; break;
         case Xalu_AND: opc = 0x23; opc_rr = 0x21;
                        subopc_imm = 4; opc_imma = 0x25; break;
         case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
                        subopc_imm = 6; opc_imma = 0x35; break;
         case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
                        subopc_imm = 1; opc_imma = 0x0D; break;
         case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
                        subopc_imm = 7; opc_imma = 0x3D; break;
         default: goto bad;
      }
      switch (i->Xin.Alu32R.src->tag) {
         case Xrmi_Imm:
            if (i->Xin.Alu32R.dst == hregX86_EAX()
                && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = toUChar(opc_imma);
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            } else
            if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = 0x83;
               p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            } else {
               *p++ = 0x81;
               p    = doAMode_R(p, fake(subopc_imm), i->Xin.Alu32R.dst);
               p    = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            }
            goto done;
         case Xrmi_Reg:
            *p++ = toUChar(opc_rr);
            p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
                             i->Xin.Alu32R.dst);
            goto done;
         case Xrmi_Mem:
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32R.dst,
                             i->Xin.Alu32R.src->Xrmi.Mem.am);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Alu32M:
      /* Deal specially with MOV */
      if (i->Xin.Alu32M.op == Xalu_MOV) {
         switch (i->Xin.Alu32M.src->tag) {
            case Xri_Reg:
               *p++ = 0x89;
               p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
                                i->Xin.Alu32M.dst);
               goto done;
            case Xri_Imm:
               *p++ = 0xC7;
               p = doAMode_M(p, fake(0), i->Xin.Alu32M.dst);
               p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not
         allowed here. */
      opc = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32M.op) {
         case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
         case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
         case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
         default: goto bad;
      }
      switch (i->Xin.Alu32M.src->tag) {
         case Xri_Reg:
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
                             i->Xin.Alu32M.dst);
            goto done;
         case Xri_Imm:
            if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
               *p++ = 0x83;
               p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            } else {
               *p++ = 0x81;
               p    = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
               p    = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            }
         default:
            goto bad;
      }
      break;

   case Xin_Sh32:
      opc_cl = opc_imm = subopc = 0;
      switch (i->Xin.Sh32.op) {
         case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
         case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
         case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
         default: goto bad;
      }
      if (i->Xin.Sh32.src == 0) {
         *p++ = toUChar(opc_cl);
         p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
      } else {
         *p++ = toUChar(opc_imm);
         p = doAMode_R(p, fake(subopc), i->Xin.Sh32.dst);
         *p++ = (UChar)(i->Xin.Sh32.src);
      }
      goto done;

   case Xin_Test32:
      if (i->Xin.Test32.dst->tag == Xrm_Reg) {
         /* testl $imm32, %reg */
         *p++ = 0xF7;
         p = doAMode_R(p, fake(0), i->Xin.Test32.dst->Xrm.Reg.reg);
         p = emit32(p, i->Xin.Test32.imm32);
         goto done;
      } else {
         /* testl $imm32, amode */
         *p++ = 0xF7;
         p = doAMode_M(p, fake(0), i->Xin.Test32.dst->Xrm.Mem.am);
         p = emit32(p, i->Xin.Test32.imm32);
         goto done;
      }

   case Xin_Unary32:
      if (i->Xin.Unary32.op == Xun_NOT) {
         *p++ = 0xF7;
         p = doAMode_R(p, fake(2), i->Xin.Unary32.dst);
         goto done;
      }
      if (i->Xin.Unary32.op == Xun_NEG) {
         *p++ = 0xF7;
         p = doAMode_R(p, fake(3), i->Xin.Unary32.dst);
         goto done;
      }
      break;

   case Xin_Lea32:
      *p++ = 0x8D;
      p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
      goto done;
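
   /* Worked example (added for clarity, not in the original): for a
      hypothetical "addl $1, %ebx" the ALU-with-immediate path above
      picks the short form and emits 83 /0 ib: opcode 0x83, then the
      mod-reg-rm byte C3 (mod=11, reg = subopc 0 for ADD, rm=011 for
      %ebx), then the byte 01.  Were the immediate not to fit in 8
      bits, the 0x81 long form with a 32-bit immediate would be used
      instead, and for %eax specifically the dedicated one-byte
      opc_imma form (0x05 for ADD). */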

   case Xin_MulL:
      subopc = i->Xin.MulL.syned ? 5 : 4;
      *p++ = 0xF7;
      switch (i->Xin.MulL.src->tag) {
         case Xrm_Mem:
            p = doAMode_M(p, fake(subopc),
                             i->Xin.MulL.src->Xrm.Mem.am);
            goto done;
         case Xrm_Reg:
            p = doAMode_R(p, fake(subopc),
                             i->Xin.MulL.src->Xrm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Div:
      subopc = i->Xin.Div.syned ? 7 : 6;
      *p++ = 0xF7;
      switch (i->Xin.Div.src->tag) {
         case Xrm_Mem:
            p = doAMode_M(p, fake(subopc),
                             i->Xin.Div.src->Xrm.Mem.am);
            goto done;
         case Xrm_Reg:
            p = doAMode_R(p, fake(subopc),
                             i->Xin.Div.src->Xrm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Sh3232:
      vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
      if (i->Xin.Sh3232.amt == 0) {
         /* shldl/shrdl by %cl */
         *p++ = 0x0F;
         if (i->Xin.Sh3232.op == Xsh_SHL) {
            *p++ = 0xA5;
         } else {
            *p++ = 0xAD;
         }
         p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
         goto done;
      }
      break;

   case Xin_Push:
      switch (i->Xin.Push.src->tag) {
         case Xrmi_Mem:
            *p++ = 0xFF;
            p = doAMode_M(p, fake(6), i->Xin.Push.src->Xrmi.Mem.am);
            goto done;
         case Xrmi_Imm:
            *p++ = 0x68;
            p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
            goto done;
         case Xrmi_Reg:
            *p++ = toUChar(0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg));
            goto done;
         default:
            goto bad;
      }

   case Xin_Call:
      /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
         for explanation of this. */
      switch (i->Xin.Call.regparms) {
         case 0: irno = iregNo(hregX86_EAX()); break;
         case 1: irno = iregNo(hregX86_EDX()); break;
         case 2: irno = iregNo(hregX86_ECX()); break;
         case 3: irno = iregNo(hregX86_EDI()); break;
         default: vpanic("emit_X86Instr:call:regparms");
      }
      /* jump over the following two insns if the condition does not
         hold */
      if (i->Xin.Call.cond != Xcc_ALWAYS) {
         *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
         *p++ = 0x07; /* 7 bytes in the next two insns */
      }
      /* movl $target, %tmp */
      *p++ = toUChar(0xB8 + irno);
      p = emit32(p, i->Xin.Call.target);
      /* call *%tmp */
      *p++ = 0xFF;
      *p++ = toUChar(0xD0 + irno);
      goto done;

   case Xin_XDirect: {
      /* NB: what goes on here has to be very closely coordinated with the
         chainXDirect_X86 and unchainXDirect_X86 below. */
      /* We're generating chain-me requests here, so we need to be
         sure this is actually allowed -- no-redir translations can't
         use chain-me's.  Hence: */
      vassert(disp_cp_chain_me_to_slowEP != NULL);
      vassert(disp_cp_chain_me_to_fastEP != NULL);

      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }
      /* Update the guest EIP. */
      /* movl $dstGA, amEIP */
      *p++ = 0xC7;
      p    = doAMode_M(p, fake(0), i->Xin.XDirect.amEIP);
      p    = emit32(p, i->Xin.XDirect.dstGA);

      /* --- FIRST PATCHABLE BYTE follows --- */
      /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
         to) backs up the return address, so as to find the address of
         the first patchable byte.  So: don't change the length of the
         two instructions below. */
      /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
      *p++ = 0xBA;
      void* disp_cp_chain_me
               = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                         : disp_cp_chain_me_to_slowEP;
      p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_chain_me));
      /* call *%edx */
      *p++ = 0xFF;
      *p++ = 0xD2;
      /* --- END of PATCHABLE BYTES --- */

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }

   case Xin_XIndir: {
      /* We're generating transfers that could lead indirectly to a
         chain-me, so we need to be sure this is actually allowed --
         no-redir translations are not allowed to reach normal
         translations without going through the scheduler.  That means
         no XDirects or XIndirs out from no-redir translations.
         Hence: */
      vassert(disp_cp_xindir != NULL);

      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
      *p++ = 0x89;
      p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);

      /* movl $disp_indir, %edx */
      *p++ = 0xBA;
      p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir));
      /* jmp *%edx */
      *p++ = 0xFF;
      *p++ = 0xE2;

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }

   case Xin_XAssisted: {
      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
      *p++ = 0x89;
      p = doAMode_M(p, i->Xin.XAssisted.dstGA, i->Xin.XAssisted.amEIP);
      /* movl $magic_number, %ebp. */
      UInt trcval = 0;
      switch (i->Xin.XAssisted.jk) {
         case Ijk_ClientReq:    trcval = VEX_TRC_JMP_CLIENTREQ;    break;
         case Ijk_Sys_syscall:  trcval = VEX_TRC_JMP_SYS_SYSCALL;  break;
         case Ijk_Sys_int128:   trcval = VEX_TRC_JMP_SYS_INT128;   break;
         case Ijk_Sys_int129:   trcval = VEX_TRC_JMP_SYS_INT129;   break;
         case Ijk_Sys_int130:   trcval = VEX_TRC_JMP_SYS_INT130;   break;
         case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break;
         case Ijk_Yield:        trcval = VEX_TRC_JMP_YIELD;        break;
         case Ijk_EmWarn:       trcval = VEX_TRC_JMP_EMWARN;       break;
         case Ijk_MapFail:      trcval = VEX_TRC_JMP_MAPFAIL;      break;
         case Ijk_NoDecode:     trcval = VEX_TRC_JMP_NODECODE;     break;
         case Ijk_TInval:       trcval = VEX_TRC_JMP_TINVAL;       break;
         case Ijk_NoRedir:      trcval = VEX_TRC_JMP_NOREDIR;      break;
         case Ijk_SigTRAP:      trcval = VEX_TRC_JMP_SIGTRAP;      break;
         case Ijk_SigSEGV:      trcval = VEX_TRC_JMP_SIGSEGV;      break;
         case Ijk_Boring:       trcval = VEX_TRC_JMP_BORING;       break;
         /* We don't expect to see the following being assisted. */
         case Ijk_Ret:
         case Ijk_Call:
         /* fallthrough */
         default:
            ppIRJumpKind(i->Xin.XAssisted.jk);
            vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
      }
      vassert(trcval != 0);
      *p++ = 0xBD;
      p = emit32(p, trcval);

      /* movl $disp_indir, %edx */
      *p++ = 0xBA;
      p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xassisted));
      /* jmp *%edx */
      *p++ = 0xFF;
      *p++ = 0xE2;

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }

   case Xin_CMov32:
      vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);

      /* This generates cmov, which is illegal on P54/P55. */
      /*
      *p++ = 0x0F;
      *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
      if (i->Xin.CMov32.src->tag == Xrm_Reg) {
         p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
         goto done;
      }
      if (i->Xin.CMov32.src->tag == Xrm_Mem) {
         p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
         goto done;
      }
      */

      /* Alternative version which works on any x86 variant. */
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      switch (i->Xin.CMov32.src->tag) {
         case Xrm_Reg:
            /* Big sigh.  This is movl E -> G ... */
            *p++ = 0x89;
            p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
                             i->Xin.CMov32.dst);
            break;
         case Xrm_Mem:
            /* ... whereas this is movl G -> E.  That's why the args
               to doAMode_R appear to be the wrong way round in the
               Xrm_Reg case. */
            *p++ = 0x8B;
            p = doAMode_M(p, i->Xin.CMov32.dst,
                             i->Xin.CMov32.src->Xrm.Mem.am);
            break;
         default:
            goto bad;
      }
      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

      break;

   case Xin_LoadEX:
      if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
         /* movzbl */
         *p++ = 0x0F;
         *p++ = 0xB6;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
         /* movzwl */
         *p++ = 0x0F;
         *p++ = 0xB7;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
         /* movsbl */
         *p++ = 0x0F;
         *p++ = 0xBE;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      break;

   case Xin_Set32:
      /* Make the destination register be 1 or 0, depending on whether
         the relevant condition holds.  We have to dodge and weave
         when the destination is %esi or %edi as we cannot directly
         emit the native 'setb %reg' for those.  Further complication:
         the top 24 bits of the destination should be forced to zero,
         but doing 'xor %r,%r' kills the flag(s) we are about to read.
         Sigh.  So start off by moving $0 into the dest. */

      /* Do we need to swap in %eax? */
      if (iregNo(i->Xin.Set32.dst) >= 4) {
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
         /* movl $0, %eax */
         *p++ = toUChar(0xB8 + iregNo(hregX86_EAX()));
         p = emit32(p, 0);
         /* setb lo8(%eax) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R(p, fake(0), hregX86_EAX());
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregNo(i->Xin.Set32.dst));
      } else {
         /* movl $0, %dst */
         *p++ = toUChar(0xB8 + iregNo(i->Xin.Set32.dst));
         p = emit32(p, 0);
         /* setb lo8(%dst) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R(p, fake(0), i->Xin.Set32.dst);
      }
      goto done;

   case Xin_Bsfr32:
      *p++ = 0x0F;
      if (i->Xin.Bsfr32.isFwds) {
         *p++ = 0xBC;
      } else {
         *p++ = 0xBD;
      }
      p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
      goto done;

   case Xin_MFence:
      /* see comment in hdefs.h re this insn */
      if (0) vex_printf("EMIT FENCE\n");
      if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
                                  |VEX_HWCAPS_X86_SSE2)) {
         /* mfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
         goto done;
      }
      if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_SSE1) {
         /* sfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      vpanic("emit_X86Instr:mfence:hwcaps");
      /*NOTREACHED*/
      break;
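
   /* Decoding note (added; not in the original): the hard-coded
      fallback fence bytes F0 83 44 24 00 00 above are
      "lock addl $0,0(%esp)": F0 is the lock prefix, 83 the ALU-imm8
      group opcode, 44 a mod-reg-rm byte (mod=01, reg=/0 i.e. ADD,
      rm=100 meaning a SIB byte follows), 24 the SIB byte selecting
      %esp as base with no index, then an 8-bit displacement of 0 and
      the immediate 0. */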

   case Xin_ACAS:
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
         in %ebx.  The new-value register is hardwired to be %ebx
         since letting it be any integer register gives the problem
         that %sil and %dil are unaddressable on x86 and hence we
         would have to resort to the same kind of trickery as with
         byte-sized Xin.Store, just below.  Given that this isn't
         performance critical, it is simpler just to force the
         register operand to %ebx (could equally be %ecx or %edx).
         (Although %ebx is more consistent with cmpxchg8b.) */
      if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
      *p++ = 0x0F;
      if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
      p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
      goto done;

   case Xin_DACAS:
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
         in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
         aren't encoded in the insn. */
      *p++ = 0x0F;
      *p++ = 0xC7;
      p = doAMode_M(p, fake(1), i->Xin.DACAS.addr);
      goto done;

   case Xin_Store:
      if (i->Xin.Store.sz == 2) {
         /* This case, at least, is simple, given that we can
            reference the low 16 bits of any integer register. */
         *p++ = 0x66;
         *p++ = 0x89;
         p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
         goto done;
      }

      if (i->Xin.Store.sz == 1) {
         /* We have to do complex dodging and weaving if src is not
            the low 8 bits of %eax/%ebx/%ecx/%edx. */
         if (iregNo(i->Xin.Store.src) < 4) {
            /* we're OK, can do it directly */
            *p++ = 0x88;
            p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
            goto done;
         } else {
            /* Bleh.  This means the source is %edi or %esi.  Since
               the address mode can only mention three registers, at
               least one of %eax/%ebx/%ecx/%edx must be available to
               temporarily swap the source into, so the store can
               happen.  So we have to look at the regs mentioned
               in the amode. */
            HReg swap = INVALID_HREG;
            HReg  eax = hregX86_EAX(), ebx = hregX86_EBX(),
                  ecx = hregX86_ECX(), edx = hregX86_EDX();
            Bool a_ok = True, b_ok = True, c_ok = True, d_ok = True;
            HRegUsage u;
            Int j;
            initHRegUsage(&u);
            addRegUsage_X86AMode(&u, i->Xin.Store.dst);
            for (j = 0; j < u.n_used; j++) {
               HReg r = u.hreg[j];
               if (r == eax) a_ok = False;
               if (r == ebx) b_ok = False;
               if (r == ecx) c_ok = False;
               if (r == edx) d_ok = False;
            }
            if (a_ok) swap = eax;
            if (b_ok) swap = ebx;
            if (c_ok) swap = ecx;
            if (d_ok) swap = edx;
            vassert(swap != INVALID_HREG);
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            *p++ = 0x87;
            p = doAMode_R(p, i->Xin.Store.src, swap);
            /* movb lo8{%swap}, (dst) */
            *p++ = 0x88;
            p = doAMode_M(p, swap, i->Xin.Store.dst);
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            *p++ = 0x87;
            p = doAMode_R(p, i->Xin.Store.src, swap);
            goto done;
         }
      } /* if (i->Xin.Store.sz == 1) */
      break;

   case Xin_FpUnary:
      /* gop %src, %dst
         --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
      */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
      p = do_fop1_st(p, i->Xin.FpUnary.op);
      p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
      goto done;

   case Xin_FpBinary:
      if (i->Xin.FpBinary.op == Xfp_YL2X
          || i->Xin.FpBinary.op == Xfp_YL2XP1) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
         *p++ = 0xD9;
         *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
         goto done;
      }
      if (i->Xin.FpBinary.op == Xfp_ATAN) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
         *p++ = 0xD9; *p++ = 0xF3;
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
         goto done;
      }
      if (i->Xin.FpBinary.op == Xfp_PREM
          || i->Xin.FpBinary.op == Xfp_PREM1
          || i->Xin.FpBinary.op == Xfp_SCALE) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcR) ;
            ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
            fincstp ; ffree %st7 */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
         *p++ = 0xD9;
         switch (i->Xin.FpBinary.op) {
            case Xfp_PREM:  *p++ = 0xF8; break;
            case Xfp_PREM1: *p++ = 0xF5; break;
            case Xfp_SCALE: *p++ = 0xFD; break;
            default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
         }
         p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
         *p++ = 0xD9; *p++ = 0xF7;
         p = do_ffree_st7(p);
         goto done;
      }
      /* General case */
      /* gop %srcL, %srcR, %dst
         --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
      */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
      p = do_fop2_st(p, i->Xin.FpBinary.op,
                        1+hregNumber(i->Xin.FpBinary.srcR));
      p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
      goto done;

   case Xin_FpLdSt:
      if (i->Xin.FpLdSt.isLoad) {
         /* Load from memory into %fakeN.
            --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
         */
         p = do_ffree_st7(p);
         switch (i->Xin.FpLdSt.sz) {
            case 4:
               *p++ = 0xD9;
               p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 8:
               *p++ = 0xDD;
               p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 10:
               *p++ = 0xDB;
               p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            default:
               vpanic("emitX86Instr(FpLdSt,load)");
         }
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
         goto done;
      } else {
         /* Store from %fakeN into memory.
            --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
         */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
         switch (i->Xin.FpLdSt.sz) {
            case 4:
               *p++ = 0xD9;
               p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 8:
               *p++ = 0xDD;
               p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 10:
               *p++ = 0xDB;
               p = doAMode_M(p, fake(7)/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            default:
               vpanic("emitX86Instr(FpLdSt,store)");
         }
         goto done;
      }
      break;
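
   /* Worked example (added; not in the original): the register
      allocator's %fake0..%fake5 are assumed here to be held in
      %st(0)..%st(5), so a hypothetical FpUnary(Xfp_NEG, %fake2,
      %fake0) expands, via the helpers above, to
         ffree %st(7) ; fld %st(2) ; fchs ; fstp %st(1)
      i.e. bytes DD C7 D9 C2 D9 E0 DD D9: push a copy of the source
      onto the stack top, negate it there, then pop it into the slot
      that is one deeper than where the destination sat before the
      push -- hence the ubiquitous "1+dst" adjustments. */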

   case Xin_FpLdStI:
      if (i->Xin.FpLdStI.isLoad) {
         /* Load from memory into %fakeN, converting from an int.
            --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
         */
         switch (i->Xin.FpLdStI.sz) {
            case 8:  opc = 0xDF; subopc_imm = 5; break;
            case 4:  opc = 0xDB; subopc_imm = 0; break;
            case 2:  vassert(0); opc = 0xDF; subopc_imm = 0; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
         }
         p = do_ffree_st7(p);
         *p++ = toUChar(opc);
         p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
         p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
         goto done;
      } else {
         /* Store from %fakeN into memory, converting to an int.
            --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
         */
         switch (i->Xin.FpLdStI.sz) {
            case 8:  opc = 0xDF; subopc_imm = 7; break;
            case 4:  opc = 0xDB; subopc_imm = 3; break;
            case 2:  opc = 0xDF; subopc_imm = 3; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
         }
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
         *p++ = toUChar(opc);
         p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
         goto done;
      }
      break;

   case Xin_Fp64to32:
      /* ffree %st7 ; fld %st(src) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
      /* subl $4, %esp */
      *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
      /* fstps (%esp) */
      *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
      /* flds (%esp) */
      *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
      /* addl $4, %esp */
      *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
      /* fstp %st(1+dst) */
      p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
      goto done;

   case Xin_FpCMov:
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
      p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

   case Xin_FpLdCW:
      *p++ = 0xD9;
      p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdCW.addr);
      goto done;

   case Xin_FpStSW_AX:
      /* note, this emits fnstsw %ax, not fstsw %ax */
      *p++ = 0xDF;
      *p++ = 0xE0;
      goto done;

   case Xin_FpCmp:
      /* gcmp %fL, %fR, %dst
         -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
            fnstsw %ax ; movl %eax, %dst
      */
      /* ffree %st7 */
      p = do_ffree_st7(p);
      /* fpush %fL */
      p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
      /* fucomp %(fR+1) */
      *p++ = 0xDD;
      *p++ = toUChar(0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR))));
      /* fnstsw %ax */
      *p++ = 0xDF;
      *p++ = 0xE0;
      /* movl %eax, %dst */
      *p++ = 0x89;
      p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
      goto done;

   case Xin_SseConst: {
      UShort con = i->Xin.SseConst.con;
      p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
      p = push_word_from_tags(p, toUShort(con & 0xF));
      /* movl (%esp), %xmm-dst */
      *p++ = 0x0F;
      *p++ = 0x10;
      *p++ = toUChar(0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst)));
      *p++ = 0x24;
      /* addl $16, %esp */
      *p++ = 0x83;
      *p++ = 0xC4;
      *p++ = 0x10;
      goto done;
   }

   case Xin_SseLdSt:
      *p++ = 0x0F;
      *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
      p = doAMode_M(p, fake(vregNo(i->Xin.SseLdSt.reg)), i->Xin.SseLdSt.addr);
      goto done;

   case Xin_SseLdzLO:
      vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
      /* movs[sd] amode, %xmm-dst */
      *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
      *p++ = 0x0F;
      *p++ = 0x10;
      p = doAMode_M(p, fake(vregNo(i->Xin.SseLdzLO.reg)),
                       i->Xin.SseLdzLO.addr);
      goto done;

   case Xin_Sse32Fx4:
      xtra = 0;
      *p++ = 0x0F;
      switch (i->Xin.Sse32Fx4.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse32Fx4.dst)),
                       fake(vregNo(i->Xin.Sse32Fx4.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_Sse64Fx2:
      xtra = 0;
      *p++ = 0x66;
      *p++ = 0x0F;
      switch (i->Xin.Sse64Fx2.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse64Fx2.dst)),
                       fake(vregNo(i->Xin.Sse64Fx2.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_Sse32FLo:
      xtra = 0;
      *p++ = 0xF3;
      *p++ = 0x0F;
      switch (i->Xin.Sse32FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse32FLo.dst)),
                       fake(vregNo(i->Xin.Sse32FLo.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_Sse64FLo:
      xtra = 0;
      *p++ = 0xF2;
      *p++ = 0x0F;
      switch (i->Xin.Sse64FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.Sse64FLo.dst)),
                       fake(vregNo(i->Xin.Sse64FLo.src)) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_SseReRg:
#     define XX(_n) *p++ = (_n)
      switch (i->Xin.SseReRg.op) {
         case Xsse_MOV:     /*movups*/ XX(0x0F); XX(0x10); break;
         case Xsse_OR:                 XX(0x0F); XX(0x56); break;
         case Xsse_XOR:                XX(0x0F); XX(0x57); break;
         case Xsse_AND:                XX(0x0F); XX(0x54); break;
         case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
         case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
         case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
         case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
         case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
         case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
         case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
         case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
         case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
         case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
         case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
         case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
         case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
         case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
         case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
         case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
         case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
         case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
         case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
         case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
         case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
         case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
         case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
         case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
         case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
         case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
         case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
         case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
         case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
         case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
         case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
         case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
         case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
         case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
         case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
         case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
         case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
         case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
         case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
         case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
         case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
         case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
         case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
         case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
         case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
         case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
         case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
         case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
         case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
         case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
         default: goto bad;
      }
      p = doAMode_R(p, fake(vregNo(i->Xin.SseReRg.dst)),
                       fake(vregNo(i->Xin.SseReRg.src)) );
#     undef XX
      goto done;

   case Xin_SseCMov:
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      /* movaps %src, %dst */
      *p++ = 0x0F;
      *p++ = 0x28;
      p = doAMode_R(p, fake(vregNo(i->Xin.SseCMov.dst)),
                       fake(vregNo(i->Xin.SseCMov.src)) );

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

   case Xin_SseShuf:
      *p++ = 0x66;
      *p++ = 0x0F;
      *p++ = 0x70;
      p = doAMode_R(p, fake(vregNo(i->Xin.SseShuf.dst)),
                       fake(vregNo(i->Xin.SseShuf.src)) );
      *p++ = (UChar)(i->Xin.SseShuf.order);
      goto done;

   case Xin_EvCheck: {
      /* We generate:
            (3 bytes)  decl 4(%ebp)    4 == offsetof(host_EvC_COUNTER)
            (2 bytes)  jns  nofail     expected taken
            (3 bytes)  jmp* 0(%ebp)    0 == offsetof(host_EvC_FAILADDR)
            nofail:
      */
      /* This is heavily asserted re instruction lengths.  It needs to
         be.  If we get given unexpected forms of .amCounter or
         .amFailAddr -- basically, anything that's not of the form
         uimm7(%ebp) -- they are likely to fail. */
      /* Note also that after the decl we must be very careful not to
         read the carry flag, else we get a partial flags stall.
         js/jns avoids that, though. */
      UChar* p0 = p;
      /* --- decl 4(%ebp) --- */
      /* "fake(1)" because there's no register in this encoding;
         instead the register field is used as a sub-opcode.  The
         encoding for "decl r/m32" is FF /1, hence the fake(1). */
      *p++ = 0xFF;
      p = doAMode_M(p, fake(1), i->Xin.EvCheck.amCounter);
      vassert(p - p0 == 3);
      /* --- jns nofail --- */
      *p++ = 0x79;
      *p++ = 0x03; /* need to check this 0x03 after the next insn */
      vassert(p - p0 == 5);
      /* --- jmp* 0(%ebp) --- */
      /* The encoding is FF /4. */
      *p++ = 0xFF;
      p = doAMode_M(p, fake(4), i->Xin.EvCheck.amFailAddr);
      vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
      /* And crosscheck .. */
      vassert(evCheckSzB_X86() == 8);
      goto done;
   }

   case Xin_ProfInc: {
      /* We generate   addl $1,NotKnownYet
                       adcl $0,NotKnownYet+4
         in the expectation that a later call to LibVEX_patchProfCtr
         will be used to fill in the immediate fields once the right
         value is known.
            83 05  00 00 00 00  01
            83 15  00 00 00 00  00
      */
      *p++ = 0x83; *p++ = 0x05;
      *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
      *p++ = 0x01;
      *p++ = 0x83; *p++ = 0x15;
      *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
      *p++ = 0x00;
      /* Tell the caller .. */
      vassert(!(*is_profInc));
      *is_profInc = True;
      goto done;
   }

   default:
      goto bad;
   }

  bad:
   ppX86Instr(i, mode64);
   vpanic("emit_X86Instr");
   /*NOTREACHED*/

  done:
   vassert(p - &buf[0] <= 32);
   return p - &buf[0];

#  undef fake
}


/* How big is an event check?  See case for Xin_EvCheck in
   emit_X86Instr just above.  That crosschecks what this returns, so
   we can tell if we're inconsistent. */
Int evCheckSzB_X86 ( void )
{
   return 8;
}
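
/* Worked example (added; not in the original): suppose an XDirect
   emitted its two patchable insns at a hypothetical address 0x5000,
   i.e. the bytes
      BA xx xx xx xx   movl $disp_cp_chain_me, %edx
      FF D2            call *%edx
   Chaining this to a translation at, say, 0x7000 rewrites the 7
   bytes in place as
      E9 FB 1F 00 00   jmp  0x7000     (disp32 = 0x7000 - (0x5000+5))
      0F 0B            ud2
   and unchaining restores the original movl/call pair, which is why
   the two forms must be exactly the same length. */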

/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_X86 ( void* place_to_chain,
                                 void* disp_cp_chain_me_EXPECTED,
                                 void* place_to_jump_to )
{
   /* What we're expecting to see is:
        movl $disp_cp_chain_me_EXPECTED, %edx
        call *%edx
      viz
        BA <4 bytes value == disp_cp_chain_me_EXPECTED>
        FF D2
   */
   UChar* p = (UChar*)place_to_chain;
   vassert(p[0] == 0xBA);
   vassert(*(UInt*)(&p[1]) == (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
   vassert(p[5] == 0xFF);
   vassert(p[6] == 0xD2);
   /* And what we want to change it to is:
        jmp disp32   where disp32 is relative to the next insn
        ud2;
      viz
        E9 <4 bytes == disp32>
        0F 0B
      The replacement has the same length as the original.
   */
   /* This is the delta we need to put into a JMP d32 insn.  It's
      relative to the start of the next insn, hence the -5. */
   Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;

   /* And make the modifications. */
   p[0] = 0xE9;
   p[1] = (delta >> 0) & 0xFF;
   p[2] = (delta >> 8) & 0xFF;
   p[3] = (delta >> 16) & 0xFF;
   p[4] = (delta >> 24) & 0xFF;
   p[5] = 0x0F; p[6] = 0x0B;
   /* sanity check on the delta -- top 32 are all 0 or all 1 */
   delta >>= 32;
   vassert(delta == 0LL || delta == -1LL);
   VexInvalRange vir = {0, 0};
   return vir;
}


/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_X86 ( void* place_to_unchain,
                                   void* place_to_jump_to_EXPECTED,
                                   void* disp_cp_chain_me )
{
   /* What we're expecting to see is:
        jmp d32
        ud2;
      viz
        E9 <4 bytes == disp32>
        0F 0B
   */
   UChar* p = (UChar*)place_to_unchain;
   Bool valid = False;
   if (p[0] == 0xE9
       && p[5] == 0x0F && p[6] == 0x0B) {
      /* Check the offset is right. */
      Int s32 = *(Int*)(&p[1]);
      if ((UChar*)p + 5 + s32 == (UChar*)place_to_jump_to_EXPECTED) {
         valid = True;
         if (0)
            vex_printf("QQQ unchainXDirect_X86: found valid\n");
      }
   }
   vassert(valid);
   /* And what we want to change it to is:
        movl $disp_cp_chain_me, %edx
        call *%edx
      viz
        BA <4 bytes value == disp_cp_chain_me>
        FF D2
      So it's the same length (convenient, huh).
   */
   p[0] = 0xBA;
   *(UInt*)(&p[1]) = (UInt)Ptr_to_ULong(disp_cp_chain_me);
   p[5] = 0xFF;
   p[6] = 0xD2;
   VexInvalRange vir = {0, 0};
   return vir;
}

/* Patch the counter address into a profile inc point, as previously
   created by the Xin_ProfInc case for emit_X86Instr. */
VexInvalRange patchProfInc_X86 ( void*  place_to_patch,
                                 ULong* location_of_counter )
{
   vassert(sizeof(ULong*) == 4);
   UChar* p = (UChar*)place_to_patch;
   vassert(p[0] == 0x83);
   vassert(p[1] == 0x05);
   vassert(p[2] == 0x00);
   vassert(p[3] == 0x00);
   vassert(p[4] == 0x00);
   vassert(p[5] == 0x00);
   vassert(p[6] == 0x01);
   vassert(p[7] == 0x83);
   vassert(p[8] == 0x15);
   vassert(p[9] == 0x00);
   vassert(p[10] == 0x00);
   vassert(p[11] == 0x00);
   vassert(p[12] == 0x00);
   vassert(p[13] == 0x00);
   UInt imm32 = (UInt)Ptr_to_ULong(location_of_counter);
   p[2] = imm32 & 0xFF; imm32 >>= 8;
   p[3] = imm32 & 0xFF; imm32 >>= 8;
   p[4] = imm32 & 0xFF; imm32 >>= 8;
   p[5] = imm32 & 0xFF; imm32 >>= 8;
   imm32 = 4 + (UInt)Ptr_to_ULong(location_of_counter);
   p[9]  = imm32 & 0xFF; imm32 >>= 8;
   p[10] = imm32 & 0xFF; imm32 >>= 8;
   p[11] = imm32 & 0xFF; imm32 >>= 8;
   p[12] = imm32 & 0xFF; imm32 >>= 8;
   VexInvalRange vir = {0, 0};
   return vir;
}


/*---------------------------------------------------------------*/
/*--- end                                     host_x86_defs.c ---*/
/*---------------------------------------------------------------*/