/* rtasm_x86sse.c — revision f4dd0991719ef3e2606920c5100b372181c60899 */
1/************************************************************************** 2 * 3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included 13 * in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 **************************************************************************/ 23 24#include "pipe/p_config.h" 25#include "util/u_cpu_detect.h" 26 27#if defined(PIPE_ARCH_X86) || (defined(PIPE_ARCH_X86_64) && !defined(__MINGW32__)) 28 29#include "pipe/p_compiler.h" 30#include "util/u_debug.h" 31#include "util/u_pointer.h" 32 33#include "rtasm_execmem.h" 34#include "rtasm_x86sse.h" 35 36#define DISASSEM 0 37#define X86_TWOB 0x0f 38 39 40#define DUMP_SSE 0 41 42 43void x86_print_reg( struct x86_reg reg ) 44{ 45 if (reg.mod != mod_REG) 46 debug_printf( "[" ); 47 48 switch( reg.file ) { 49 case file_REG32: 50 switch( reg.idx ) { 51 case reg_AX: debug_printf( "EAX" ); break; 52 case reg_CX: debug_printf( "ECX" ); break; 53 case reg_DX: debug_printf( "EDX" ); break; 54 case reg_BX: debug_printf( "EBX" ); break; 55 case reg_SP: debug_printf( "ESP" ); break; 56 case reg_BP: debug_printf( "EBP" ); break; 57 case reg_SI: debug_printf( "ESI" ); break; 58 case reg_DI: debug_printf( "EDI" ); break; 59 } 60 break; 61 case file_MMX: 62 debug_printf( "MMX%u", reg.idx ); 63 break; 64 case file_XMM: 65 debug_printf( "XMM%u", reg.idx ); 66 break; 67 case file_x87: 68 debug_printf( "fp%u", reg.idx ); 69 break; 70 } 71 72 if (reg.mod == mod_DISP8 || 73 reg.mod == mod_DISP32) 74 debug_printf("+%d", reg.disp); 75 76 if (reg.mod != mod_REG) 77 debug_printf( "]" ); 78} 79 80#if DUMP_SSE 81 82#define DUMP_START() debug_printf( "\n" ) 83#define DUMP_END() debug_printf( "\n" ) 84 85#define DUMP() do { \ 86 const char *foo = __FUNCTION__; \ 87 while (*foo && *foo != '_') \ 88 foo++; \ 89 if (*foo) \ 90 foo++; \ 91 debug_printf( "\n%4x %14s ", p->csr - p->store, foo ); \ 92} while (0) 93 94#define DUMP_I( I ) do { \ 95 DUMP(); \ 96 debug_printf( "%u", I ); \ 97} while( 0 ) 98 99#define DUMP_R( R0 ) do { \ 100 DUMP(); \ 101 x86_print_reg( R0 ); \ 102} while( 0 ) 103 104#define DUMP_RR( R0, R1 ) do { \ 105 DUMP(); \ 106 x86_print_reg( R0 ); \ 107 debug_printf( ", " ); \ 108 x86_print_reg( R1 
); \ 109} while( 0 ) 110 111#define DUMP_RI( R0, I ) do { \ 112 DUMP(); \ 113 x86_print_reg( R0 ); \ 114 debug_printf( ", %u", I ); \ 115} while( 0 ) 116 117#define DUMP_RRI( R0, R1, I ) do { \ 118 DUMP(); \ 119 x86_print_reg( R0 ); \ 120 debug_printf( ", " ); \ 121 x86_print_reg( R1 ); \ 122 debug_printf( ", %u", I ); \ 123} while( 0 ) 124 125#else 126 127#define DUMP_START() 128#define DUMP_END() 129#define DUMP( ) 130#define DUMP_I( I ) 131#define DUMP_R( R0 ) 132#define DUMP_RR( R0, R1 ) 133#define DUMP_RI( R0, I ) 134#define DUMP_RRI( R0, R1, I ) 135 136#endif 137 138 139static void do_realloc( struct x86_function *p ) 140{ 141 if (p->store == p->error_overflow) { 142 p->csr = p->store; 143 } 144 else if (p->size == 0) { 145 p->size = 1024; 146 p->store = rtasm_exec_malloc(p->size); 147 p->csr = p->store; 148 } 149 else { 150 uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); 151 unsigned char *tmp = p->store; 152 p->size *= 2; 153 p->store = rtasm_exec_malloc(p->size); 154 155 if (p->store) { 156 memcpy(p->store, tmp, used); 157 p->csr = p->store + used; 158 } 159 else { 160 p->csr = p->store; 161 } 162 163 rtasm_exec_free(tmp); 164 } 165 166 if (p->store == NULL) { 167 p->store = p->csr = p->error_overflow; 168 p->size = sizeof(p->error_overflow); 169 } 170} 171 172/* Emit bytes to the instruction stream: 173 */ 174static unsigned char *reserve( struct x86_function *p, int bytes ) 175{ 176 if (p->csr + bytes - p->store > (int) p->size) 177 do_realloc(p); 178 179 { 180 unsigned char *csr = p->csr; 181 p->csr += bytes; 182 return csr; 183 } 184} 185 186 187 188static void emit_1b( struct x86_function *p, char b0 ) 189{ 190 char *csr = (char *)reserve(p, 1); 191 *csr = b0; 192} 193 194static void emit_1i( struct x86_function *p, int i0 ) 195{ 196 int *icsr = (int *)reserve(p, sizeof(i0)); 197 *icsr = i0; 198} 199 200static void emit_1ub( struct x86_function *p, unsigned char b0 ) 201{ 202 unsigned char *csr = reserve(p, 1); 203 
*csr++ = b0; 204} 205 206static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) 207{ 208 unsigned char *csr = reserve(p, 2); 209 *csr++ = b0; 210 *csr++ = b1; 211} 212 213static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) 214{ 215 unsigned char *csr = reserve(p, 3); 216 *csr++ = b0; 217 *csr++ = b1; 218 *csr++ = b2; 219} 220 221 222/* Build a modRM byte + possible displacement. No treatment of SIB 223 * indexing. BZZT - no way to encode an absolute address. 224 * 225 * This is the "/r" field in the x86 manuals... 226 */ 227static void emit_modrm( struct x86_function *p, 228 struct x86_reg reg, 229 struct x86_reg regmem ) 230{ 231 unsigned char val = 0; 232 233 assert(reg.mod == mod_REG); 234 235 /* TODO: support extended x86-64 registers */ 236 assert(reg.idx < 8); 237 assert(regmem.idx < 8); 238 239 val |= regmem.mod << 6; /* mod field */ 240 val |= reg.idx << 3; /* reg field */ 241 val |= regmem.idx; /* r/m field */ 242 243 emit_1ub(p, val); 244 245 /* Oh-oh we've stumbled into the SIB thing. 246 */ 247 if (regmem.file == file_REG32 && 248 regmem.idx == reg_SP && 249 regmem.mod != mod_REG) { 250 emit_1ub(p, 0x24); /* simplistic! */ 251 } 252 253 switch (regmem.mod) { 254 case mod_REG: 255 case mod_INDIRECT: 256 break; 257 case mod_DISP8: 258 emit_1b(p, (char) regmem.disp); 259 break; 260 case mod_DISP32: 261 emit_1i(p, regmem.disp); 262 break; 263 default: 264 assert(0); 265 break; 266 } 267} 268 269/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. 270 */ 271static void emit_modrm_noreg( struct x86_function *p, 272 unsigned op, 273 struct x86_reg regmem ) 274{ 275 struct x86_reg dummy = x86_make_reg(file_REG32, op); 276 emit_modrm(p, dummy, regmem); 277} 278 279/* Many x86 instructions have two opcodes to cope with the situations 280 * where the destination is a register or memory reference 281 * respectively. 
This function selects the correct opcode based on 282 * the arguments presented. 283 */ 284static void emit_op_modrm( struct x86_function *p, 285 unsigned char op_dst_is_reg, 286 unsigned char op_dst_is_mem, 287 struct x86_reg dst, 288 struct x86_reg src ) 289{ 290 switch (dst.mod) { 291 case mod_REG: 292 emit_1ub(p, op_dst_is_reg); 293 emit_modrm(p, dst, src); 294 break; 295 case mod_INDIRECT: 296 case mod_DISP32: 297 case mod_DISP8: 298 assert(src.mod == mod_REG); 299 emit_1ub(p, op_dst_is_mem); 300 emit_modrm(p, src, dst); 301 break; 302 default: 303 assert(0); 304 break; 305 } 306} 307 308 309 310 311 312 313 314/* Create and manipulate registers and regmem values: 315 */ 316struct x86_reg x86_make_reg( enum x86_reg_file file, 317 enum x86_reg_name idx ) 318{ 319 struct x86_reg reg; 320 321 reg.file = file; 322 reg.idx = idx; 323 reg.mod = mod_REG; 324 reg.disp = 0; 325 326 return reg; 327} 328 329struct x86_reg x86_make_disp( struct x86_reg reg, 330 int disp ) 331{ 332 assert(reg.file == file_REG32); 333 334 if (reg.mod == mod_REG) 335 reg.disp = disp; 336 else 337 reg.disp += disp; 338 339 if (reg.disp == 0 && reg.idx != reg_BP) 340 reg.mod = mod_INDIRECT; 341 else if (reg.disp <= 127 && reg.disp >= -128) 342 reg.mod = mod_DISP8; 343 else 344 reg.mod = mod_DISP32; 345 346 return reg; 347} 348 349struct x86_reg x86_deref( struct x86_reg reg ) 350{ 351 return x86_make_disp(reg, 0); 352} 353 354struct x86_reg x86_get_base_reg( struct x86_reg reg ) 355{ 356 return x86_make_reg( reg.file, reg.idx ); 357} 358 359int x86_get_label( struct x86_function *p ) 360{ 361 return p->csr - p->store; 362} 363 364 365 366/*********************************************************************** 367 * x86 instructions 368 */ 369 370 371void x64_rexw(struct x86_function *p) 372{ 373 if(x86_target(p) != X86_32) 374 emit_1ub(p, 0x48); 375} 376 377void x86_jcc( struct x86_function *p, 378 enum x86_cc cc, 379 int label ) 380{ 381 int offset = label - (x86_get_label(p) + 2); 382 
DUMP_I(cc); 383 384 if (offset < 0) { 385 /*assert(p->csr - p->store > -offset);*/ 386 if (p->csr - p->store <= -offset) { 387 /* probably out of memory (using the error_overflow buffer) */ 388 return; 389 } 390 } 391 392 if (offset <= 127 && offset >= -128) { 393 emit_1ub(p, 0x70 + cc); 394 emit_1b(p, (char) offset); 395 } 396 else { 397 offset = label - (x86_get_label(p) + 6); 398 emit_2ub(p, 0x0f, 0x80 + cc); 399 emit_1i(p, offset); 400 } 401} 402 403/* Always use a 32bit offset for forward jumps: 404 */ 405int x86_jcc_forward( struct x86_function *p, 406 enum x86_cc cc ) 407{ 408 DUMP_I(cc); 409 emit_2ub(p, 0x0f, 0x80 + cc); 410 emit_1i(p, 0); 411 return x86_get_label(p); 412} 413 414int x86_jmp_forward( struct x86_function *p) 415{ 416 DUMP(); 417 emit_1ub(p, 0xe9); 418 emit_1i(p, 0); 419 return x86_get_label(p); 420} 421 422int x86_call_forward( struct x86_function *p) 423{ 424 DUMP(); 425 426 emit_1ub(p, 0xe8); 427 emit_1i(p, 0); 428 return x86_get_label(p); 429} 430 431/* Fixup offset from forward jump: 432 */ 433void x86_fixup_fwd_jump( struct x86_function *p, 434 int fixup ) 435{ 436 *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup; 437} 438 439void x86_jmp( struct x86_function *p, int label) 440{ 441 DUMP_I( label ); 442 emit_1ub(p, 0xe9); 443 emit_1i(p, label - x86_get_label(p) - 4); 444} 445 446void x86_call( struct x86_function *p, struct x86_reg reg) 447{ 448 DUMP_R( reg ); 449 emit_1ub(p, 0xff); 450 emit_modrm_noreg(p, 2, reg); 451} 452 453 454void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) 455{ 456 DUMP_RI( dst, imm ); 457 assert(dst.file == file_REG32); 458 assert(dst.mod == mod_REG); 459 emit_1ub(p, 0xb8 + dst.idx); 460 emit_1i(p, imm); 461} 462 463void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm ) 464{ 465 DUMP_RI( dst, imm ); 466 if(dst.mod == mod_REG) 467 x86_mov_reg_imm(p, dst, imm); 468 else 469 { 470 emit_1ub(p, 0xc7); 471 emit_modrm_noreg(p, 0, dst); 472 emit_1i(p, imm); 473 } 474} 
475 476void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm ) 477{ 478 DUMP_RI( dst, imm ); 479 emit_1ub(p, 0x66); 480 if(dst.mod == mod_REG) 481 { 482 emit_1ub(p, 0xb8 + dst.idx); 483 emit_2ub(p, imm & 0xff, imm >> 8); 484 } 485 else 486 { 487 emit_1ub(p, 0xc7); 488 emit_modrm_noreg(p, 0, dst); 489 emit_2ub(p, imm & 0xff, imm >> 8); 490 } 491} 492 493void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm ) 494{ 495 DUMP_RI( dst, imm ); 496 if(dst.mod == mod_REG) 497 { 498 emit_1ub(p, 0xb0 + dst.idx); 499 emit_1ub(p, imm); 500 } 501 else 502 { 503 emit_1ub(p, 0xc6); 504 emit_modrm_noreg(p, 0, dst); 505 emit_1ub(p, imm); 506 } 507} 508 509/** 510 * Immediate group 1 instructions. 511 */ 512static INLINE void 513x86_group1_imm( struct x86_function *p, 514 unsigned op, struct x86_reg dst, int imm ) 515{ 516 assert(dst.file == file_REG32); 517 assert(dst.mod == mod_REG); 518 if(-0x80 <= imm && imm < 0x80) { 519 emit_1ub(p, 0x83); 520 emit_modrm_noreg(p, op, dst); 521 emit_1b(p, (char)imm); 522 } 523 else { 524 emit_1ub(p, 0x81); 525 emit_modrm_noreg(p, op, dst); 526 emit_1i(p, imm); 527 } 528} 529 530void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) 531{ 532 DUMP_RI( dst, imm ); 533 x86_group1_imm(p, 0, dst, imm); 534} 535 536void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) 537{ 538 DUMP_RI( dst, imm ); 539 x86_group1_imm(p, 1, dst, imm); 540} 541 542void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) 543{ 544 DUMP_RI( dst, imm ); 545 x86_group1_imm(p, 4, dst, imm); 546} 547 548void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) 549{ 550 DUMP_RI( dst, imm ); 551 x86_group1_imm(p, 5, dst, imm); 552} 553 554void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) 555{ 556 DUMP_RI( dst, imm ); 557 x86_group1_imm(p, 6, dst, imm); 558} 559 560void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) 561{ 562 DUMP_RI( 
dst, imm ); 563 x86_group1_imm(p, 7, dst, imm); 564} 565 566 567void x86_push( struct x86_function *p, 568 struct x86_reg reg ) 569{ 570 DUMP_R( reg ); 571 if (reg.mod == mod_REG) 572 emit_1ub(p, 0x50 + reg.idx); 573 else 574 { 575 emit_1ub(p, 0xff); 576 emit_modrm_noreg(p, 6, reg); 577 } 578 579 580 p->stack_offset += sizeof(void*); 581} 582 583void x86_push_imm32( struct x86_function *p, 584 int imm32 ) 585{ 586 DUMP_I( imm32 ); 587 emit_1ub(p, 0x68); 588 emit_1i(p, imm32); 589 590 p->stack_offset += sizeof(void*); 591} 592 593 594void x86_pop( struct x86_function *p, 595 struct x86_reg reg ) 596{ 597 DUMP_R( reg ); 598 assert(reg.mod == mod_REG); 599 emit_1ub(p, 0x58 + reg.idx); 600 p->stack_offset -= sizeof(void*); 601} 602 603void x86_inc( struct x86_function *p, 604 struct x86_reg reg ) 605{ 606 DUMP_R( reg ); 607 if(x86_target(p) == X86_32 && reg.mod == mod_REG) 608 { 609 emit_1ub(p, 0x40 + reg.idx); 610 return; 611 } 612 emit_1ub(p, 0xff); 613 emit_modrm_noreg(p, 0, reg); 614} 615 616void x86_dec( struct x86_function *p, 617 struct x86_reg reg ) 618{ 619 DUMP_R( reg ); 620 if(x86_target(p) == X86_32 && reg.mod == mod_REG) 621 { 622 emit_1ub(p, 0x48 + reg.idx); 623 return; 624 } 625 emit_1ub(p, 0xff); 626 emit_modrm_noreg(p, 1, reg); 627} 628 629void x86_ret( struct x86_function *p ) 630{ 631 DUMP(); 632 assert(p->stack_offset == 0); 633 emit_1ub(p, 0xc3); 634} 635 636void x86_retw( struct x86_function *p, unsigned short imm ) 637{ 638 DUMP(); 639 emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff); 640} 641 642void x86_sahf( struct x86_function *p ) 643{ 644 DUMP(); 645 emit_1ub(p, 0x9e); 646} 647 648void x86_mov( struct x86_function *p, 649 struct x86_reg dst, 650 struct x86_reg src ) 651{ 652 DUMP_RR( dst, src ); 653 /* special hack for reading arguments until we support x86-64 registers everywhere */ 654 if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) 655 { 656 uint8_t rex = 0x40; 657 if(dst.idx >= 8) 658 { 659 rex |= 4; 660 
dst.idx -= 8; 661 } 662 if(src.idx >= 8) 663 { 664 rex |= 1; 665 src.idx -= 8; 666 } 667 emit_1ub(p, rex); 668 } 669 emit_op_modrm( p, 0x8b, 0x89, dst, src ); 670} 671 672void x86_mov16( struct x86_function *p, 673 struct x86_reg dst, 674 struct x86_reg src ) 675{ 676 DUMP_RR( dst, src ); 677 emit_1ub(p, 0x66); 678 emit_op_modrm( p, 0x8b, 0x89, dst, src ); 679} 680 681void x86_mov8( struct x86_function *p, 682 struct x86_reg dst, 683 struct x86_reg src ) 684{ 685 DUMP_RR( dst, src ); 686 emit_op_modrm( p, 0x8a, 0x88, dst, src ); 687} 688 689void x64_mov64( struct x86_function *p, 690 struct x86_reg dst, 691 struct x86_reg src ) 692{ 693 uint8_t rex = 0x48; 694 DUMP_RR( dst, src ); 695 assert(x86_target(p) != X86_32); 696 697 /* special hack for reading arguments until we support x86-64 registers everywhere */ 698 if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) 699 { 700 if(dst.idx >= 8) 701 { 702 rex |= 4; 703 dst.idx -= 8; 704 } 705 if(src.idx >= 8) 706 { 707 rex |= 1; 708 src.idx -= 8; 709 } 710 } 711 emit_1ub(p, rex); 712 emit_op_modrm( p, 0x8b, 0x89, dst, src ); 713} 714 715void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 716{ 717 DUMP_RR( dst, src ); 718 emit_2ub(p, 0x0f, 0xb6); 719 emit_modrm(p, dst, src); 720} 721 722void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 723{ 724 DUMP_RR( dst, src ); 725 emit_2ub(p, 0x0f, 0xb7); 726 emit_modrm(p, dst, src); 727} 728 729void x86_cmovcc( struct x86_function *p, 730 struct x86_reg dst, 731 struct x86_reg src, 732 enum x86_cc cc) 733{ 734 DUMP_RRI( dst, src, cc ); 735 emit_2ub( p, 0x0f, 0x40 + cc ); 736 emit_modrm( p, dst, src ); 737} 738 739void x86_xor( struct x86_function *p, 740 struct x86_reg dst, 741 struct x86_reg src ) 742{ 743 DUMP_RR( dst, src ); 744 emit_op_modrm( p, 0x33, 0x31, dst, src ); 745} 746 747void x86_cmp( struct x86_function *p, 748 struct x86_reg dst, 749 struct x86_reg src ) 750{ 751 DUMP_RR( dst, 
src ); 752 emit_op_modrm( p, 0x3b, 0x39, dst, src ); 753} 754 755void x86_lea( struct x86_function *p, 756 struct x86_reg dst, 757 struct x86_reg src ) 758{ 759 DUMP_RR( dst, src ); 760 emit_1ub(p, 0x8d); 761 emit_modrm( p, dst, src ); 762} 763 764void x86_test( struct x86_function *p, 765 struct x86_reg dst, 766 struct x86_reg src ) 767{ 768 DUMP_RR( dst, src ); 769 emit_1ub(p, 0x85); 770 emit_modrm( p, dst, src ); 771} 772 773void x86_add( struct x86_function *p, 774 struct x86_reg dst, 775 struct x86_reg src ) 776{ 777 DUMP_RR( dst, src ); 778 emit_op_modrm(p, 0x03, 0x01, dst, src ); 779} 780 781/* Calculate EAX * src, results in EDX:EAX. 782 */ 783void x86_mul( struct x86_function *p, 784 struct x86_reg src ) 785{ 786 DUMP_R( src ); 787 emit_1ub(p, 0xf7); 788 emit_modrm_noreg(p, 4, src ); 789} 790 791 792void x86_imul( struct x86_function *p, 793 struct x86_reg dst, 794 struct x86_reg src ) 795{ 796 DUMP_RR( dst, src ); 797 emit_2ub(p, X86_TWOB, 0xAF); 798 emit_modrm(p, dst, src); 799} 800 801 802void x86_sub( struct x86_function *p, 803 struct x86_reg dst, 804 struct x86_reg src ) 805{ 806 DUMP_RR( dst, src ); 807 emit_op_modrm(p, 0x2b, 0x29, dst, src ); 808} 809 810void x86_or( struct x86_function *p, 811 struct x86_reg dst, 812 struct x86_reg src ) 813{ 814 DUMP_RR( dst, src ); 815 emit_op_modrm( p, 0x0b, 0x09, dst, src ); 816} 817 818void x86_and( struct x86_function *p, 819 struct x86_reg dst, 820 struct x86_reg src ) 821{ 822 DUMP_RR( dst, src ); 823 emit_op_modrm( p, 0x23, 0x21, dst, src ); 824} 825 826void x86_div( struct x86_function *p, 827 struct x86_reg src ) 828{ 829 assert(src.file == file_REG32 && src.mod == mod_REG); 830 emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); 831} 832 833void x86_bswap( struct x86_function *p, struct x86_reg reg ) 834{ 835 DUMP_R(reg); 836 assert(reg.file == file_REG32); 837 assert(reg.mod == mod_REG); 838 emit_2ub(p, 0x0f, 0xc8 + reg.idx); 839} 840 841void x86_shr_imm( struct x86_function *p, struct 
x86_reg reg, unsigned imm ) 842{ 843 DUMP_RI(reg, imm); 844 if(imm == 1) 845 { 846 emit_1ub(p, 0xd1); 847 emit_modrm_noreg(p, 5, reg); 848 } 849 else 850 { 851 emit_1ub(p, 0xc1); 852 emit_modrm_noreg(p, 5, reg); 853 emit_1ub(p, imm); 854 } 855} 856 857void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 858{ 859 DUMP_RI(reg, imm); 860 if(imm == 1) 861 { 862 emit_1ub(p, 0xd1); 863 emit_modrm_noreg(p, 7, reg); 864 } 865 else 866 { 867 emit_1ub(p, 0xc1); 868 emit_modrm_noreg(p, 7, reg); 869 emit_1ub(p, imm); 870 } 871} 872 873void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 874{ 875 DUMP_RI(reg, imm); 876 if(imm == 1) 877 { 878 emit_1ub(p, 0xd1); 879 emit_modrm_noreg(p, 4, reg); 880 } 881 else 882 { 883 emit_1ub(p, 0xc1); 884 emit_modrm_noreg(p, 4, reg); 885 emit_1ub(p, imm); 886 } 887} 888 889 890/*********************************************************************** 891 * SSE instructions 892 */ 893 894void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) 895{ 896 DUMP_R( ptr ); 897 assert(ptr.mod != mod_REG); 898 emit_2ub(p, 0x0f, 0x18); 899 emit_modrm_noreg(p, 0, ptr); 900} 901 902void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) 903{ 904 DUMP_R( ptr ); 905 assert(ptr.mod != mod_REG); 906 emit_2ub(p, 0x0f, 0x18); 907 emit_modrm_noreg(p, 1, ptr); 908} 909 910void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) 911{ 912 DUMP_R( ptr ); 913 assert(ptr.mod != mod_REG); 914 emit_2ub(p, 0x0f, 0x18); 915 emit_modrm_noreg(p, 2, ptr); 916} 917 918void sse_movntps( struct x86_function *p, 919 struct x86_reg dst, 920 struct x86_reg src) 921{ 922 DUMP_RR( dst, src ); 923 924 assert(dst.mod != mod_REG); 925 assert(src.mod == mod_REG); 926 emit_2ub(p, 0x0f, 0x2b); 927 emit_modrm(p, src, dst); 928} 929 930 931 932 933void sse_movss( struct x86_function *p, 934 struct x86_reg dst, 935 struct x86_reg src ) 936{ 937 DUMP_RR( dst, src ); 938 emit_2ub(p, 0xF3, X86_TWOB); 939 emit_op_modrm( p, 
0x10, 0x11, dst, src ); 940} 941 942void sse_movaps( struct x86_function *p, 943 struct x86_reg dst, 944 struct x86_reg src ) 945{ 946 DUMP_RR( dst, src ); 947 emit_1ub(p, X86_TWOB); 948 emit_op_modrm( p, 0x28, 0x29, dst, src ); 949} 950 951void sse_movups( struct x86_function *p, 952 struct x86_reg dst, 953 struct x86_reg src ) 954{ 955 DUMP_RR( dst, src ); 956 emit_1ub(p, X86_TWOB); 957 emit_op_modrm( p, 0x10, 0x11, dst, src ); 958} 959 960void sse_movhps( struct x86_function *p, 961 struct x86_reg dst, 962 struct x86_reg src ) 963{ 964 DUMP_RR( dst, src ); 965 assert(dst.mod != mod_REG || src.mod != mod_REG); 966 emit_1ub(p, X86_TWOB); 967 emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ 968} 969 970void sse_movlps( struct x86_function *p, 971 struct x86_reg dst, 972 struct x86_reg src ) 973{ 974 DUMP_RR( dst, src ); 975 assert(dst.mod != mod_REG || src.mod != mod_REG); 976 emit_1ub(p, X86_TWOB); 977 emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ 978} 979 980void sse_maxps( struct x86_function *p, 981 struct x86_reg dst, 982 struct x86_reg src ) 983{ 984 DUMP_RR( dst, src ); 985 emit_2ub(p, X86_TWOB, 0x5F); 986 emit_modrm( p, dst, src ); 987} 988 989void sse_maxss( struct x86_function *p, 990 struct x86_reg dst, 991 struct x86_reg src ) 992{ 993 DUMP_RR( dst, src ); 994 emit_3ub(p, 0xF3, X86_TWOB, 0x5F); 995 emit_modrm( p, dst, src ); 996} 997 998void sse_divss( struct x86_function *p, 999 struct x86_reg dst, 1000 struct x86_reg src ) 1001{ 1002 DUMP_RR( dst, src ); 1003 emit_3ub(p, 0xF3, X86_TWOB, 0x5E); 1004 emit_modrm( p, dst, src ); 1005} 1006 1007void sse_minps( struct x86_function *p, 1008 struct x86_reg dst, 1009 struct x86_reg src ) 1010{ 1011 DUMP_RR( dst, src ); 1012 emit_2ub(p, X86_TWOB, 0x5D); 1013 emit_modrm( p, dst, src ); 1014} 1015 1016void sse_subps( struct x86_function *p, 1017 struct x86_reg dst, 1018 struct x86_reg src ) 1019{ 1020 DUMP_RR( dst, src ); 1021 emit_2ub(p, X86_TWOB, 0x5C); 1022 emit_modrm( p, dst, src ); 
1023} 1024 1025void sse_mulps( struct x86_function *p, 1026 struct x86_reg dst, 1027 struct x86_reg src ) 1028{ 1029 DUMP_RR( dst, src ); 1030 emit_2ub(p, X86_TWOB, 0x59); 1031 emit_modrm( p, dst, src ); 1032} 1033 1034void sse_mulss( struct x86_function *p, 1035 struct x86_reg dst, 1036 struct x86_reg src ) 1037{ 1038 DUMP_RR( dst, src ); 1039 emit_3ub(p, 0xF3, X86_TWOB, 0x59); 1040 emit_modrm( p, dst, src ); 1041} 1042 1043void sse_addps( struct x86_function *p, 1044 struct x86_reg dst, 1045 struct x86_reg src ) 1046{ 1047 DUMP_RR( dst, src ); 1048 emit_2ub(p, X86_TWOB, 0x58); 1049 emit_modrm( p, dst, src ); 1050} 1051 1052void sse_addss( struct x86_function *p, 1053 struct x86_reg dst, 1054 struct x86_reg src ) 1055{ 1056 DUMP_RR( dst, src ); 1057 emit_3ub(p, 0xF3, X86_TWOB, 0x58); 1058 emit_modrm( p, dst, src ); 1059} 1060 1061void sse_andnps( struct x86_function *p, 1062 struct x86_reg dst, 1063 struct x86_reg src ) 1064{ 1065 DUMP_RR( dst, src ); 1066 emit_2ub(p, X86_TWOB, 0x55); 1067 emit_modrm( p, dst, src ); 1068} 1069 1070void sse_andps( struct x86_function *p, 1071 struct x86_reg dst, 1072 struct x86_reg src ) 1073{ 1074 DUMP_RR( dst, src ); 1075 emit_2ub(p, X86_TWOB, 0x54); 1076 emit_modrm( p, dst, src ); 1077} 1078 1079void sse_rsqrtps( struct x86_function *p, 1080 struct x86_reg dst, 1081 struct x86_reg src ) 1082{ 1083 DUMP_RR( dst, src ); 1084 emit_2ub(p, X86_TWOB, 0x52); 1085 emit_modrm( p, dst, src ); 1086} 1087 1088void sse_rsqrtss( struct x86_function *p, 1089 struct x86_reg dst, 1090 struct x86_reg src ) 1091{ 1092 DUMP_RR( dst, src ); 1093 emit_3ub(p, 0xF3, X86_TWOB, 0x52); 1094 emit_modrm( p, dst, src ); 1095 1096} 1097 1098void sse_movhlps( struct x86_function *p, 1099 struct x86_reg dst, 1100 struct x86_reg src ) 1101{ 1102 DUMP_RR( dst, src ); 1103 assert(dst.mod == mod_REG && src.mod == mod_REG); 1104 emit_2ub(p, X86_TWOB, 0x12); 1105 emit_modrm( p, dst, src ); 1106} 1107 1108void sse_movlhps( struct x86_function *p, 1109 struct x86_reg 
dst, 1110 struct x86_reg src ) 1111{ 1112 DUMP_RR( dst, src ); 1113 assert(dst.mod == mod_REG && src.mod == mod_REG); 1114 emit_2ub(p, X86_TWOB, 0x16); 1115 emit_modrm( p, dst, src ); 1116} 1117 1118void sse_orps( struct x86_function *p, 1119 struct x86_reg dst, 1120 struct x86_reg src ) 1121{ 1122 DUMP_RR( dst, src ); 1123 emit_2ub(p, X86_TWOB, 0x56); 1124 emit_modrm( p, dst, src ); 1125} 1126 1127void sse_xorps( struct x86_function *p, 1128 struct x86_reg dst, 1129 struct x86_reg src ) 1130{ 1131 DUMP_RR( dst, src ); 1132 emit_2ub(p, X86_TWOB, 0x57); 1133 emit_modrm( p, dst, src ); 1134} 1135 1136void sse_cvtps2pi( struct x86_function *p, 1137 struct x86_reg dst, 1138 struct x86_reg src ) 1139{ 1140 DUMP_RR( dst, src ); 1141 assert(dst.file == file_MMX && 1142 (src.file == file_XMM || src.mod != mod_REG)); 1143 1144 p->need_emms = 1; 1145 1146 emit_2ub(p, X86_TWOB, 0x2d); 1147 emit_modrm( p, dst, src ); 1148} 1149 1150void sse2_cvtdq2ps( struct x86_function *p, 1151 struct x86_reg dst, 1152 struct x86_reg src ) 1153{ 1154 DUMP_RR( dst, src ); 1155 emit_2ub(p, X86_TWOB, 0x5b); 1156 emit_modrm( p, dst, src ); 1157} 1158 1159 1160/* Shufps can also be used to implement a reduced swizzle when dest == 1161 * arg0. 
1162 */ 1163void sse_shufps( struct x86_function *p, 1164 struct x86_reg dst, 1165 struct x86_reg src, 1166 unsigned char shuf) 1167{ 1168 DUMP_RRI( dst, src, shuf ); 1169 emit_2ub(p, X86_TWOB, 0xC6); 1170 emit_modrm(p, dst, src); 1171 emit_1ub(p, shuf); 1172} 1173 1174void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1175{ 1176 DUMP_RR( dst, src ); 1177 emit_2ub( p, X86_TWOB, 0x15 ); 1178 emit_modrm( p, dst, src ); 1179} 1180 1181void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1182{ 1183 DUMP_RR( dst, src ); 1184 emit_2ub( p, X86_TWOB, 0x14 ); 1185 emit_modrm( p, dst, src ); 1186} 1187 1188void sse_cmpps( struct x86_function *p, 1189 struct x86_reg dst, 1190 struct x86_reg src, 1191 enum sse_cc cc) 1192{ 1193 DUMP_RRI( dst, src, cc ); 1194 emit_2ub(p, X86_TWOB, 0xC2); 1195 emit_modrm(p, dst, src); 1196 emit_1ub(p, cc); 1197} 1198 1199void sse_pmovmskb( struct x86_function *p, 1200 struct x86_reg dst, 1201 struct x86_reg src) 1202{ 1203 DUMP_RR( dst, src ); 1204 emit_3ub(p, 0x66, X86_TWOB, 0xD7); 1205 emit_modrm(p, dst, src); 1206} 1207 1208void sse_movmskps( struct x86_function *p, 1209 struct x86_reg dst, 1210 struct x86_reg src) 1211{ 1212 DUMP_RR( dst, src ); 1213 emit_2ub(p, X86_TWOB, 0x50); 1214 emit_modrm(p, dst, src); 1215} 1216 1217/*********************************************************************** 1218 * SSE2 instructions 1219 */ 1220 1221void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1222{ 1223 DUMP_RR(dst, src); 1224 emit_2ub(p, 0x66, 0x0f); 1225 if(dst.mod == mod_REG && dst.file == file_REG32) 1226 { 1227 emit_1ub(p, 0x7e); 1228 emit_modrm(p, src, dst); 1229 } 1230 else 1231 { 1232 emit_op_modrm(p, 0x6e, 0x7e, dst, src); 1233 } 1234} 1235 1236void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1237{ 1238 DUMP_RR(dst, src); 1239 switch (dst.mod) { 1240 case mod_REG: 1241 emit_3ub(p, 0xf3, 0x0f, 0x7e); 1242 
emit_modrm(p, dst, src); 1243 break; 1244 case mod_INDIRECT: 1245 case mod_DISP32: 1246 case mod_DISP8: 1247 assert(src.mod == mod_REG); 1248 emit_3ub(p, 0x66, 0x0f, 0xd6); 1249 emit_modrm(p, src, dst); 1250 break; 1251 default: 1252 assert(0); 1253 break; 1254 } 1255} 1256 1257void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1258{ 1259 DUMP_RR(dst, src); 1260 emit_2ub(p, 0xf3, 0x0f); 1261 emit_op_modrm(p, 0x6f, 0x7f, dst, src); 1262} 1263 1264void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1265{ 1266 DUMP_RR(dst, src); 1267 emit_2ub(p, 0x66, 0x0f); 1268 emit_op_modrm(p, 0x6f, 0x7f, dst, src); 1269} 1270 1271void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1272{ 1273 DUMP_RR(dst, src); 1274 emit_2ub(p, 0xf2, 0x0f); 1275 emit_op_modrm(p, 0x10, 0x11, dst, src); 1276} 1277 1278void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1279{ 1280 DUMP_RR(dst, src); 1281 emit_2ub(p, 0x66, 0x0f); 1282 emit_op_modrm(p, 0x10, 0x11, dst, src); 1283} 1284 1285void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1286{ 1287 DUMP_RR(dst, src); 1288 emit_2ub(p, 0x66, 0x0f); 1289 emit_op_modrm(p, 0x28, 0x29, dst, src); 1290} 1291 1292/** 1293 * Perform a reduced swizzle: 1294 */ 1295void sse2_pshufd( struct x86_function *p, 1296 struct x86_reg dst, 1297 struct x86_reg src, 1298 unsigned char shuf) 1299{ 1300 DUMP_RRI( dst, src, shuf ); 1301 emit_3ub(p, 0x66, X86_TWOB, 0x70); 1302 emit_modrm(p, dst, src); 1303 emit_1ub(p, shuf); 1304} 1305 1306void sse2_pshuflw( struct x86_function *p, 1307 struct x86_reg dst, 1308 struct x86_reg src, 1309 unsigned char shuf) 1310{ 1311 DUMP_RRI( dst, src, shuf ); 1312 emit_3ub(p, 0xf2, X86_TWOB, 0x70); 1313 emit_modrm(p, dst, src); 1314 emit_1ub(p, shuf); 1315} 1316 1317void sse2_pshufhw( struct x86_function *p, 1318 struct x86_reg dst, 1319 struct x86_reg src, 1320 unsigned char shuf) 1321{ 
1322 DUMP_RRI( dst, src, shuf ); 1323 emit_3ub(p, 0xf3, X86_TWOB, 0x70); 1324 emit_modrm(p, dst, src); 1325 emit_1ub(p, shuf); 1326} 1327 1328void sse2_cvttps2dq( struct x86_function *p, 1329 struct x86_reg dst, 1330 struct x86_reg src ) 1331{ 1332 DUMP_RR( dst, src ); 1333 emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); 1334 emit_modrm( p, dst, src ); 1335} 1336 1337void sse2_cvtps2dq( struct x86_function *p, 1338 struct x86_reg dst, 1339 struct x86_reg src ) 1340{ 1341 DUMP_RR( dst, src ); 1342 emit_3ub(p, 0x66, X86_TWOB, 0x5B); 1343 emit_modrm( p, dst, src ); 1344} 1345 1346void sse2_cvtsd2ss( struct x86_function *p, 1347 struct x86_reg dst, 1348 struct x86_reg src ) 1349{ 1350 DUMP_RR( dst, src ); 1351 emit_3ub(p, 0xf2, 0x0f, 0x5a); 1352 emit_modrm( p, dst, src ); 1353} 1354 1355void sse2_cvtpd2ps( struct x86_function *p, 1356 struct x86_reg dst, 1357 struct x86_reg src ) 1358{ 1359 DUMP_RR( dst, src ); 1360 emit_3ub(p, 0x66, 0x0f, 0x5a); 1361 emit_modrm( p, dst, src ); 1362} 1363 1364void sse2_packssdw( struct x86_function *p, 1365 struct x86_reg dst, 1366 struct x86_reg src ) 1367{ 1368 DUMP_RR( dst, src ); 1369 emit_3ub(p, 0x66, X86_TWOB, 0x6B); 1370 emit_modrm( p, dst, src ); 1371} 1372 1373void sse2_packsswb( struct x86_function *p, 1374 struct x86_reg dst, 1375 struct x86_reg src ) 1376{ 1377 DUMP_RR( dst, src ); 1378 emit_3ub(p, 0x66, X86_TWOB, 0x63); 1379 emit_modrm( p, dst, src ); 1380} 1381 1382void sse2_packuswb( struct x86_function *p, 1383 struct x86_reg dst, 1384 struct x86_reg src ) 1385{ 1386 DUMP_RR( dst, src ); 1387 emit_3ub(p, 0x66, X86_TWOB, 0x67); 1388 emit_modrm( p, dst, src ); 1389} 1390 1391void sse2_punpcklbw( struct x86_function *p, 1392 struct x86_reg dst, 1393 struct x86_reg src ) 1394{ 1395 DUMP_RR( dst, src ); 1396 emit_3ub(p, 0x66, X86_TWOB, 0x60); 1397 emit_modrm( p, dst, src ); 1398} 1399 1400void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1401{ 1402 DUMP_RR( dst, src ); 1403 emit_3ub(p, 0x66, 0x0f, 
0x61); 1404 emit_modrm( p, dst, src ); 1405} 1406 1407void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1408{ 1409 DUMP_RR( dst, src ); 1410 emit_3ub(p, 0x66, 0x0f, 0x62); 1411 emit_modrm( p, dst, src ); 1412} 1413 1414void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1415{ 1416 DUMP_RR( dst, src ); 1417 emit_3ub(p, 0x66, 0x0f, 0x6c); 1418 emit_modrm( p, dst, src ); 1419} 1420 1421void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1422{ 1423 DUMP_RI(dst, imm); 1424 emit_3ub(p, 0x66, 0x0f, 0x71); 1425 emit_modrm_noreg(p, 6, dst); 1426 emit_1ub(p, imm); 1427} 1428 1429void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1430{ 1431 DUMP_RI(dst, imm); 1432 emit_3ub(p, 0x66, 0x0f, 0x72); 1433 emit_modrm_noreg(p, 6, dst); 1434 emit_1ub(p, imm); 1435} 1436 1437void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1438{ 1439 DUMP_RI(dst, imm); 1440 emit_3ub(p, 0x66, 0x0f, 0x73); 1441 emit_modrm_noreg(p, 6, dst); 1442 emit_1ub(p, imm); 1443} 1444 1445void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1446{ 1447 DUMP_RI(dst, imm); 1448 emit_3ub(p, 0x66, 0x0f, 0x71); 1449 emit_modrm_noreg(p, 2, dst); 1450 emit_1ub(p, imm); 1451} 1452 1453void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1454{ 1455 DUMP_RI(dst, imm); 1456 emit_3ub(p, 0x66, 0x0f, 0x72); 1457 emit_modrm_noreg(p, 2, dst); 1458 emit_1ub(p, imm); 1459} 1460 1461void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1462{ 1463 DUMP_RI(dst, imm); 1464 emit_3ub(p, 0x66, 0x0f, 0x73); 1465 emit_modrm_noreg(p, 2, dst); 1466 emit_1ub(p, imm); 1467} 1468 1469void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1470{ 1471 DUMP_RI(dst, imm); 1472 emit_3ub(p, 0x66, 0x0f, 0x71); 1473 emit_modrm_noreg(p, 4, dst); 1474 emit_1ub(p, imm); 1475} 1476 1477void 
sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1478{ 1479 DUMP_RI(dst, imm); 1480 emit_3ub(p, 0x66, 0x0f, 0x72); 1481 emit_modrm_noreg(p, 4, dst); 1482 emit_1ub(p, imm); 1483} 1484 1485void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1486{ 1487 DUMP_RR(dst, src); 1488 emit_3ub(p, 0x66, 0x0f, 0xeb); 1489 emit_modrm(p, dst, src); 1490} 1491 1492void sse2_rcpps( struct x86_function *p, 1493 struct x86_reg dst, 1494 struct x86_reg src ) 1495{ 1496 DUMP_RR( dst, src ); 1497 emit_2ub(p, X86_TWOB, 0x53); 1498 emit_modrm( p, dst, src ); 1499} 1500 1501void sse2_rcpss( struct x86_function *p, 1502 struct x86_reg dst, 1503 struct x86_reg src ) 1504{ 1505 DUMP_RR( dst, src ); 1506 emit_3ub(p, 0xF3, X86_TWOB, 0x53); 1507 emit_modrm( p, dst, src ); 1508} 1509 1510/*********************************************************************** 1511 * x87 instructions 1512 */ 1513static void note_x87_pop( struct x86_function *p ) 1514{ 1515 p->x87_stack--; 1516 assert(p->x87_stack >= 0); 1517} 1518 1519static void note_x87_push( struct x86_function *p ) 1520{ 1521 p->x87_stack++; 1522 assert(p->x87_stack <= 7); 1523} 1524 1525void x87_assert_stack_empty( struct x86_function *p ) 1526{ 1527 assert (p->x87_stack == 0); 1528} 1529 1530 1531void x87_fist( struct x86_function *p, struct x86_reg dst ) 1532{ 1533 DUMP_R( dst ); 1534 emit_1ub(p, 0xdb); 1535 emit_modrm_noreg(p, 2, dst); 1536} 1537 1538void x87_fistp( struct x86_function *p, struct x86_reg dst ) 1539{ 1540 DUMP_R( dst ); 1541 emit_1ub(p, 0xdb); 1542 emit_modrm_noreg(p, 3, dst); 1543 note_x87_pop(p); 1544} 1545 1546void x87_fild( struct x86_function *p, struct x86_reg arg ) 1547{ 1548 DUMP_R( arg ); 1549 emit_1ub(p, 0xdf); 1550 emit_modrm_noreg(p, 0, arg); 1551 note_x87_push(p); 1552} 1553 1554void x87_fldz( struct x86_function *p ) 1555{ 1556 DUMP(); 1557 emit_2ub(p, 0xd9, 0xee); 1558 note_x87_push(p); 1559} 1560 1561 1562void x87_fldcw( struct x86_function *p, struct 
x86_reg arg ) 1563{ 1564 DUMP_R( arg ); 1565 assert(arg.file == file_REG32); 1566 assert(arg.mod != mod_REG); 1567 emit_1ub(p, 0xd9); 1568 emit_modrm_noreg(p, 5, arg); 1569} 1570 1571void x87_fld1( struct x86_function *p ) 1572{ 1573 DUMP(); 1574 emit_2ub(p, 0xd9, 0xe8); 1575 note_x87_push(p); 1576} 1577 1578void x87_fldl2e( struct x86_function *p ) 1579{ 1580 DUMP(); 1581 emit_2ub(p, 0xd9, 0xea); 1582 note_x87_push(p); 1583} 1584 1585void x87_fldln2( struct x86_function *p ) 1586{ 1587 DUMP(); 1588 emit_2ub(p, 0xd9, 0xed); 1589 note_x87_push(p); 1590} 1591 1592void x87_fwait( struct x86_function *p ) 1593{ 1594 DUMP(); 1595 emit_1ub(p, 0x9b); 1596} 1597 1598void x87_fnclex( struct x86_function *p ) 1599{ 1600 DUMP(); 1601 emit_2ub(p, 0xdb, 0xe2); 1602} 1603 1604void x87_fclex( struct x86_function *p ) 1605{ 1606 x87_fwait(p); 1607 x87_fnclex(p); 1608} 1609 1610void x87_fcmovb( struct x86_function *p, struct x86_reg arg ) 1611{ 1612 DUMP_R( arg ); 1613 assert(arg.file == file_x87); 1614 emit_2ub(p, 0xda, 0xc0+arg.idx); 1615} 1616 1617void x87_fcmove( struct x86_function *p, struct x86_reg arg ) 1618{ 1619 DUMP_R( arg ); 1620 assert(arg.file == file_x87); 1621 emit_2ub(p, 0xda, 0xc8+arg.idx); 1622} 1623 1624void x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) 1625{ 1626 DUMP_R( arg ); 1627 assert(arg.file == file_x87); 1628 emit_2ub(p, 0xda, 0xd0+arg.idx); 1629} 1630 1631void x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) 1632{ 1633 DUMP_R( arg ); 1634 assert(arg.file == file_x87); 1635 emit_2ub(p, 0xdb, 0xc0+arg.idx); 1636} 1637 1638void x87_fcmovne( struct x86_function *p, struct x86_reg arg ) 1639{ 1640 DUMP_R( arg ); 1641 assert(arg.file == file_x87); 1642 emit_2ub(p, 0xdb, 0xc8+arg.idx); 1643} 1644 1645void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg ) 1646{ 1647 DUMP_R( arg ); 1648 assert(arg.file == file_x87); 1649 emit_2ub(p, 0xdb, 0xd0+arg.idx); 1650} 1651 1652 1653 1654static void x87_arith_op( struct x86_function *p, 
struct x86_reg dst, struct x86_reg arg, 1655 unsigned char dst0ub0, 1656 unsigned char dst0ub1, 1657 unsigned char arg0ub0, 1658 unsigned char arg0ub1, 1659 unsigned char argmem_noreg) 1660{ 1661 assert(dst.file == file_x87); 1662 1663 if (arg.file == file_x87) { 1664 if (dst.idx == 0) 1665 emit_2ub(p, dst0ub0, dst0ub1+arg.idx); 1666 else if (arg.idx == 0) 1667 emit_2ub(p, arg0ub0, arg0ub1+arg.idx); 1668 else 1669 assert(0); 1670 } 1671 else if (dst.idx == 0) { 1672 assert(arg.file == file_REG32); 1673 emit_1ub(p, 0xd8); 1674 emit_modrm_noreg(p, argmem_noreg, arg); 1675 } 1676 else 1677 assert(0); 1678} 1679 1680void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1681{ 1682 DUMP_RR( dst, src ); 1683 x87_arith_op(p, dst, src, 1684 0xd8, 0xc8, 1685 0xdc, 0xc8, 1686 4); 1687} 1688 1689void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1690{ 1691 DUMP_RR( dst, src ); 1692 x87_arith_op(p, dst, src, 1693 0xd8, 0xe0, 1694 0xdc, 0xe8, 1695 4); 1696} 1697 1698void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1699{ 1700 DUMP_RR( dst, src ); 1701 x87_arith_op(p, dst, src, 1702 0xd8, 0xe8, 1703 0xdc, 0xe0, 1704 5); 1705} 1706 1707void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1708{ 1709 DUMP_RR( dst, src ); 1710 x87_arith_op(p, dst, src, 1711 0xd8, 0xc0, 1712 0xdc, 0xc0, 1713 0); 1714} 1715 1716void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1717{ 1718 DUMP_RR( dst, src ); 1719 x87_arith_op(p, dst, src, 1720 0xd8, 0xf0, 1721 0xdc, 0xf8, 1722 6); 1723} 1724 1725void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1726{ 1727 DUMP_RR( dst, src ); 1728 x87_arith_op(p, dst, src, 1729 0xd8, 0xf8, 1730 0xdc, 0xf0, 1731 7); 1732} 1733 1734void x87_fmulp( struct x86_function *p, struct x86_reg dst ) 1735{ 1736 DUMP_R( dst ); 1737 assert(dst.file == file_x87); 1738 assert(dst.idx >= 1); 1739 emit_2ub(p, 0xde, 
0xc8+dst.idx); 1740 note_x87_pop(p); 1741} 1742 1743void x87_fsubp( struct x86_function *p, struct x86_reg dst ) 1744{ 1745 DUMP_R( dst ); 1746 assert(dst.file == file_x87); 1747 assert(dst.idx >= 1); 1748 emit_2ub(p, 0xde, 0xe8+dst.idx); 1749 note_x87_pop(p); 1750} 1751 1752void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) 1753{ 1754 DUMP_R( dst ); 1755 assert(dst.file == file_x87); 1756 assert(dst.idx >= 1); 1757 emit_2ub(p, 0xde, 0xe0+dst.idx); 1758 note_x87_pop(p); 1759} 1760 1761void x87_faddp( struct x86_function *p, struct x86_reg dst ) 1762{ 1763 DUMP_R( dst ); 1764 assert(dst.file == file_x87); 1765 assert(dst.idx >= 1); 1766 emit_2ub(p, 0xde, 0xc0+dst.idx); 1767 note_x87_pop(p); 1768} 1769 1770void x87_fdivp( struct x86_function *p, struct x86_reg dst ) 1771{ 1772 DUMP_R( dst ); 1773 assert(dst.file == file_x87); 1774 assert(dst.idx >= 1); 1775 emit_2ub(p, 0xde, 0xf8+dst.idx); 1776 note_x87_pop(p); 1777} 1778 1779void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) 1780{ 1781 DUMP_R( dst ); 1782 assert(dst.file == file_x87); 1783 assert(dst.idx >= 1); 1784 emit_2ub(p, 0xde, 0xf0+dst.idx); 1785 note_x87_pop(p); 1786} 1787 1788void x87_ftst( struct x86_function *p ) 1789{ 1790 DUMP(); 1791 emit_2ub(p, 0xd9, 0xe4); 1792} 1793 1794void x87_fucom( struct x86_function *p, struct x86_reg arg ) 1795{ 1796 DUMP_R( arg ); 1797 assert(arg.file == file_x87); 1798 emit_2ub(p, 0xdd, 0xe0+arg.idx); 1799} 1800 1801void x87_fucomp( struct x86_function *p, struct x86_reg arg ) 1802{ 1803 DUMP_R( arg ); 1804 assert(arg.file == file_x87); 1805 emit_2ub(p, 0xdd, 0xe8+arg.idx); 1806 note_x87_pop(p); 1807} 1808 1809void x87_fucompp( struct x86_function *p ) 1810{ 1811 DUMP(); 1812 emit_2ub(p, 0xda, 0xe9); 1813 note_x87_pop(p); /* pop twice */ 1814 note_x87_pop(p); /* pop twice */ 1815} 1816 1817void x87_fxch( struct x86_function *p, struct x86_reg arg ) 1818{ 1819 DUMP_R( arg ); 1820 assert(arg.file == file_x87); 1821 emit_2ub(p, 0xd9, 0xc8+arg.idx); 1822} 
1823 1824void x87_fabs( struct x86_function *p ) 1825{ 1826 DUMP(); 1827 emit_2ub(p, 0xd9, 0xe1); 1828} 1829 1830void x87_fchs( struct x86_function *p ) 1831{ 1832 DUMP(); 1833 emit_2ub(p, 0xd9, 0xe0); 1834} 1835 1836void x87_fcos( struct x86_function *p ) 1837{ 1838 DUMP(); 1839 emit_2ub(p, 0xd9, 0xff); 1840} 1841 1842 1843void x87_fprndint( struct x86_function *p ) 1844{ 1845 DUMP(); 1846 emit_2ub(p, 0xd9, 0xfc); 1847} 1848 1849void x87_fscale( struct x86_function *p ) 1850{ 1851 DUMP(); 1852 emit_2ub(p, 0xd9, 0xfd); 1853} 1854 1855void x87_fsin( struct x86_function *p ) 1856{ 1857 DUMP(); 1858 emit_2ub(p, 0xd9, 0xfe); 1859} 1860 1861void x87_fsincos( struct x86_function *p ) 1862{ 1863 DUMP(); 1864 emit_2ub(p, 0xd9, 0xfb); 1865} 1866 1867void x87_fsqrt( struct x86_function *p ) 1868{ 1869 DUMP(); 1870 emit_2ub(p, 0xd9, 0xfa); 1871} 1872 1873void x87_fxtract( struct x86_function *p ) 1874{ 1875 DUMP(); 1876 emit_2ub(p, 0xd9, 0xf4); 1877} 1878 1879/* st0 = (2^st0)-1 1880 * 1881 * Restrictions: -1.0 <= st0 <= 1.0 1882 */ 1883void x87_f2xm1( struct x86_function *p ) 1884{ 1885 DUMP(); 1886 emit_2ub(p, 0xd9, 0xf0); 1887} 1888 1889/* st1 = st1 * log2(st0); 1890 * pop_stack; 1891 */ 1892void x87_fyl2x( struct x86_function *p ) 1893{ 1894 DUMP(); 1895 emit_2ub(p, 0xd9, 0xf1); 1896 note_x87_pop(p); 1897} 1898 1899/* st1 = st1 * log2(st0 + 1.0); 1900 * pop_stack; 1901 * 1902 * A fast operation, with restrictions: -.29 < st0 < .29 1903 */ 1904void x87_fyl2xp1( struct x86_function *p ) 1905{ 1906 DUMP(); 1907 emit_2ub(p, 0xd9, 0xf9); 1908 note_x87_pop(p); 1909} 1910 1911 1912void x87_fld( struct x86_function *p, struct x86_reg arg ) 1913{ 1914 DUMP_R( arg ); 1915 if (arg.file == file_x87) 1916 emit_2ub(p, 0xd9, 0xc0 + arg.idx); 1917 else { 1918 emit_1ub(p, 0xd9); 1919 emit_modrm_noreg(p, 0, arg); 1920 } 1921 note_x87_push(p); 1922} 1923 1924void x87_fst( struct x86_function *p, struct x86_reg dst ) 1925{ 1926 DUMP_R( dst ); 1927 if (dst.file == file_x87) 1928 emit_2ub(p, 
0xdd, 0xd0 + dst.idx); 1929 else { 1930 emit_1ub(p, 0xd9); 1931 emit_modrm_noreg(p, 2, dst); 1932 } 1933} 1934 1935void x87_fstp( struct x86_function *p, struct x86_reg dst ) 1936{ 1937 DUMP_R( dst ); 1938 if (dst.file == file_x87) 1939 emit_2ub(p, 0xdd, 0xd8 + dst.idx); 1940 else { 1941 emit_1ub(p, 0xd9); 1942 emit_modrm_noreg(p, 3, dst); 1943 } 1944 note_x87_pop(p); 1945} 1946 1947void x87_fpop( struct x86_function *p ) 1948{ 1949 x87_fstp( p, x86_make_reg( file_x87, 0 )); 1950} 1951 1952 1953void x87_fcom( struct x86_function *p, struct x86_reg dst ) 1954{ 1955 DUMP_R( dst ); 1956 if (dst.file == file_x87) 1957 emit_2ub(p, 0xd8, 0xd0 + dst.idx); 1958 else { 1959 emit_1ub(p, 0xd8); 1960 emit_modrm_noreg(p, 2, dst); 1961 } 1962} 1963 1964 1965void x87_fcomp( struct x86_function *p, struct x86_reg dst ) 1966{ 1967 DUMP_R( dst ); 1968 if (dst.file == file_x87) 1969 emit_2ub(p, 0xd8, 0xd8 + dst.idx); 1970 else { 1971 emit_1ub(p, 0xd8); 1972 emit_modrm_noreg(p, 3, dst); 1973 } 1974 note_x87_pop(p); 1975} 1976 1977void x87_fcomi( struct x86_function *p, struct x86_reg arg ) 1978{ 1979 DUMP_R( arg ); 1980 emit_2ub(p, 0xdb, 0xf0+arg.idx); 1981} 1982 1983void x87_fcomip( struct x86_function *p, struct x86_reg arg ) 1984{ 1985 DUMP_R( arg ); 1986 emit_2ub(p, 0xdb, 0xf0+arg.idx); 1987 note_x87_pop(p); 1988} 1989 1990 1991void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) 1992{ 1993 DUMP_R( dst ); 1994 assert(dst.file == file_REG32); 1995 1996 if (dst.idx == reg_AX && 1997 dst.mod == mod_REG) 1998 emit_2ub(p, 0xdf, 0xe0); 1999 else { 2000 emit_1ub(p, 0xdd); 2001 emit_modrm_noreg(p, 7, dst); 2002 } 2003} 2004 2005 2006void x87_fnstcw( struct x86_function *p, struct x86_reg dst ) 2007{ 2008 DUMP_R( dst ); 2009 assert(dst.file == file_REG32); 2010 2011 emit_1ub(p, 0x9b); /* WAIT -- needed? 
*/ 2012 emit_1ub(p, 0xd9); 2013 emit_modrm_noreg(p, 7, dst); 2014} 2015 2016 2017 2018 2019/*********************************************************************** 2020 * MMX instructions 2021 */ 2022 2023void mmx_emms( struct x86_function *p ) 2024{ 2025 DUMP(); 2026 assert(p->need_emms); 2027 emit_2ub(p, 0x0f, 0x77); 2028 p->need_emms = 0; 2029} 2030 2031void mmx_packssdw( struct x86_function *p, 2032 struct x86_reg dst, 2033 struct x86_reg src ) 2034{ 2035 DUMP_RR( dst, src ); 2036 assert(dst.file == file_MMX && 2037 (src.file == file_MMX || src.mod != mod_REG)); 2038 2039 p->need_emms = 1; 2040 2041 emit_2ub(p, X86_TWOB, 0x6b); 2042 emit_modrm( p, dst, src ); 2043} 2044 2045void mmx_packuswb( struct x86_function *p, 2046 struct x86_reg dst, 2047 struct x86_reg src ) 2048{ 2049 DUMP_RR( dst, src ); 2050 assert(dst.file == file_MMX && 2051 (src.file == file_MMX || src.mod != mod_REG)); 2052 2053 p->need_emms = 1; 2054 2055 emit_2ub(p, X86_TWOB, 0x67); 2056 emit_modrm( p, dst, src ); 2057} 2058 2059void mmx_movd( struct x86_function *p, 2060 struct x86_reg dst, 2061 struct x86_reg src ) 2062{ 2063 DUMP_RR( dst, src ); 2064 p->need_emms = 1; 2065 emit_1ub(p, X86_TWOB); 2066 emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 2067} 2068 2069void mmx_movq( struct x86_function *p, 2070 struct x86_reg dst, 2071 struct x86_reg src ) 2072{ 2073 DUMP_RR( dst, src ); 2074 p->need_emms = 1; 2075 emit_1ub(p, X86_TWOB); 2076 emit_op_modrm( p, 0x6f, 0x7f, dst, src ); 2077} 2078 2079 2080/*********************************************************************** 2081 * Helper functions 2082 */ 2083 2084 2085void x86_cdecl_caller_push_regs( struct x86_function *p ) 2086{ 2087 x86_push(p, x86_make_reg(file_REG32, reg_AX)); 2088 x86_push(p, x86_make_reg(file_REG32, reg_CX)); 2089 x86_push(p, x86_make_reg(file_REG32, reg_DX)); 2090} 2091 2092void x86_cdecl_caller_pop_regs( struct x86_function *p ) 2093{ 2094 x86_pop(p, x86_make_reg(file_REG32, reg_DX)); 2095 x86_pop(p, x86_make_reg(file_REG32, 
reg_CX)); 2096 x86_pop(p, x86_make_reg(file_REG32, reg_AX)); 2097} 2098 2099 2100struct x86_reg x86_fn_arg( struct x86_function *p, 2101 unsigned arg ) 2102{ 2103 switch(x86_target(p)) 2104 { 2105 case X86_64_WIN64_ABI: 2106 /* Microsoft uses a different calling convention than the rest of the world */ 2107 switch(arg) 2108 { 2109 case 1: 2110 return x86_make_reg(file_REG32, reg_CX); 2111 case 2: 2112 return x86_make_reg(file_REG32, reg_DX); 2113 case 3: 2114 return x86_make_reg(file_REG32, reg_R8); 2115 case 4: 2116 return x86_make_reg(file_REG32, reg_R9); 2117 default: 2118 /* Win64 allocates stack slots as if it pushed the first 4 arguments too */ 2119 return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2120 p->stack_offset + arg * 8); 2121 } 2122 case X86_64_STD_ABI: 2123 switch(arg) 2124 { 2125 case 1: 2126 return x86_make_reg(file_REG32, reg_DI); 2127 case 2: 2128 return x86_make_reg(file_REG32, reg_SI); 2129 case 3: 2130 return x86_make_reg(file_REG32, reg_DX); 2131 case 4: 2132 return x86_make_reg(file_REG32, reg_CX); 2133 case 5: 2134 return x86_make_reg(file_REG32, reg_R8); 2135 case 6: 2136 return x86_make_reg(file_REG32, reg_R9); 2137 default: 2138 return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2139 p->stack_offset + (arg - 6) * 8); /* ??? */ 2140 } 2141 case X86_32: 2142 return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2143 p->stack_offset + arg * 4); /* ??? 
*/ 2144 default: 2145 assert(0 && "Unexpected x86 target ABI in x86_fn_arg"); 2146 return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */ 2147 } 2148} 2149 2150static void x86_init_func_common( struct x86_function *p ) 2151{ 2152 util_cpu_detect(); 2153 p->caps = 0; 2154 if(util_cpu_caps.has_mmx) 2155 p->caps |= X86_MMX; 2156 if(util_cpu_caps.has_mmx2) 2157 p->caps |= X86_MMX2; 2158 if(util_cpu_caps.has_sse) 2159 p->caps |= X86_SSE; 2160 if(util_cpu_caps.has_sse2) 2161 p->caps |= X86_SSE2; 2162 if(util_cpu_caps.has_sse3) 2163 p->caps |= X86_SSE3; 2164 if(util_cpu_caps.has_sse4_1) 2165 p->caps |= X86_SSE4_1; 2166 p->csr = p->store; 2167 DUMP_START(); 2168} 2169 2170void x86_init_func( struct x86_function *p ) 2171{ 2172 p->size = 0; 2173 p->store = NULL; 2174 x86_init_func_common(p); 2175} 2176 2177void x86_init_func_size( struct x86_function *p, unsigned code_size ) 2178{ 2179 p->size = code_size; 2180 p->store = rtasm_exec_malloc(code_size); 2181 if (p->store == NULL) { 2182 p->store = p->error_overflow; 2183 } 2184 x86_init_func_common(p); 2185} 2186 2187void x86_release_func( struct x86_function *p ) 2188{ 2189 if (p->store && p->store != p->error_overflow) 2190 rtasm_exec_free(p->store); 2191 2192 p->store = NULL; 2193 p->csr = NULL; 2194 p->size = 0; 2195} 2196 2197 2198static INLINE x86_func 2199voidptr_to_x86_func(void *v) 2200{ 2201 union { 2202 void *v; 2203 x86_func f; 2204 } u; 2205 assert(sizeof(u.v) == sizeof(u.f)); 2206 u.v = v; 2207 return u.f; 2208} 2209 2210 2211x86_func x86_get_func( struct x86_function *p ) 2212{ 2213 DUMP_END(); 2214 if (DISASSEM && p->store) 2215 debug_printf("disassemble %p %p\n", p->store, p->csr); 2216 2217 if (p->store == p->error_overflow) 2218 return voidptr_to_x86_func(NULL); 2219 else 2220 return voidptr_to_x86_func(p->store); 2221} 2222 2223#else 2224 2225void x86sse_dummy( void ); 2226 2227void x86sse_dummy( void ) 2228{ 2229} 2230 2231#endif 2232