1/************************************************************************** 2 * 3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included 13 * in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 **************************************************************************/ 24 25#include "pipe/p_config.h" 26#include "util/u_cpu_detect.h" 27 28#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 29 30#include "pipe/p_compiler.h" 31#include "util/u_debug.h" 32#include "util/u_pointer.h" 33 34#include "rtasm_execmem.h" 35#include "rtasm_x86sse.h" 36 37#define DISASSEM 0 38#define X86_TWOB 0x0f 39 40 41#define DUMP_SSE 0 42 43 44void x86_print_reg( struct x86_reg reg ) 45{ 46 if (reg.mod != mod_REG) 47 debug_printf( "[" ); 48 49 switch( reg.file ) { 50 case file_REG32: 51 switch( reg.idx ) { 52 case reg_AX: debug_printf( "EAX" ); break; 53 case reg_CX: debug_printf( "ECX" ); break; 54 case reg_DX: debug_printf( "EDX" ); break; 55 case reg_BX: debug_printf( "EBX" ); break; 56 case reg_SP: debug_printf( "ESP" ); break; 57 case reg_BP: debug_printf( "EBP" ); break; 58 case reg_SI: debug_printf( "ESI" ); break; 59 case reg_DI: debug_printf( "EDI" ); break; 60 } 61 break; 62 case file_MMX: 63 debug_printf( "MMX%u", reg.idx ); 64 break; 65 case file_XMM: 66 debug_printf( "XMM%u", reg.idx ); 67 break; 68 case file_x87: 69 debug_printf( "fp%u", reg.idx ); 70 break; 71 } 72 73 if (reg.mod == mod_DISP8 || 74 reg.mod == mod_DISP32) 75 debug_printf("+%d", reg.disp); 76 77 if (reg.mod != mod_REG) 78 debug_printf( "]" ); 79} 80 81#if DUMP_SSE 82 83#define DUMP_START() debug_printf( "\n" ) 84#define DUMP_END() debug_printf( "\n" ) 85 86#define DUMP() do { \ 87 const char *foo = __FUNCTION__; \ 88 while (*foo && *foo != '_') \ 89 foo++; \ 90 if (*foo) \ 91 foo++; \ 92 debug_printf( "\n%4x %14s ", p->csr - p->store, foo ); \ 93} while (0) 94 95#define DUMP_I( I ) do { \ 96 DUMP(); \ 97 debug_printf( "%u", I ); \ 98} while( 0 ) 99 100#define DUMP_R( R0 ) do { \ 101 DUMP(); \ 102 x86_print_reg( R0 ); \ 103} while( 0 ) 104 105#define DUMP_RR( R0, R1 ) do { \ 106 DUMP(); \ 107 x86_print_reg( R0 ); \ 108 debug_printf( ", " ); \ 109 x86_print_reg( R1 ); \ 110} while( 0 ) 111 112#define DUMP_RI( R0, I ) do { \ 113 DUMP(); \ 114 x86_print_reg( R0 ); \ 115 debug_printf( ", %u", I ); \ 116} while( 0 ) 117 118#define DUMP_RRI( R0, R1, I ) do { \ 119 DUMP(); \ 120 x86_print_reg( R0 ); \ 121 debug_printf( ", " ); \ 122 x86_print_reg( R1 ); \ 123 debug_printf( ", %u", I ); \ 124} while( 0 ) 125 126#else 127 128#define DUMP_START() 129#define DUMP_END() 130#define DUMP( ) 131#define DUMP_I( I ) 132#define DUMP_R( R0 ) 133#define DUMP_RR( R0, R1 ) 134#define DUMP_RI( R0, I ) 135#define DUMP_RRI( R0, R1, I ) 136 137#endif 138 139 140static void do_realloc( struct x86_function *p ) 141{ 142 if (p->store == p->error_overflow) { 143 p->csr = p->store; 144 } 145 else if (p->size == 0) { 146 p->size = 1024; 147 p->store = rtasm_exec_malloc(p->size); 148 p->csr = p->store; 149 } 150 else { 151 uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); 152 unsigned char *tmp = p->store; 153 p->size *= 2; 154 p->store = rtasm_exec_malloc(p->size); 155 156 if (p->store) { 157 memcpy(p->store, tmp, used); 158 p->csr = p->store + used; 159 } 160 else { 161 p->csr = p->store; 162 } 163 164 rtasm_exec_free(tmp); 165 } 166 167 if (p->store == NULL) { 168 p->store = p->csr = p->error_overflow; 169 p->size = sizeof(p->error_overflow); 170 } 171} 172 173/* Emit bytes to the instruction stream: 174 */ 175static unsigned char *reserve( struct x86_function *p, int bytes ) 176{ 177 if (p->csr + bytes - p->store > (int) p->size) 178 do_realloc(p); 179 180 { 181 unsigned char *csr = p->csr; 182 p->csr += bytes; 183 return csr; 184 } 185} 186 187 188 189static void emit_1b( struct x86_function *p, char b0 ) 190{ 191 char *csr = (char *)reserve(p, 1); 192 *csr = b0; 193} 194 195static void emit_1i( struct x86_function *p, int i0 ) 196{ 197 int *icsr = (int *)reserve(p, sizeof(i0)); 198 *icsr = i0; 199} 200 201static void emit_1ub( struct x86_function *p, unsigned char b0 ) 202{ 203 unsigned char *csr = reserve(p, 1); 204 *csr++ = b0; 205} 206 207static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) 208{ 209 unsigned char *csr = reserve(p, 2); 210 *csr++ = b0; 211 *csr++ = b1; 212} 213 214static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) 215{ 216 unsigned char *csr = reserve(p, 3); 217 *csr++ = b0; 218 *csr++ = b1; 219 *csr++ = b2; 220} 221 222 223/* Build a modRM byte + possible displacement. No treatment of SIB 224 * indexing. BZZT - no way to encode an absolute address. 225 * 226 * This is the "/r" field in the x86 manuals... 227 */ 228static void emit_modrm( struct x86_function *p, 229 struct x86_reg reg, 230 struct x86_reg regmem ) 231{ 232 unsigned char val = 0; 233 234 assert(reg.mod == mod_REG); 235 236 /* TODO: support extended x86-64 registers */ 237 assert(reg.idx < 8); 238 assert(regmem.idx < 8); 239 240 val |= regmem.mod << 6; /* mod field */ 241 val |= reg.idx << 3; /* reg field */ 242 val |= regmem.idx; /* r/m field */ 243 244 emit_1ub(p, val); 245 246 /* Oh-oh we've stumbled into the SIB thing. 247 */ 248 if (regmem.file == file_REG32 && 249 regmem.idx == reg_SP && 250 regmem.mod != mod_REG) { 251 emit_1ub(p, 0x24); /* simplistic! */ 252 } 253 254 switch (regmem.mod) { 255 case mod_REG: 256 case mod_INDIRECT: 257 break; 258 case mod_DISP8: 259 emit_1b(p, (char) regmem.disp); 260 break; 261 case mod_DISP32: 262 emit_1i(p, regmem.disp); 263 break; 264 default: 265 assert(0); 266 break; 267 } 268} 269 270/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. 271 */ 272static void emit_modrm_noreg( struct x86_function *p, 273 unsigned op, 274 struct x86_reg regmem ) 275{ 276 struct x86_reg dummy = x86_make_reg(file_REG32, op); 277 emit_modrm(p, dummy, regmem); 278} 279 280/* Many x86 instructions have two opcodes to cope with the situations 281 * where the destination is a register or memory reference 282 * respectively. This function selects the correct opcode based on 283 * the arguments presented. 284 */ 285static void emit_op_modrm( struct x86_function *p, 286 unsigned char op_dst_is_reg, 287 unsigned char op_dst_is_mem, 288 struct x86_reg dst, 289 struct x86_reg src ) 290{ 291 switch (dst.mod) { 292 case mod_REG: 293 emit_1ub(p, op_dst_is_reg); 294 emit_modrm(p, dst, src); 295 break; 296 case mod_INDIRECT: 297 case mod_DISP32: 298 case mod_DISP8: 299 assert(src.mod == mod_REG); 300 emit_1ub(p, op_dst_is_mem); 301 emit_modrm(p, src, dst); 302 break; 303 default: 304 assert(0); 305 break; 306 } 307} 308 309 310 311 312 313 314 315/* Create and manipulate registers and regmem values: 316 */ 317struct x86_reg x86_make_reg( enum x86_reg_file file, 318 enum x86_reg_name idx ) 319{ 320 struct x86_reg reg; 321 322 reg.file = file; 323 reg.idx = idx; 324 reg.mod = mod_REG; 325 reg.disp = 0; 326 327 return reg; 328} 329 330struct x86_reg x86_make_disp( struct x86_reg reg, 331 int disp ) 332{ 333 assert(reg.file == file_REG32); 334 335 if (reg.mod == mod_REG) 336 reg.disp = disp; 337 else 338 reg.disp += disp; 339 340 if (reg.disp == 0 && reg.idx != reg_BP) 341 reg.mod = mod_INDIRECT; 342 else if (reg.disp <= 127 && reg.disp >= -128) 343 reg.mod = mod_DISP8; 344 else 345 reg.mod = mod_DISP32; 346 347 return reg; 348} 349 350struct x86_reg x86_deref( struct x86_reg reg ) 351{ 352 return x86_make_disp(reg, 0); 353} 354 355struct x86_reg x86_get_base_reg( struct x86_reg reg ) 356{ 357 return x86_make_reg( reg.file, reg.idx ); 358} 359 360int x86_get_label( struct x86_function *p ) 361{ 362 return p->csr - p->store; 363} 364 365 366 367/*********************************************************************** 368 * x86 instructions 369 */ 370 371 372void x64_rexw(struct x86_function *p) 373{ 374 if(x86_target(p) != X86_32) 375 emit_1ub(p, 0x48); 376} 377 378void x86_jcc( struct x86_function *p, 379 enum x86_cc cc, 380 int label ) 381{ 382 int offset = label - (x86_get_label(p) + 2); 383 DUMP_I(cc); 384 385 if (offset < 0) { 386 /*assert(p->csr - p->store > -offset);*/ 387 if (p->csr - p->store <= -offset) { 388 /* probably out of memory (using the error_overflow buffer) */ 389 return; 390 } 391 } 392 393 if (offset <= 127 && offset >= -128) { 394 emit_1ub(p, 0x70 + cc); 395 emit_1b(p, (char) offset); 396 } 397 else { 398 offset = label - (x86_get_label(p) + 6); 399 emit_2ub(p, 0x0f, 0x80 + cc); 400 emit_1i(p, offset); 401 } 402} 403 404/* Always use a 32bit offset for forward jumps: 405 */ 406int x86_jcc_forward( struct x86_function *p, 407 enum x86_cc cc ) 408{ 409 DUMP_I(cc); 410 emit_2ub(p, 0x0f, 0x80 + cc); 411 emit_1i(p, 0); 412 return x86_get_label(p); 413} 414 415int x86_jmp_forward( struct x86_function *p) 416{ 417 DUMP(); 418 emit_1ub(p, 0xe9); 419 emit_1i(p, 0); 420 return x86_get_label(p); 421} 422 423int x86_call_forward( struct x86_function *p) 424{ 425 DUMP(); 426 427 emit_1ub(p, 0xe8); 428 emit_1i(p, 0); 429 return x86_get_label(p); 430} 431 432/* Fixup offset from forward jump: 433 */ 434void x86_fixup_fwd_jump( struct x86_function *p, 435 int fixup ) 436{ 437 *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup; 438} 439 440void x86_jmp( struct x86_function *p, int label) 441{ 442 DUMP_I( label ); 443 emit_1ub(p, 0xe9); 444 emit_1i(p, label - x86_get_label(p) - 4); 445} 446 447void x86_call( struct x86_function *p, struct x86_reg reg) 448{ 449 DUMP_R( reg ); 450 emit_1ub(p, 0xff); 451 emit_modrm_noreg(p, 2, reg); 452} 453 454 455void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) 456{ 457 DUMP_RI( dst, imm ); 458 assert(dst.file == file_REG32); 459 assert(dst.mod == mod_REG); 460 emit_1ub(p, 0xb8 + dst.idx); 461 emit_1i(p, imm); 462} 463 464void x86_mov_imm( struct x86_function *p, struct x86_reg dst, int imm ) 465{ 466 DUMP_RI( dst, imm ); 467 if(dst.mod == mod_REG) 468 x86_mov_reg_imm(p, dst, imm); 469 else 470 { 471 emit_1ub(p, 0xc7); 472 emit_modrm_noreg(p, 0, dst); 473 emit_1i(p, imm); 474 } 475} 476 477void x86_mov16_imm( struct x86_function *p, struct x86_reg dst, uint16_t imm ) 478{ 479 DUMP_RI( dst, imm ); 480 emit_1ub(p, 0x66); 481 if(dst.mod == mod_REG) 482 { 483 emit_1ub(p, 0xb8 + dst.idx); 484 emit_2ub(p, imm & 0xff, imm >> 8); 485 } 486 else 487 { 488 emit_1ub(p, 0xc7); 489 emit_modrm_noreg(p, 0, dst); 490 emit_2ub(p, imm & 0xff, imm >> 8); 491 } 492} 493 494void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm ) 495{ 496 DUMP_RI( dst, imm ); 497 if(dst.mod == mod_REG) 498 { 499 emit_1ub(p, 0xb0 + dst.idx); 500 emit_1ub(p, imm); 501 } 502 else 503 { 504 emit_1ub(p, 0xc6); 505 emit_modrm_noreg(p, 0, dst); 506 emit_1ub(p, imm); 507 } 508} 509 510/** 511 * Immediate group 1 instructions. 512 */ 513static inline void 514x86_group1_imm( struct x86_function *p, 515 unsigned op, struct x86_reg dst, int imm ) 516{ 517 assert(dst.file == file_REG32); 518 assert(dst.mod == mod_REG); 519 if(-0x80 <= imm && imm < 0x80) { 520 emit_1ub(p, 0x83); 521 emit_modrm_noreg(p, op, dst); 522 emit_1b(p, (char)imm); 523 } 524 else { 525 emit_1ub(p, 0x81); 526 emit_modrm_noreg(p, op, dst); 527 emit_1i(p, imm); 528 } 529} 530 531void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) 532{ 533 DUMP_RI( dst, imm ); 534 x86_group1_imm(p, 0, dst, imm); 535} 536 537void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) 538{ 539 DUMP_RI( dst, imm ); 540 x86_group1_imm(p, 1, dst, imm); 541} 542 543void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) 544{ 545 DUMP_RI( dst, imm ); 546 x86_group1_imm(p, 4, dst, imm); 547} 548 549void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) 550{ 551 DUMP_RI( dst, imm ); 552 x86_group1_imm(p, 5, dst, imm); 553} 554 555void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) 556{ 557 DUMP_RI( dst, imm ); 558 x86_group1_imm(p, 6, dst, imm); 559} 560 561void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) 562{ 563 DUMP_RI( dst, imm ); 564 x86_group1_imm(p, 7, dst, imm); 565} 566 567 568void x86_push( struct x86_function *p, 569 struct x86_reg reg ) 570{ 571 DUMP_R( reg ); 572 if (reg.mod == mod_REG) 573 emit_1ub(p, 0x50 + reg.idx); 574 else 575 { 576 emit_1ub(p, 0xff); 577 emit_modrm_noreg(p, 6, reg); 578 } 579 580 581 p->stack_offset += sizeof(void*); 582} 583 584void x86_push_imm32( struct x86_function *p, 585 int imm32 ) 586{ 587 DUMP_I( imm32 ); 588 emit_1ub(p, 0x68); 589 emit_1i(p, imm32); 590 591 p->stack_offset += sizeof(void*); 592} 593 594 595void x86_pop( struct x86_function *p, 596 struct x86_reg reg ) 597{ 598 DUMP_R( reg ); 599 assert(reg.mod == mod_REG); 600 emit_1ub(p, 0x58 + reg.idx); 601 p->stack_offset -= sizeof(void*); 602} 603 604void x86_inc( struct x86_function *p, 605 struct x86_reg reg ) 606{ 607 DUMP_R( reg ); 608 if(x86_target(p) == X86_32 && reg.mod == mod_REG) 609 { 610 emit_1ub(p, 0x40 + reg.idx); 611 return; 612 } 613 emit_1ub(p, 0xff); 614 emit_modrm_noreg(p, 0, reg); 615} 616 617void x86_dec( struct x86_function *p, 618 struct x86_reg reg ) 619{ 620 DUMP_R( reg ); 621 if(x86_target(p) == X86_32 && reg.mod == mod_REG) 622 { 623 emit_1ub(p, 0x48 + reg.idx); 624 return; 625 } 626 emit_1ub(p, 0xff); 627 emit_modrm_noreg(p, 1, reg); 628} 629 630void x86_ret( struct x86_function *p ) 631{ 632 DUMP(); 633 assert(p->stack_offset == 0); 634 emit_1ub(p, 0xc3); 635} 636 637void x86_retw( struct x86_function *p, unsigned short imm ) 638{ 639 DUMP(); 640 emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff); 641} 642 643void x86_sahf( struct x86_function *p ) 644{ 645 DUMP(); 646 emit_1ub(p, 0x9e); 647} 648 649void x86_mov( struct x86_function *p, 650 struct x86_reg dst, 651 struct x86_reg src ) 652{ 653 DUMP_RR( dst, src ); 654 /* special hack for reading arguments until we support x86-64 registers everywhere */ 655 if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) 656 { 657 uint8_t rex = 0x40; 658 if(dst.idx >= 8) 659 { 660 rex |= 4; 661 dst.idx -= 8; 662 } 663 if(src.idx >= 8) 664 { 665 rex |= 1; 666 src.idx -= 8; 667 } 668 emit_1ub(p, rex); 669 } 670 emit_op_modrm( p, 0x8b, 0x89, dst, src ); 671} 672 673void x86_mov16( struct x86_function *p, 674 struct x86_reg dst, 675 struct x86_reg src ) 676{ 677 DUMP_RR( dst, src ); 678 emit_1ub(p, 0x66); 679 emit_op_modrm( p, 0x8b, 0x89, dst, src ); 680} 681 682void x86_mov8( struct x86_function *p, 683 struct x86_reg dst, 684 struct x86_reg src ) 685{ 686 DUMP_RR( dst, src ); 687 emit_op_modrm( p, 0x8a, 0x88, dst, src ); 688} 689 690void x64_mov64( struct x86_function *p, 691 struct x86_reg dst, 692 struct x86_reg src ) 693{ 694 uint8_t rex = 0x48; 695 DUMP_RR( dst, src ); 696 assert(x86_target(p) != X86_32); 697 698 /* special hack for reading arguments until we support x86-64 registers everywhere */ 699 if(src.mod == mod_REG && dst.mod == mod_REG && (src.idx >= 8 || dst.idx >= 8)) 700 { 701 if(dst.idx >= 8) 702 { 703 rex |= 4; 704 dst.idx -= 8; 705 } 706 if(src.idx >= 8) 707 { 708 rex |= 1; 709 src.idx -= 8; 710 } 711 } 712 emit_1ub(p, rex); 713 emit_op_modrm( p, 0x8b, 0x89, dst, src ); 714} 715 716void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 717{ 718 DUMP_RR( dst, src ); 719 emit_2ub(p, 0x0f, 0xb6); 720 emit_modrm(p, dst, src); 721} 722 723void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 724{ 725 DUMP_RR( dst, src ); 726 emit_2ub(p, 0x0f, 0xb7); 727 emit_modrm(p, dst, src); 728} 729 730void x86_cmovcc( struct x86_function *p, 731 struct x86_reg dst, 732 struct x86_reg src, 733 enum x86_cc cc) 734{ 735 DUMP_RRI( dst, src, cc ); 736 emit_2ub( p, 0x0f, 0x40 + cc ); 737 emit_modrm( p, dst, src ); 738} 739 740void x86_xor( struct x86_function *p, 741 struct x86_reg dst, 742 struct x86_reg src ) 743{ 744 DUMP_RR( dst, src ); 745 emit_op_modrm( p, 0x33, 0x31, dst, src ); 746} 747 748void x86_cmp( struct x86_function *p, 749 struct x86_reg dst, 750 struct x86_reg src ) 751{ 752 DUMP_RR( dst, src ); 753 emit_op_modrm( p, 0x3b, 0x39, dst, src ); 754} 755 756void x86_lea( struct x86_function *p, 757 struct x86_reg dst, 758 struct x86_reg src ) 759{ 760 DUMP_RR( dst, src ); 761 emit_1ub(p, 0x8d); 762 emit_modrm( p, dst, src ); 763} 764 765void x86_test( struct x86_function *p, 766 struct x86_reg dst, 767 struct x86_reg src ) 768{ 769 DUMP_RR( dst, src ); 770 emit_1ub(p, 0x85); 771 emit_modrm( p, dst, src ); 772} 773 774void x86_add( struct x86_function *p, 775 struct x86_reg dst, 776 struct x86_reg src ) 777{ 778 DUMP_RR( dst, src ); 779 emit_op_modrm(p, 0x03, 0x01, dst, src ); 780} 781 782/* Calculate EAX * src, results in EDX:EAX. 783 */ 784void x86_mul( struct x86_function *p, 785 struct x86_reg src ) 786{ 787 DUMP_R( src ); 788 emit_1ub(p, 0xf7); 789 emit_modrm_noreg(p, 4, src ); 790} 791 792 793void x86_imul( struct x86_function *p, 794 struct x86_reg dst, 795 struct x86_reg src ) 796{ 797 DUMP_RR( dst, src ); 798 emit_2ub(p, X86_TWOB, 0xAF); 799 emit_modrm(p, dst, src); 800} 801 802 803void x86_sub( struct x86_function *p, 804 struct x86_reg dst, 805 struct x86_reg src ) 806{ 807 DUMP_RR( dst, src ); 808 emit_op_modrm(p, 0x2b, 0x29, dst, src ); 809} 810 811void x86_or( struct x86_function *p, 812 struct x86_reg dst, 813 struct x86_reg src ) 814{ 815 DUMP_RR( dst, src ); 816 emit_op_modrm( p, 0x0b, 0x09, dst, src ); 817} 818 819void x86_and( struct x86_function *p, 820 struct x86_reg dst, 821 struct x86_reg src ) 822{ 823 DUMP_RR( dst, src ); 824 emit_op_modrm( p, 0x23, 0x21, dst, src ); 825} 826 827void x86_div( struct x86_function *p, 828 struct x86_reg src ) 829{ 830 assert(src.file == file_REG32 && src.mod == mod_REG); 831 emit_op_modrm(p, 0xf7, 0, x86_make_reg(file_REG32, 6), src); 832} 833 834void x86_bswap( struct x86_function *p, struct x86_reg reg ) 835{ 836 DUMP_R(reg); 837 assert(reg.file == file_REG32); 838 assert(reg.mod == mod_REG); 839 emit_2ub(p, 0x0f, 0xc8 + reg.idx); 840} 841 842void x86_shr_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 843{ 844 DUMP_RI(reg, imm); 845 if(imm == 1) 846 { 847 emit_1ub(p, 0xd1); 848 emit_modrm_noreg(p, 5, reg); 849 } 850 else 851 { 852 emit_1ub(p, 0xc1); 853 emit_modrm_noreg(p, 5, reg); 854 emit_1ub(p, imm); 855 } 856} 857 858void x86_sar_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 859{ 860 DUMP_RI(reg, imm); 861 if(imm == 1) 862 { 863 emit_1ub(p, 0xd1); 864 emit_modrm_noreg(p, 7, reg); 865 } 866 else 867 { 868 emit_1ub(p, 0xc1); 869 emit_modrm_noreg(p, 7, reg); 870 emit_1ub(p, imm); 871 } 872} 873 874void x86_shl_imm( struct x86_function *p, struct x86_reg reg, unsigned imm ) 875{ 876 DUMP_RI(reg, imm); 877 if(imm == 1) 878 { 879 emit_1ub(p, 0xd1); 880 emit_modrm_noreg(p, 4, reg); 881 } 882 else 883 { 884 emit_1ub(p, 0xc1); 885 emit_modrm_noreg(p, 4, reg); 886 emit_1ub(p, imm); 887 } 888} 889 890 891/*********************************************************************** 892 * SSE instructions 893 */ 894 895void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) 896{ 897 DUMP_R( ptr ); 898 assert(ptr.mod != mod_REG); 899 emit_2ub(p, 0x0f, 0x18); 900 emit_modrm_noreg(p, 0, ptr); 901} 902 903void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) 904{ 905 DUMP_R( ptr ); 906 assert(ptr.mod != mod_REG); 907 emit_2ub(p, 0x0f, 0x18); 908 emit_modrm_noreg(p, 1, ptr); 909} 910 911void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) 912{ 913 DUMP_R( ptr ); 914 assert(ptr.mod != mod_REG); 915 emit_2ub(p, 0x0f, 0x18); 916 emit_modrm_noreg(p, 2, ptr); 917} 918 919void sse_movntps( struct x86_function *p, 920 struct x86_reg dst, 921 struct x86_reg src) 922{ 923 DUMP_RR( dst, src ); 924 925 assert(dst.mod != mod_REG); 926 assert(src.mod == mod_REG); 927 emit_2ub(p, 0x0f, 0x2b); 928 emit_modrm(p, src, dst); 929} 930 931 932 933 934void sse_movss( struct x86_function *p, 935 struct x86_reg dst, 936 struct x86_reg src ) 937{ 938 DUMP_RR( dst, src ); 939 emit_2ub(p, 0xF3, X86_TWOB); 940 emit_op_modrm( p, 0x10, 0x11, dst, src ); 941} 942 943void sse_movaps( struct x86_function *p, 944 struct x86_reg dst, 945 struct x86_reg src ) 946{ 947 DUMP_RR( dst, src ); 948 emit_1ub(p, X86_TWOB); 949 emit_op_modrm( p, 0x28, 0x29, dst, src ); 950} 951 952void sse_movups( struct x86_function *p, 953 struct x86_reg dst, 954 struct x86_reg src ) 955{ 956 DUMP_RR( dst, src ); 957 emit_1ub(p, X86_TWOB); 958 emit_op_modrm( p, 0x10, 0x11, dst, src ); 959} 960 961void sse_movhps( struct x86_function *p, 962 struct x86_reg dst, 963 struct x86_reg src ) 964{ 965 DUMP_RR( dst, src ); 966 assert(dst.mod != mod_REG || src.mod != mod_REG); 967 emit_1ub(p, X86_TWOB); 968 emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ 969} 970 971void sse_movlps( struct x86_function *p, 972 struct x86_reg dst, 973 struct x86_reg src ) 974{ 975 DUMP_RR( dst, src ); 976 assert(dst.mod != mod_REG || src.mod != mod_REG); 977 emit_1ub(p, X86_TWOB); 978 emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ 979} 980 981void sse_maxps( struct x86_function *p, 982 struct x86_reg dst, 983 struct x86_reg src ) 984{ 985 DUMP_RR( dst, src ); 986 emit_2ub(p, X86_TWOB, 0x5F); 987 emit_modrm( p, dst, src ); 988} 989 990void sse_maxss( struct x86_function *p, 991 struct x86_reg dst, 992 struct x86_reg src ) 993{ 994 DUMP_RR( dst, src ); 995 emit_3ub(p, 0xF3, X86_TWOB, 0x5F); 996 emit_modrm( p, dst, src ); 997} 998 999void sse_divss( struct x86_function *p, 1000 struct x86_reg dst, 1001 struct x86_reg src ) 1002{ 1003 DUMP_RR( dst, src ); 1004 emit_3ub(p, 0xF3, X86_TWOB, 0x5E); 1005 emit_modrm( p, dst, src ); 1006} 1007 1008void sse_minps( struct x86_function *p, 1009 struct x86_reg dst, 1010 struct x86_reg src ) 1011{ 1012 DUMP_RR( dst, src ); 1013 emit_2ub(p, X86_TWOB, 0x5D); 1014 emit_modrm( p, dst, src ); 1015} 1016 1017void sse_subps( struct x86_function *p, 1018 struct x86_reg dst, 1019 struct x86_reg src ) 1020{ 1021 DUMP_RR( dst, src ); 1022 emit_2ub(p, X86_TWOB, 0x5C); 1023 emit_modrm( p, dst, src ); 1024} 1025 1026void sse_mulps( struct x86_function *p, 1027 struct x86_reg dst, 1028 struct x86_reg src ) 1029{ 1030 DUMP_RR( dst, src ); 1031 emit_2ub(p, X86_TWOB, 0x59); 1032 emit_modrm( p, dst, src ); 1033} 1034 1035void sse_mulss( struct x86_function *p, 1036 struct x86_reg dst, 1037 struct x86_reg src ) 1038{ 1039 DUMP_RR( dst, src ); 1040 emit_3ub(p, 0xF3, X86_TWOB, 0x59); 1041 emit_modrm( p, dst, src ); 1042} 1043 1044void sse_addps( struct x86_function *p, 1045 struct x86_reg dst, 1046 struct x86_reg src ) 1047{ 1048 DUMP_RR( dst, src ); 1049 emit_2ub(p, X86_TWOB, 0x58); 1050 emit_modrm( p, dst, src ); 1051} 1052 1053void sse_addss( struct x86_function *p, 1054 struct x86_reg dst, 1055 struct x86_reg src ) 1056{ 1057 DUMP_RR( dst, src ); 1058 emit_3ub(p, 0xF3, X86_TWOB, 0x58); 1059 emit_modrm( p, dst, src ); 1060} 1061 1062void sse_andnps( struct x86_function *p, 1063 struct x86_reg dst, 1064 struct x86_reg src ) 1065{ 1066 DUMP_RR( dst, src ); 1067 emit_2ub(p, X86_TWOB, 0x55); 1068 emit_modrm( p, dst, src ); 1069} 1070 1071void sse_andps( struct x86_function *p, 1072 struct x86_reg dst, 1073 struct x86_reg src ) 1074{ 1075 DUMP_RR( dst, src ); 1076 emit_2ub(p, X86_TWOB, 0x54); 1077 emit_modrm( p, dst, src ); 1078} 1079 1080void sse_rsqrtps( struct x86_function *p, 1081 struct x86_reg dst, 1082 struct x86_reg src ) 1083{ 1084 DUMP_RR( dst, src ); 1085 emit_2ub(p, X86_TWOB, 0x52); 1086 emit_modrm( p, dst, src ); 1087} 1088 1089void sse_rsqrtss( struct x86_function *p, 1090 struct x86_reg dst, 1091 struct x86_reg src ) 1092{ 1093 DUMP_RR( dst, src ); 1094 emit_3ub(p, 0xF3, X86_TWOB, 0x52); 1095 emit_modrm( p, dst, src ); 1096 1097} 1098 1099void sse_movhlps( struct x86_function *p, 1100 struct x86_reg dst, 1101 struct x86_reg src ) 1102{ 1103 DUMP_RR( dst, src ); 1104 assert(dst.mod == mod_REG && src.mod == mod_REG); 1105 emit_2ub(p, X86_TWOB, 0x12); 1106 emit_modrm( p, dst, src ); 1107} 1108 1109void sse_movlhps( struct x86_function *p, 1110 struct x86_reg dst, 1111 struct x86_reg src ) 1112{ 1113 DUMP_RR( dst, src ); 1114 assert(dst.mod == mod_REG && src.mod == mod_REG); 1115 emit_2ub(p, X86_TWOB, 0x16); 1116 emit_modrm( p, dst, src ); 1117} 1118 1119void sse_orps( struct x86_function *p, 1120 struct x86_reg dst, 1121 struct x86_reg src ) 1122{ 1123 DUMP_RR( dst, src ); 1124 emit_2ub(p, X86_TWOB, 0x56); 1125 emit_modrm( p, dst, src ); 1126} 1127 1128void sse_xorps( struct x86_function *p, 1129 struct x86_reg dst, 1130 struct x86_reg src ) 1131{ 1132 DUMP_RR( dst, src ); 1133 emit_2ub(p, X86_TWOB, 0x57); 1134 emit_modrm( p, dst, src ); 1135} 1136 1137void sse_cvtps2pi( struct x86_function *p, 1138 struct x86_reg dst, 1139 struct x86_reg src ) 1140{ 1141 DUMP_RR( dst, src ); 1142 assert(dst.file == file_MMX && 1143 (src.file == file_XMM || src.mod != mod_REG)); 1144 1145 p->need_emms = 1; 1146 1147 emit_2ub(p, X86_TWOB, 0x2d); 1148 emit_modrm( p, dst, src ); 1149} 1150 1151void sse2_cvtdq2ps( struct x86_function *p, 1152 struct x86_reg dst, 1153 struct x86_reg src ) 1154{ 1155 DUMP_RR( dst, src ); 1156 emit_2ub(p, X86_TWOB, 0x5b); 1157 emit_modrm( p, dst, src ); 1158} 1159 1160 1161/* Shufps can also be used to implement a reduced swizzle when dest == 1162 * arg0. 1163 */ 1164void sse_shufps( struct x86_function *p, 1165 struct x86_reg dst, 1166 struct x86_reg src, 1167 unsigned char shuf) 1168{ 1169 DUMP_RRI( dst, src, shuf ); 1170 emit_2ub(p, X86_TWOB, 0xC6); 1171 emit_modrm(p, dst, src); 1172 emit_1ub(p, shuf); 1173} 1174 1175void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1176{ 1177 DUMP_RR( dst, src ); 1178 emit_2ub( p, X86_TWOB, 0x15 ); 1179 emit_modrm( p, dst, src ); 1180} 1181 1182void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1183{ 1184 DUMP_RR( dst, src ); 1185 emit_2ub( p, X86_TWOB, 0x14 ); 1186 emit_modrm( p, dst, src ); 1187} 1188 1189void sse_cmpps( struct x86_function *p, 1190 struct x86_reg dst, 1191 struct x86_reg src, 1192 enum sse_cc cc) 1193{ 1194 DUMP_RRI( dst, src, cc ); 1195 emit_2ub(p, X86_TWOB, 0xC2); 1196 emit_modrm(p, dst, src); 1197 emit_1ub(p, cc); 1198} 1199 1200void sse_pmovmskb( struct x86_function *p, 1201 struct x86_reg dst, 1202 struct x86_reg src) 1203{ 1204 DUMP_RR( dst, src ); 1205 emit_3ub(p, 0x66, X86_TWOB, 0xD7); 1206 emit_modrm(p, dst, src); 1207} 1208 1209void sse_movmskps( struct x86_function *p, 1210 struct x86_reg dst, 1211 struct x86_reg src) 1212{ 1213 DUMP_RR( dst, src ); 1214 emit_2ub(p, X86_TWOB, 0x50); 1215 emit_modrm(p, dst, src); 1216} 1217 1218/*********************************************************************** 1219 * SSE2 instructions 1220 */ 1221 1222void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1223{ 1224 DUMP_RR(dst, src); 1225 emit_2ub(p, 0x66, 0x0f); 1226 if(dst.mod == mod_REG && dst.file == file_REG32) 1227 { 1228 emit_1ub(p, 0x7e); 1229 emit_modrm(p, src, dst); 1230 } 1231 else 1232 { 1233 emit_op_modrm(p, 0x6e, 0x7e, dst, src); 1234 } 1235} 1236 1237void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1238{ 1239 DUMP_RR(dst, src); 1240 switch (dst.mod) { 1241 case mod_REG: 1242 emit_3ub(p, 0xf3, 0x0f, 0x7e); 1243 emit_modrm(p, dst, src); 1244 break; 1245 case mod_INDIRECT: 1246 case mod_DISP32: 1247 case mod_DISP8: 1248 assert(src.mod == mod_REG); 1249 emit_3ub(p, 0x66, 0x0f, 0xd6); 1250 emit_modrm(p, src, dst); 1251 break; 1252 default: 1253 assert(0); 1254 break; 1255 } 1256} 1257 1258void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1259{ 1260 DUMP_RR(dst, src); 1261 emit_2ub(p, 0xf3, 0x0f); 1262 emit_op_modrm(p, 0x6f, 0x7f, dst, src); 1263} 1264 1265void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1266{ 1267 DUMP_RR(dst, src); 1268 emit_2ub(p, 0x66, 0x0f); 1269 emit_op_modrm(p, 0x6f, 0x7f, dst, src); 1270} 1271 1272void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1273{ 1274 DUMP_RR(dst, src); 1275 emit_2ub(p, 0xf2, 0x0f); 1276 emit_op_modrm(p, 0x10, 0x11, dst, src); 1277} 1278 1279void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1280{ 1281 DUMP_RR(dst, src); 1282 emit_2ub(p, 0x66, 0x0f); 1283 emit_op_modrm(p, 0x10, 0x11, dst, src); 1284} 1285 1286void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1287{ 1288 DUMP_RR(dst, src); 1289 emit_2ub(p, 0x66, 0x0f); 1290 emit_op_modrm(p, 0x28, 0x29, dst, src); 1291} 1292 1293/** 1294 * Perform a reduced swizzle: 1295 */ 1296void sse2_pshufd( struct x86_function *p, 1297 struct x86_reg dst, 1298 struct x86_reg src, 1299 unsigned char shuf) 1300{ 1301 DUMP_RRI( dst, src, shuf ); 1302 emit_3ub(p, 0x66, X86_TWOB, 0x70); 1303 emit_modrm(p, dst, src); 1304 emit_1ub(p, shuf); 1305} 1306 1307void sse2_pshuflw( struct x86_function *p, 1308 struct x86_reg dst, 1309 struct x86_reg src, 1310 unsigned char shuf) 1311{ 1312 DUMP_RRI( dst, src, shuf ); 1313 emit_3ub(p, 0xf2, X86_TWOB, 0x70); 1314 emit_modrm(p, dst, src); 1315 emit_1ub(p, shuf); 1316} 1317 1318void sse2_pshufhw( struct x86_function *p, 1319 struct x86_reg dst, 1320 struct x86_reg src, 1321 unsigned char shuf) 1322{ 1323 DUMP_RRI( dst, src, shuf ); 1324 emit_3ub(p, 0xf3, X86_TWOB, 0x70); 1325 emit_modrm(p, dst, src); 1326 emit_1ub(p, shuf); 1327} 1328 1329void sse2_cvttps2dq( struct x86_function *p, 1330 struct x86_reg dst, 1331 struct x86_reg src ) 1332{ 1333 DUMP_RR( dst, src ); 1334 emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); 1335 emit_modrm( p, dst, src ); 1336} 1337 1338void sse2_cvtps2dq( struct x86_function *p, 1339 struct x86_reg dst, 1340 struct x86_reg src ) 1341{ 1342 DUMP_RR( dst, src ); 1343 emit_3ub(p, 0x66, X86_TWOB, 0x5B); 1344 emit_modrm( p, dst, src ); 1345} 1346 1347void sse2_cvtsd2ss( struct x86_function *p, 1348 struct x86_reg dst, 1349 struct x86_reg src ) 1350{ 1351 DUMP_RR( dst, src ); 1352 emit_3ub(p, 0xf2, 0x0f, 0x5a); 1353 emit_modrm( p, dst, src ); 1354} 1355 1356void sse2_cvtpd2ps( struct x86_function *p, 1357 struct x86_reg dst, 1358 struct x86_reg src ) 1359{ 1360 DUMP_RR( dst, src ); 1361 emit_3ub(p, 0x66, 0x0f, 0x5a); 1362 emit_modrm( p, dst, src ); 1363} 1364 1365void sse2_packssdw( struct x86_function *p, 1366 struct x86_reg dst, 1367 struct x86_reg src ) 1368{ 1369 DUMP_RR( dst, src ); 1370 emit_3ub(p, 0x66, X86_TWOB, 0x6B); 1371 emit_modrm( p, dst, src ); 1372} 1373 1374void sse2_packsswb( struct x86_function *p, 1375 struct x86_reg dst, 1376 struct x86_reg src ) 1377{ 1378 DUMP_RR( dst, src ); 1379 emit_3ub(p, 0x66, X86_TWOB, 0x63); 1380 emit_modrm( p, dst, src ); 1381} 1382 1383void sse2_packuswb( struct x86_function *p, 1384 struct x86_reg dst, 1385 struct x86_reg src ) 1386{ 1387 DUMP_RR( dst, src ); 1388 emit_3ub(p, 0x66, X86_TWOB, 0x67); 1389 emit_modrm( p, dst, src ); 1390} 1391 1392void sse2_punpcklbw( struct x86_function *p, 1393 struct x86_reg dst, 1394 struct x86_reg src ) 1395{ 1396 DUMP_RR( dst, src ); 1397 emit_3ub(p, 0x66, X86_TWOB, 0x60); 1398 emit_modrm( p, dst, src ); 1399} 1400 1401void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1402{ 1403 DUMP_RR( dst, src ); 1404 emit_3ub(p, 0x66, 0x0f, 0x61); 1405 emit_modrm( p, dst, src ); 1406} 1407 1408void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1409{ 1410 DUMP_RR( dst, src ); 1411 emit_3ub(p, 0x66, 0x0f, 0x62); 1412 emit_modrm( p, dst, src ); 1413} 1414 1415void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1416{ 1417 DUMP_RR( dst, src ); 1418 emit_3ub(p, 0x66, 0x0f, 0x6c); 1419 emit_modrm( p, dst, src ); 1420} 1421 1422void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1423{ 1424 DUMP_RI(dst, imm); 1425 emit_3ub(p, 0x66, 0x0f, 0x71); 1426 emit_modrm_noreg(p, 6, dst); 1427 emit_1ub(p, imm); 1428} 1429 1430void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1431{ 1432 DUMP_RI(dst, imm); 1433 emit_3ub(p, 0x66, 0x0f, 0x72); 1434 emit_modrm_noreg(p, 6, dst); 1435 emit_1ub(p, imm); 1436} 1437 1438void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1439{ 1440 DUMP_RI(dst, imm); 1441 emit_3ub(p, 0x66, 0x0f, 0x73); 1442 emit_modrm_noreg(p, 6, dst); 1443 emit_1ub(p, imm); 1444} 1445 1446void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1447{ 1448 DUMP_RI(dst, imm); 1449 emit_3ub(p, 0x66, 0x0f, 0x71); 1450 emit_modrm_noreg(p, 2, dst); 1451 emit_1ub(p, imm); 1452} 1453 1454void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1455{ 1456 DUMP_RI(dst, imm); 1457 emit_3ub(p, 0x66, 0x0f, 0x72); 1458 emit_modrm_noreg(p, 2, dst); 1459 emit_1ub(p, imm); 1460} 1461 1462void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1463{ 1464 DUMP_RI(dst, imm); 1465 emit_3ub(p, 0x66, 0x0f, 0x73); 1466 emit_modrm_noreg(p, 2, dst); 1467 emit_1ub(p, imm); 1468} 1469 1470void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1471{ 1472 DUMP_RI(dst, imm); 1473 emit_3ub(p, 0x66, 0x0f, 0x71); 1474 emit_modrm_noreg(p, 4, dst); 1475 emit_1ub(p, imm); 1476} 1477 1478void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm ) 1479{ 1480 DUMP_RI(dst, imm); 1481 emit_3ub(p, 0x66, 0x0f, 0x72); 1482 emit_modrm_noreg(p, 4, dst); 1483 emit_1ub(p, imm); 1484} 1485 1486void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1487{ 1488 DUMP_RR(dst, src); 1489 emit_3ub(p, 0x66, 0x0f, 0xeb); 1490 emit_modrm(p, dst, src); 1491} 1492 1493void sse2_rcpps( struct x86_function *p, 1494 struct x86_reg dst, 1495 struct x86_reg src ) 1496{ 1497 DUMP_RR( dst, src ); 1498 emit_2ub(p, X86_TWOB, 0x53); 1499 emit_modrm( p, dst, src ); 1500} 1501 1502void sse2_rcpss( struct x86_function *p, 1503 struct x86_reg dst, 1504 struct x86_reg src ) 1505{ 1506 DUMP_RR( dst, src ); 1507 emit_3ub(p, 0xF3, X86_TWOB, 0x53); 1508 emit_modrm( p, dst, src ); 1509} 1510 1511/*********************************************************************** 1512 * x87 instructions 1513 */ 1514static void note_x87_pop( struct x86_function *p ) 1515{ 1516 p->x87_stack--; 1517 assert(p->x87_stack >= 0); 1518} 1519 1520static void note_x87_push( struct x86_function *p ) 1521{ 1522 p->x87_stack++; 1523 assert(p->x87_stack <= 7); 1524} 1525 1526void x87_assert_stack_empty( struct x86_function *p ) 1527{ 1528 assert (p->x87_stack == 0); 1529} 1530 1531 1532void x87_fist( struct x86_function *p, struct x86_reg dst ) 1533{ 1534 DUMP_R( dst ); 1535 emit_1ub(p, 0xdb); 1536 emit_modrm_noreg(p, 2, dst); 1537} 1538 1539void x87_fistp( struct x86_function *p, struct x86_reg dst ) 1540{ 1541 DUMP_R( dst ); 1542 emit_1ub(p, 0xdb); 1543 emit_modrm_noreg(p, 3, dst); 1544 note_x87_pop(p); 1545} 1546 1547void x87_fild( struct x86_function *p, struct x86_reg arg ) 1548{ 1549 DUMP_R( arg ); 1550 emit_1ub(p, 0xdf); 1551 emit_modrm_noreg(p, 0, arg); 1552 note_x87_push(p); 1553} 1554 1555void x87_fldz( struct x86_function *p ) 1556{ 1557 DUMP(); 1558 emit_2ub(p, 0xd9, 0xee); 1559 note_x87_push(p); 1560} 1561 1562 1563void x87_fldcw( struct x86_function *p, struct x86_reg arg ) 1564{ 1565 DUMP_R( arg ); 1566 assert(arg.file == file_REG32); 1567 assert(arg.mod != mod_REG); 1568 emit_1ub(p, 0xd9); 1569 emit_modrm_noreg(p, 5, arg); 1570} 1571 1572void x87_fld1( struct x86_function *p ) 1573{ 1574 DUMP(); 1575 emit_2ub(p, 0xd9, 0xe8); 1576 note_x87_push(p); 1577} 1578 1579void x87_fldl2e( struct x86_function *p ) 1580{ 1581 DUMP(); 1582 emit_2ub(p, 0xd9, 0xea); 1583 note_x87_push(p); 1584} 1585 1586void x87_fldln2( struct x86_function *p ) 1587{ 1588 DUMP(); 1589 emit_2ub(p, 0xd9, 0xed); 1590 note_x87_push(p); 1591} 1592 1593void x87_fwait( struct x86_function *p ) 1594{ 1595 DUMP(); 1596 emit_1ub(p, 0x9b); 1597} 1598 1599void x87_fnclex( struct x86_function *p ) 1600{ 1601 DUMP(); 1602 emit_2ub(p, 0xdb, 0xe2); 1603} 1604 1605void x87_fclex( struct x86_function *p ) 1606{ 1607 x87_fwait(p); 1608 x87_fnclex(p); 1609} 1610 1611void x87_fcmovb( struct x86_function *p, struct x86_reg arg ) 1612{ 1613 DUMP_R( arg ); 1614 assert(arg.file == file_x87); 1615 emit_2ub(p, 0xda, 0xc0+arg.idx); 1616} 1617 1618void x87_fcmove( struct x86_function *p, struct x86_reg arg ) 1619{ 1620 DUMP_R( arg ); 1621 assert(arg.file == file_x87); 1622 emit_2ub(p, 0xda, 0xc8+arg.idx); 1623} 1624 1625void x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) 1626{ 1627 DUMP_R( arg ); 1628 assert(arg.file == file_x87); 1629 emit_2ub(p, 0xda, 0xd0+arg.idx); 1630} 1631 1632void x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) 1633{ 1634 DUMP_R( arg ); 1635 assert(arg.file == file_x87); 1636 emit_2ub(p, 0xdb, 0xc0+arg.idx); 1637} 1638 1639void x87_fcmovne( struct x86_function *p, struct x86_reg arg ) 1640{ 1641 DUMP_R( arg ); 1642 assert(arg.file == file_x87); 1643 emit_2ub(p, 0xdb, 0xc8+arg.idx); 1644} 1645 1646void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg ) 1647{ 1648 DUMP_R( arg ); 1649 assert(arg.file == file_x87); 1650 emit_2ub(p, 0xdb, 0xd0+arg.idx); 1651} 1652 1653 1654 1655static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, 1656 unsigned char dst0ub0, 1657 unsigned char dst0ub1, 1658 unsigned char arg0ub0, 1659 unsigned char arg0ub1, 1660 unsigned char argmem_noreg) 1661{ 1662 assert(dst.file == file_x87); 1663 1664 if (arg.file == file_x87) { 1665 if (dst.idx == 0) 1666 emit_2ub(p, dst0ub0, dst0ub1+arg.idx); 1667 else if (arg.idx == 0) 1668 emit_2ub(p, arg0ub0, arg0ub1+arg.idx); 1669 else 1670 assert(0); 1671 } 1672 else if (dst.idx == 0) { 1673 assert(arg.file == file_REG32); 1674 emit_1ub(p, 0xd8); 1675 emit_modrm_noreg(p, argmem_noreg, arg); 1676 } 1677 else 1678 assert(0); 1679} 1680 1681void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1682{ 1683 DUMP_RR( dst, src ); 1684 x87_arith_op(p, dst, src, 1685 0xd8, 0xc8, 1686 0xdc, 0xc8, 1687 4); 1688} 1689 1690void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1691{ 1692 DUMP_RR( dst, src ); 1693 x87_arith_op(p, dst, src, 1694 0xd8, 0xe0, 1695 0xdc, 0xe8, 1696 4); 1697} 1698 1699void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1700{ 1701 DUMP_RR( dst, src ); 1702 x87_arith_op(p, dst, src, 1703 0xd8, 0xe8, 1704 0xdc, 0xe0, 1705 5); 1706} 1707 1708void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1709{ 1710 DUMP_RR( dst, src ); 1711 x87_arith_op(p, dst, src, 1712 0xd8, 0xc0, 1713 0xdc, 0xc0, 1714 0); 1715} 1716 1717void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1718{ 1719 DUMP_RR( dst, src ); 1720 x87_arith_op(p, dst, src, 1721 0xd8, 0xf0, 1722 0xdc, 0xf8, 1723 6); 1724} 1725 1726void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) 1727{ 1728 DUMP_RR( dst, src ); 1729 x87_arith_op(p, dst, src, 1730 0xd8, 0xf8, 1731 0xdc, 0xf0, 1732 7); 1733} 1734 1735void x87_fmulp( struct x86_function *p, struct x86_reg dst ) 1736{ 1737 DUMP_R( dst ); 1738 assert(dst.file == file_x87); 1739 assert(dst.idx >= 1); 1740 emit_2ub(p, 0xde, 0xc8+dst.idx); 1741 note_x87_pop(p); 1742} 1743 1744void x87_fsubp( struct x86_function *p, struct x86_reg dst ) 1745{ 1746 DUMP_R( dst ); 1747 assert(dst.file == file_x87); 1748 assert(dst.idx >= 1); 1749 emit_2ub(p, 0xde, 0xe8+dst.idx); 1750 note_x87_pop(p); 1751} 1752 1753void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) 1754{ 1755 DUMP_R( dst ); 1756 assert(dst.file == file_x87); 1757 assert(dst.idx >= 1); 1758 emit_2ub(p, 0xde, 0xe0+dst.idx); 1759 note_x87_pop(p); 1760} 1761 1762void x87_faddp( struct x86_function *p, struct x86_reg dst ) 1763{ 1764 DUMP_R( dst ); 1765 assert(dst.file == file_x87); 1766 assert(dst.idx >= 1); 1767 emit_2ub(p, 0xde, 0xc0+dst.idx); 1768 note_x87_pop(p); 1769} 1770 1771void x87_fdivp( struct x86_function *p, struct x86_reg dst ) 1772{ 1773 DUMP_R( dst ); 1774 assert(dst.file == file_x87); 1775 assert(dst.idx >= 1); 1776 emit_2ub(p, 0xde, 0xf8+dst.idx); 1777 note_x87_pop(p); 1778} 1779 1780void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) 1781{ 1782 DUMP_R( dst ); 1783 assert(dst.file == file_x87); 1784 assert(dst.idx >= 1); 1785 emit_2ub(p, 0xde, 0xf0+dst.idx); 1786 note_x87_pop(p); 1787} 1788 1789void x87_ftst( struct x86_function *p ) 1790{ 1791 DUMP(); 1792 emit_2ub(p, 0xd9, 0xe4); 1793} 1794 1795void x87_fucom( struct x86_function *p, struct x86_reg arg ) 1796{ 1797 DUMP_R( arg ); 1798 assert(arg.file == file_x87); 1799 emit_2ub(p, 0xdd, 0xe0+arg.idx); 1800} 1801 1802void x87_fucomp( struct x86_function *p, struct x86_reg arg ) 1803{ 1804 DUMP_R( arg ); 1805 assert(arg.file == file_x87); 1806 emit_2ub(p, 0xdd, 0xe8+arg.idx); 1807 note_x87_pop(p); 1808} 1809 1810void x87_fucompp( struct x86_function *p ) 1811{ 1812 DUMP(); 1813 emit_2ub(p, 0xda, 0xe9); 1814 note_x87_pop(p); /* pop twice */ 1815 note_x87_pop(p); /* pop twice */ 1816} 1817 1818void x87_fxch( struct x86_function *p, struct x86_reg arg ) 1819{ 1820 DUMP_R( arg ); 1821 assert(arg.file == file_x87); 1822 emit_2ub(p, 0xd9, 0xc8+arg.idx); 1823} 1824 1825void x87_fabs( struct x86_function *p ) 1826{ 1827 DUMP(); 1828 emit_2ub(p, 0xd9, 0xe1); 1829} 1830 1831void x87_fchs( struct x86_function *p ) 1832{ 1833 DUMP(); 1834 emit_2ub(p, 0xd9, 0xe0); 1835} 1836 1837void x87_fcos( struct x86_function *p ) 1838{ 1839 DUMP(); 1840 emit_2ub(p, 0xd9, 0xff); 1841} 1842 1843 1844void x87_fprndint( struct x86_function *p ) 1845{ 1846 DUMP(); 1847 emit_2ub(p, 0xd9, 0xfc); 1848} 1849 1850void x87_fscale( struct x86_function *p ) 1851{ 1852 DUMP(); 1853 emit_2ub(p, 0xd9, 0xfd); 1854} 1855 1856void x87_fsin( struct x86_function *p ) 1857{ 1858 DUMP(); 1859 emit_2ub(p, 0xd9, 0xfe); 1860} 1861 1862void x87_fsincos( struct x86_function *p ) 1863{ 1864 DUMP(); 1865 emit_2ub(p, 0xd9, 0xfb); 1866} 1867 1868void x87_fsqrt( struct x86_function *p ) 1869{ 1870 DUMP(); 1871 emit_2ub(p, 0xd9, 0xfa); 1872} 1873 1874void x87_fxtract( struct x86_function *p ) 1875{ 1876 DUMP(); 1877 emit_2ub(p, 0xd9, 0xf4); 1878} 1879 1880/* st0 = (2^st0)-1 1881 * 1882 * Restrictions: -1.0 <= st0 <= 1.0 1883 */ 1884void x87_f2xm1( struct x86_function *p ) 1885{ 1886 DUMP(); 1887 emit_2ub(p, 0xd9, 0xf0); 1888} 1889 1890/* st1 = st1 * log2(st0); 1891 * pop_stack; 1892 */ 1893void x87_fyl2x( struct x86_function *p ) 1894{ 1895 DUMP(); 1896 emit_2ub(p, 0xd9, 0xf1); 1897 note_x87_pop(p); 1898} 1899 1900/* st1 = st1 * log2(st0 + 1.0); 1901 * pop_stack; 1902 * 1903 * A fast operation, with restrictions: -.29 < st0 < .29 1904 */ 1905void x87_fyl2xp1( struct x86_function *p ) 1906{ 1907 DUMP(); 1908 emit_2ub(p, 0xd9, 0xf9); 1909 note_x87_pop(p); 1910} 1911 1912 1913void x87_fld( struct x86_function *p, struct x86_reg arg ) 1914{ 1915 DUMP_R( arg ); 1916 if (arg.file == file_x87) 1917 emit_2ub(p, 0xd9, 0xc0 + arg.idx); 1918 else { 1919 emit_1ub(p, 0xd9); 1920 emit_modrm_noreg(p, 0, arg); 1921 } 1922 note_x87_push(p); 1923} 1924 1925void x87_fst( struct x86_function *p, struct x86_reg dst ) 1926{ 1927 DUMP_R( dst ); 1928 if (dst.file == file_x87) 1929 emit_2ub(p, 0xdd, 0xd0 + dst.idx); 1930 else { 1931 emit_1ub(p, 0xd9); 1932 emit_modrm_noreg(p, 2, dst); 1933 } 1934} 1935 1936void x87_fstp( struct x86_function *p, struct x86_reg dst ) 1937{ 1938 DUMP_R( dst ); 1939 if (dst.file == file_x87) 1940 emit_2ub(p, 0xdd, 0xd8 + dst.idx); 1941 else { 1942 emit_1ub(p, 0xd9); 1943 emit_modrm_noreg(p, 3, dst); 1944 } 1945 note_x87_pop(p); 1946} 1947 1948void x87_fpop( struct x86_function *p ) 1949{ 1950 x87_fstp( p, x86_make_reg( file_x87, 0 )); 1951} 1952 1953 1954void x87_fcom( struct x86_function *p, struct x86_reg dst ) 1955{ 1956 DUMP_R( dst ); 1957 if (dst.file == file_x87) 1958 emit_2ub(p, 0xd8, 0xd0 + dst.idx); 1959 else { 1960 emit_1ub(p, 0xd8); 1961 emit_modrm_noreg(p, 2, dst); 1962 } 1963} 1964 1965 1966void x87_fcomp( struct x86_function *p, struct x86_reg dst ) 1967{ 1968 DUMP_R( dst ); 1969 if (dst.file == file_x87) 1970 emit_2ub(p, 0xd8, 0xd8 + dst.idx); 1971 else { 1972 emit_1ub(p, 0xd8); 1973 emit_modrm_noreg(p, 3, dst); 1974 } 1975 note_x87_pop(p); 1976} 1977 1978void x87_fcomi( struct x86_function *p, struct x86_reg arg ) 1979{ 1980 DUMP_R( arg ); 1981 emit_2ub(p, 0xdb, 0xf0+arg.idx); 1982} 1983 1984void x87_fcomip( struct x86_function *p, struct x86_reg arg ) 1985{ 1986 DUMP_R( arg ); 1987 emit_2ub(p, 0xdb, 0xf0+arg.idx); 1988 note_x87_pop(p); 1989} 1990 1991 1992void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) 1993{ 1994 DUMP_R( dst ); 1995 assert(dst.file == file_REG32); 1996 1997 if (dst.idx == reg_AX && 1998 dst.mod == mod_REG) 1999 emit_2ub(p, 0xdf, 0xe0); 2000 else { 2001 emit_1ub(p, 0xdd); 2002 emit_modrm_noreg(p, 7, dst); 2003 } 2004} 2005 2006 2007void x87_fnstcw( struct x86_function *p, struct x86_reg dst ) 2008{ 2009 DUMP_R( dst ); 2010 assert(dst.file == file_REG32); 2011 2012 emit_1ub(p, 0x9b); /* WAIT -- needed? */ 2013 emit_1ub(p, 0xd9); 2014 emit_modrm_noreg(p, 7, dst); 2015} 2016 2017 2018 2019 2020/*********************************************************************** 2021 * MMX instructions 2022 */ 2023 2024void mmx_emms( struct x86_function *p ) 2025{ 2026 DUMP(); 2027 assert(p->need_emms); 2028 emit_2ub(p, 0x0f, 0x77); 2029 p->need_emms = 0; 2030} 2031 2032void mmx_packssdw( struct x86_function *p, 2033 struct x86_reg dst, 2034 struct x86_reg src ) 2035{ 2036 DUMP_RR( dst, src ); 2037 assert(dst.file == file_MMX && 2038 (src.file == file_MMX || src.mod != mod_REG)); 2039 2040 p->need_emms = 1; 2041 2042 emit_2ub(p, X86_TWOB, 0x6b); 2043 emit_modrm( p, dst, src ); 2044} 2045 2046void mmx_packuswb( struct x86_function *p, 2047 struct x86_reg dst, 2048 struct x86_reg src ) 2049{ 2050 DUMP_RR( dst, src ); 2051 assert(dst.file == file_MMX && 2052 (src.file == file_MMX || src.mod != mod_REG)); 2053 2054 p->need_emms = 1; 2055 2056 emit_2ub(p, X86_TWOB, 0x67); 2057 emit_modrm( p, dst, src ); 2058} 2059 2060void mmx_movd( struct x86_function *p, 2061 struct x86_reg dst, 2062 struct x86_reg src ) 2063{ 2064 DUMP_RR( dst, src ); 2065 p->need_emms = 1; 2066 emit_1ub(p, X86_TWOB); 2067 emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 2068} 2069 2070void mmx_movq( struct x86_function *p, 2071 struct x86_reg dst, 2072 struct x86_reg src ) 2073{ 2074 DUMP_RR( dst, src ); 2075 p->need_emms = 1; 2076 emit_1ub(p, X86_TWOB); 2077 emit_op_modrm( p, 0x6f, 0x7f, dst, src ); 2078} 2079 2080 2081/*********************************************************************** 2082 * Helper functions 2083 */ 2084 2085 2086void x86_cdecl_caller_push_regs( struct x86_function *p ) 2087{ 2088 x86_push(p, x86_make_reg(file_REG32, reg_AX)); 2089 x86_push(p, x86_make_reg(file_REG32, reg_CX)); 2090 x86_push(p, x86_make_reg(file_REG32, reg_DX)); 2091} 2092 2093void x86_cdecl_caller_pop_regs( struct x86_function *p ) 2094{ 2095 x86_pop(p, x86_make_reg(file_REG32, reg_DX)); 2096 x86_pop(p, x86_make_reg(file_REG32, reg_CX)); 2097 x86_pop(p, x86_make_reg(file_REG32, reg_AX)); 2098} 2099 2100 2101struct x86_reg x86_fn_arg( struct x86_function *p, 2102 unsigned arg ) 2103{ 2104 switch(x86_target(p)) 2105 { 2106 case X86_64_WIN64_ABI: 2107 /* Microsoft uses a different calling convention than the rest of the world */ 2108 switch(arg) 2109 { 2110 case 1: 2111 return x86_make_reg(file_REG32, reg_CX); 2112 case 2: 2113 return x86_make_reg(file_REG32, reg_DX); 2114 case 3: 2115 return x86_make_reg(file_REG32, reg_R8); 2116 case 4: 2117 return x86_make_reg(file_REG32, reg_R9); 2118 default: 2119 /* Win64 allocates stack slots as if it pushed the first 4 arguments too */ 2120 return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2121 p->stack_offset + arg * 8); 2122 } 2123 case X86_64_STD_ABI: 2124 switch(arg) 2125 { 2126 case 1: 2127 return x86_make_reg(file_REG32, reg_DI); 2128 case 2: 2129 return x86_make_reg(file_REG32, reg_SI); 2130 case 3: 2131 return x86_make_reg(file_REG32, reg_DX); 2132 case 4: 2133 return x86_make_reg(file_REG32, reg_CX); 2134 case 5: 2135 return x86_make_reg(file_REG32, reg_R8); 2136 case 6: 2137 return x86_make_reg(file_REG32, reg_R9); 2138 default: 2139 return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2140 p->stack_offset + (arg - 6) * 8); /* ??? */ 2141 } 2142 case X86_32: 2143 return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 2144 p->stack_offset + arg * 4); /* ??? */ 2145 default: 2146 assert(0 && "Unexpected x86 target ABI in x86_fn_arg"); 2147 return x86_make_reg(file_REG32, reg_CX); /* not used / silence warning */ 2148 } 2149} 2150 2151static void x86_init_func_common( struct x86_function *p ) 2152{ 2153 util_cpu_detect(); 2154 p->caps = 0; 2155 if(util_cpu_caps.has_mmx) 2156 p->caps |= X86_MMX; 2157 if(util_cpu_caps.has_mmx2) 2158 p->caps |= X86_MMX2; 2159 if(util_cpu_caps.has_sse) 2160 p->caps |= X86_SSE; 2161 if(util_cpu_caps.has_sse2) 2162 p->caps |= X86_SSE2; 2163 if(util_cpu_caps.has_sse3) 2164 p->caps |= X86_SSE3; 2165 if(util_cpu_caps.has_sse4_1) 2166 p->caps |= X86_SSE4_1; 2167 p->csr = p->store; 2168 DUMP_START(); 2169} 2170 2171void x86_init_func( struct x86_function *p ) 2172{ 2173 p->size = 0; 2174 p->store = NULL; 2175 x86_init_func_common(p); 2176} 2177 2178void x86_init_func_size( struct x86_function *p, unsigned code_size ) 2179{ 2180 p->size = code_size; 2181 p->store = rtasm_exec_malloc(code_size); 2182 if (p->store == NULL) { 2183 p->store = p->error_overflow; 2184 } 2185 x86_init_func_common(p); 2186} 2187 2188void x86_release_func( struct x86_function *p ) 2189{ 2190 if (p->store && p->store != p->error_overflow) 2191 rtasm_exec_free(p->store); 2192 2193 p->store = NULL; 2194 p->csr = NULL; 2195 p->size = 0; 2196} 2197 2198 2199static inline x86_func 2200voidptr_to_x86_func(void *v) 2201{ 2202 union { 2203 void *v; 2204 x86_func f; 2205 } u; 2206 STATIC_ASSERT(sizeof(u.v) == sizeof(u.f)); 2207 u.v = v; 2208 return u.f; 2209} 2210 2211 2212x86_func x86_get_func( struct x86_function *p ) 2213{ 2214 DUMP_END(); 2215 if (DISASSEM && p->store) 2216 debug_printf("disassemble %p %p\n", p->store, p->csr); 2217 2218 if (p->store == p->error_overflow) 2219 return voidptr_to_x86_func(NULL); 2220 else 2221 return voidptr_to_x86_func(p->store); 2222} 2223 2224#else 2225 2226void x86sse_dummy( void ); 2227 2228void x86sse_dummy( void ) 2229{ 2230} 2231 2232#endif 2233