1#ifdef USE_X86_ASM 2#if defined(__i386__) || defined(__386__) 3 4#include <stdio.h> 5 6#include "main/imports.h" 7#include "x86sse.h" 8 9#define DISASSEM 0 10#define X86_TWOB 0x0f 11 12#if 0 13static unsigned char *cptr( void (*label)() ) 14{ 15 return (unsigned char *)(unsigned long)label; 16} 17#endif 18 19 20static void do_realloc( struct x86_function *p ) 21{ 22 if (p->size == 0) { 23 p->size = 1024; 24 p->store = _mesa_exec_malloc(p->size); 25 p->csr = p->store; 26 } 27 else { 28 unsigned used = p->csr - p->store; 29 unsigned char *tmp = p->store; 30 p->size *= 2; 31 p->store = _mesa_exec_malloc(p->size); 32 memcpy(p->store, tmp, used); 33 p->csr = p->store + used; 34 _mesa_exec_free(tmp); 35 } 36} 37 38/* Emit bytes to the instruction stream: 39 */ 40static unsigned char *reserve( struct x86_function *p, int bytes ) 41{ 42 if (p->csr + bytes - p->store > p->size) 43 do_realloc(p); 44 45 { 46 unsigned char *csr = p->csr; 47 p->csr += bytes; 48 return csr; 49 } 50} 51 52 53 54static void emit_1b( struct x86_function *p, char b0 ) 55{ 56 char *csr = (char *)reserve(p, 1); 57 *csr = b0; 58} 59 60static void emit_1i( struct x86_function *p, int i0 ) 61{ 62 int *icsr = (int *)reserve(p, sizeof(i0)); 63 *icsr = i0; 64} 65 66static void emit_1ub( struct x86_function *p, unsigned char b0 ) 67{ 68 unsigned char *csr = reserve(p, 1); 69 *csr++ = b0; 70} 71 72static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) 73{ 74 unsigned char *csr = reserve(p, 2); 75 *csr++ = b0; 76 *csr++ = b1; 77} 78 79static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) 80{ 81 unsigned char *csr = reserve(p, 3); 82 *csr++ = b0; 83 *csr++ = b1; 84 *csr++ = b2; 85} 86 87 88/* Build a modRM byte + possible displacement. No treatment of SIB 89 * indexing. BZZT - no way to encode an absolute address. 
 */
static void emit_modrm( struct x86_function *p,
			struct x86_reg reg,
			struct x86_reg regmem )
{
   unsigned char val = 0;

   /* "reg" must be a plain register; "regmem" may be a register or a
    * memory reference (indirect, disp8 or disp32).
    */
   assert(reg.mod == mod_REG);

   val |= regmem.mod << 6;     	/* mod field */
   val |= reg.idx << 3;		/* reg field */
   val |= regmem.idx;		/* r/m field */

   emit_1ub(p, val);

   /* Oh-oh we've stumbled into the SIB thing.
    * r/m == ESP (100b) signals that a SIB byte follows; 0x24 encodes
    * base=ESP with no index register, i.e. a plain [esp] reference.
    */
   if (regmem.file == file_REG32 &&
       regmem.idx == reg_SP) {
      emit_1ub(p, 0x24);		/* simplistic! */
   }

   /* Trailing displacement, sized to match the mod field emitted above. */
   switch (regmem.mod) {
   case mod_REG:
   case mod_INDIRECT:
      break;
   case mod_DISP8:
      emit_1b(p, regmem.disp);
      break;
   case mod_DISP32:
      emit_1i(p, regmem.disp);
      break;
   default:
      assert(0);
      break;
   }
}


/* Emit a modrm byte whose "reg" field carries an opcode extension
 * ("/digit" in Intel syntax) rather than a register number.
 */
static void emit_modrm_noreg( struct x86_function *p,
			      unsigned op,
			      struct x86_reg regmem )
{
   struct x86_reg dummy = x86_make_reg(file_REG32, op);
   emit_modrm(p, dummy, regmem);
}

/* Many x86 instructions have two opcodes to cope with the situations
 * where the destination is a register or memory reference
 * respectively.  This function selects the correct opcode based on
 * the arguments presented.
 */
static void emit_op_modrm( struct x86_function *p,
			   unsigned char op_dst_is_reg,
			   unsigned char op_dst_is_mem,
			   struct x86_reg dst,
			   struct x86_reg src )
{
   switch (dst.mod) {
   case mod_REG:
      emit_1ub(p, op_dst_is_reg);
      emit_modrm(p, dst, src);
      break;
   case mod_INDIRECT:
   case mod_DISP32:
   case mod_DISP8:
      /* Memory destination: use the "store" opcode form, and swap the
       * operands' roles in the modrm byte.
       */
      assert(src.mod == mod_REG);
      emit_1ub(p, op_dst_is_mem);
      emit_modrm(p, src, dst);
      break;
   default:
      assert(0);
      break;
   }
}







/* Create and manipulate registers and regmem values:
 */
struct x86_reg x86_make_reg( enum x86_reg_file file,
			     enum x86_reg_name idx )
{
   struct x86_reg reg;

   reg.file = file;
   reg.idx = idx;
   reg.mod = mod_REG;
   reg.disp = 0;

   return reg;
}

/* Turn a register into a memory reference at [reg + disp], choosing
 * the smallest displacement encoding that fits.
 */
struct x86_reg x86_make_disp( struct x86_reg reg,
			      int disp )
{
   assert(reg.file == file_REG32);

   if (reg.mod == mod_REG)
      reg.disp = disp;
   else
      reg.disp += disp;

   if (reg.disp == 0)
      reg.mod = mod_INDIRECT;
   else if (reg.disp <= 127 && reg.disp >= -128)
      reg.mod = mod_DISP8;
   else
      reg.mod = mod_DISP32;

   return reg;
}

/* Memory reference [reg] with no displacement. */
struct x86_reg x86_deref( struct x86_reg reg )
{
   return x86_make_disp(reg, 0);
}

/* Strip any displacement/indirection, yielding the bare register. */
struct x86_reg x86_get_base_reg( struct x86_reg reg )
{
   return x86_make_reg( reg.file, reg.idx );
}

/* Current emission point; used as a branch target or fixup handle.
 * NOTE: invalidated if the buffer is reallocated by later emits.
 */
unsigned char *x86_get_label( struct x86_function *p )
{
   return p->csr;
}



/***********************************************************************
 * x86 instructions
 */


/* Conditional jump to a previously-emitted label.  Uses the short
 * (2-byte) form when the offset fits in a signed byte, else the long
 * (6-byte, 0x0f 0x8x) form; offsets are relative to the instruction end.
 */
void x86_jcc( struct x86_function *p,
	      enum x86_cc cc,
	      unsigned char *label )
{
   int offset = label - (x86_get_label(p) + 2);

   if (offset <= 127 && offset >= -128) {
      emit_1ub(p, 0x70 + cc);
      emit_1b(p, (char) offset);
   }
   else {
      offset = label - (x86_get_label(p) + 6);
      emit_2ub(p, 0x0f, 0x80 + cc);
      emit_1i(p, offset);
   }
}

/* Always use a 32bit offset for forward jumps:
 * The returned label points just past the placeholder offset, ready
 * for x86_fixup_fwd_jump().
 */
unsigned char *x86_jcc_forward( struct x86_function *p,
			  enum x86_cc cc )
{
   emit_2ub(p, 0x0f, 0x80 + cc);
   emit_1i(p, 0);
   return x86_get_label(p);
}

/* Unconditional forward jump with a 32-bit placeholder offset. */
unsigned char *x86_jmp_forward( struct x86_function *p)
{
   emit_1ub(p, 0xe9);
   emit_1i(p, 0);
   return x86_get_label(p);
}

/* Relative call with a 32-bit placeholder offset. */
unsigned char *x86_call_forward( struct x86_function *p)
{
   emit_1ub(p, 0xe8);
   emit_1i(p, 0);
   return x86_get_label(p);
}

/* Fixup offset from forward jump:
 * "fixup" is the label returned by one of the *_forward emitters,
 * i.e. it points just past the 4-byte offset slot being patched.
 */
void x86_fixup_fwd_jump( struct x86_function *p,
			 unsigned char *fixup )
{
   *(int *)(fixup - 4) = x86_get_label(p) - fixup;
}

/* Unconditional jump to an already-known label (backward jumps). */
void x86_jmp( struct x86_function *p, unsigned char *label)
{
   emit_1ub(p, 0xe9);
   emit_1i(p, label - x86_get_label(p) - 4);
}

#if 0
/* This doesn't work once we start reallocating & copying the
 * generated code on buffer fills, because the call is relative to the
 * current pc.
 */
void x86_call( struct x86_function *p, void (*label)())
{
   emit_1ub(p, 0xe8);
   emit_1i(p, cptr(label) - x86_get_label(p) - 4);
}
#else
/* Indirect call through a register/memory operand (0xff /2), which
 * stays valid even after the code buffer moves.
 */
void x86_call( struct x86_function *p, struct x86_reg reg)
{
   emit_1ub(p, 0xff);
   emit_modrm_noreg(p, 2, reg);
}
#endif


/* michal:
 * Temporary. As I need immediate operands, and dont want to mess with the codegen,
 * I load the immediate into general purpose register and use it.
 */
/* mov reg, imm32 (0xb8+reg form). */
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
{
   assert(dst.mod == mod_REG);
   emit_1ub(p, 0xb8 + dst.idx);
   emit_1i(p, imm);
}

/* push reg; also tracks the stack depth for x86_fn_arg(). */
void x86_push( struct x86_function *p,
	       struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x50 + reg.idx);
   p->stack_offset += 4;
}

/* pop reg; also tracks the stack depth for x86_fn_arg(). */
void x86_pop( struct x86_function *p,
	      struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x58 + reg.idx);
   p->stack_offset -= 4;
}

void x86_inc( struct x86_function *p,
	      struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x40 + reg.idx);
}

void x86_dec( struct x86_function *p,
	      struct x86_reg reg )
{
   assert(reg.mod == mod_REG);
   emit_1ub(p, 0x48 + reg.idx);
}

void x86_ret( struct x86_function *p )
{
   emit_1ub(p, 0xc3);
}

/* sahf: copy AH into the low byte of EFLAGS (used after fnstsw). */
void x86_sahf( struct x86_function *p )
{
   emit_1ub(p, 0x9e);
}

void x86_mov( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm( p, 0x8b, 0x89, dst, src );
}

void x86_xor( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm( p, 0x33, 0x31, dst, src );
}

void x86_cmp( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm( p, 0x3b, 0x39, dst, src );
}

void x86_lea( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_1ub(p, 0x8d);
   emit_modrm( p, dst, src );
}

void x86_test( struct x86_function *p,
	       struct x86_reg dst,
	       struct x86_reg src )
{
   emit_1ub(p, 0x85);
   emit_modrm( p, dst, src );
}

void x86_add( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm(p, 0x03, 0x01, dst, src );
}

/* Unsigned mul by EAX (0xf7 /4): the reg_SP dummy register supplies
 * the /4 opcode-extension digit in the modrm byte, not a real operand.
 */
void x86_mul( struct x86_function *p,
	      struct x86_reg src )
{
   assert (src.file == file_REG32 && src.mod == mod_REG);
   emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
}

void x86_sub( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm(p, 0x2b, 0x29, dst, src );
}

void x86_or( struct x86_function *p,
	     struct x86_reg dst,
	     struct x86_reg src )
{
   emit_op_modrm( p, 0x0b, 0x09, dst, src );
}

void x86_and( struct x86_function *p,
	      struct x86_reg dst,
	      struct x86_reg src )
{
   emit_op_modrm( p, 0x23, 0x21, dst, src );
}



/***********************************************************************
 * SSE instructions
 */


void sse_movss( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, 0xF3, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

void sse_movaps( struct x86_function *p,
		 struct x86_reg dst,
		 struct x86_reg src )
{
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x28, 0x29, dst, src );
}

void sse_movups( struct x86_function *p,
		 struct x86_reg dst,
		 struct x86_reg src )
{
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x10, 0x11, dst, src );
}

/* movhps requires one memory operand; reg,reg would encode movlhps. */
void sse_movhps( struct x86_function *p,
		 struct x86_reg dst,
		 struct x86_reg src )
{
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
}

/* movlps requires one memory operand; reg,reg would encode movhlps. */
void sse_movlps( struct x86_function *p,
		 struct x86_reg dst,
		 struct x86_reg src )
{
   assert(dst.mod != mod_REG || src.mod != mod_REG);
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
}

void sse_maxps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

void sse_maxss( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
   emit_modrm( p, dst, src );
}

void sse_divss( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
   emit_modrm( p, dst, src );
}

void sse_minps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5D);
   emit_modrm( p, dst, src );
}

void sse_subps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x5C);
   emit_modrm( p, dst, src );
}

void sse_mulps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

void sse_mulss( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x59);
   emit_modrm( p, dst, src );
}

void sse_addps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

void sse_addss( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
   emit_modrm( p, dst, src );
}

void sse_andnps( struct x86_function *p,
		 struct x86_reg dst,
		 struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x55);
   emit_modrm( p, dst, src );
}

void sse_andps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x54);
   emit_modrm( p, dst, src );
}

void sse_rsqrtps( struct x86_function *p,
		  struct x86_reg dst,
		  struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x52);
   emit_modrm( p, dst, src );
}

void sse_rsqrtss( struct x86_function *p,
		  struct x86_reg dst,
		  struct x86_reg src )
{
   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
   emit_modrm( p, dst, src );

}

/* movhlps only takes register operands. */
void sse_movhlps( struct x86_function *p,
		  struct x86_reg dst,
		  struct x86_reg src )
{
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x12);
   emit_modrm( p, dst, src );
}

/* movlhps only takes register operands. */
void sse_movlhps( struct x86_function *p,
		  struct x86_reg dst,
		  struct x86_reg src )
{
   assert(dst.mod == mod_REG && src.mod == mod_REG);
   emit_2ub(p, X86_TWOB, 0x16);
   emit_modrm( p, dst, src );
}

void sse_orps( struct x86_function *p,
	       struct x86_reg dst,
	       struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x56);
   emit_modrm( p, dst, src );
}

void sse_xorps( struct x86_function *p,
		struct x86_reg dst,
		struct x86_reg src )
{
   emit_2ub(p, X86_TWOB, 0x57);
   emit_modrm( p, dst, src );
}

/* Convert packed floats to packed dwords in an MMX register; this
 * taints the FP state, so flag that an emms will be needed.
 */
void sse_cvtps2pi( struct x86_function *p,
		   struct x86_reg dst,
		   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
	  (src.file == file_XMM || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x2d);
   emit_modrm( p, dst, src );
}


/* Shufps can also be used to implement a reduced swizzle when dest ==
 * arg0.
630 */ 631void sse_shufps( struct x86_function *p, 632 struct x86_reg dest, 633 struct x86_reg arg0, 634 unsigned char shuf) 635{ 636 emit_2ub(p, X86_TWOB, 0xC6); 637 emit_modrm(p, dest, arg0); 638 emit_1ub(p, shuf); 639} 640 641void sse_cmpps( struct x86_function *p, 642 struct x86_reg dest, 643 struct x86_reg arg0, 644 unsigned char cc) 645{ 646 emit_2ub(p, X86_TWOB, 0xC2); 647 emit_modrm(p, dest, arg0); 648 emit_1ub(p, cc); 649} 650 651void sse_pmovmskb( struct x86_function *p, 652 struct x86_reg dest, 653 struct x86_reg src) 654{ 655 emit_3ub(p, 0x66, X86_TWOB, 0xD7); 656 emit_modrm(p, dest, src); 657} 658 659/*********************************************************************** 660 * SSE2 instructions 661 */ 662 663/** 664 * Perform a reduced swizzle: 665 */ 666void sse2_pshufd( struct x86_function *p, 667 struct x86_reg dest, 668 struct x86_reg arg0, 669 unsigned char shuf) 670{ 671 emit_3ub(p, 0x66, X86_TWOB, 0x70); 672 emit_modrm(p, dest, arg0); 673 emit_1ub(p, shuf); 674} 675 676void sse2_cvttps2dq( struct x86_function *p, 677 struct x86_reg dst, 678 struct x86_reg src ) 679{ 680 emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); 681 emit_modrm( p, dst, src ); 682} 683 684void sse2_cvtps2dq( struct x86_function *p, 685 struct x86_reg dst, 686 struct x86_reg src ) 687{ 688 emit_3ub(p, 0x66, X86_TWOB, 0x5B); 689 emit_modrm( p, dst, src ); 690} 691 692void sse2_packssdw( struct x86_function *p, 693 struct x86_reg dst, 694 struct x86_reg src ) 695{ 696 emit_3ub(p, 0x66, X86_TWOB, 0x6B); 697 emit_modrm( p, dst, src ); 698} 699 700void sse2_packsswb( struct x86_function *p, 701 struct x86_reg dst, 702 struct x86_reg src ) 703{ 704 emit_3ub(p, 0x66, X86_TWOB, 0x63); 705 emit_modrm( p, dst, src ); 706} 707 708void sse2_packuswb( struct x86_function *p, 709 struct x86_reg dst, 710 struct x86_reg src ) 711{ 712 emit_3ub(p, 0x66, X86_TWOB, 0x67); 713 emit_modrm( p, dst, src ); 714} 715 716void sse2_rcpps( struct x86_function *p, 717 struct x86_reg dst, 718 struct x86_reg src ) 
719{ 720 emit_2ub(p, X86_TWOB, 0x53); 721 emit_modrm( p, dst, src ); 722} 723 724void sse2_rcpss( struct x86_function *p, 725 struct x86_reg dst, 726 struct x86_reg src ) 727{ 728 emit_3ub(p, 0xF3, X86_TWOB, 0x53); 729 emit_modrm( p, dst, src ); 730} 731 732void sse2_movd( struct x86_function *p, 733 struct x86_reg dst, 734 struct x86_reg src ) 735{ 736 emit_2ub(p, 0x66, X86_TWOB); 737 emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 738} 739 740 741 742 743/*********************************************************************** 744 * x87 instructions 745 */ 746void x87_fist( struct x86_function *p, struct x86_reg dst ) 747{ 748 emit_1ub(p, 0xdb); 749 emit_modrm_noreg(p, 2, dst); 750} 751 752void x87_fistp( struct x86_function *p, struct x86_reg dst ) 753{ 754 emit_1ub(p, 0xdb); 755 emit_modrm_noreg(p, 3, dst); 756} 757 758void x87_fild( struct x86_function *p, struct x86_reg arg ) 759{ 760 emit_1ub(p, 0xdf); 761 emit_modrm_noreg(p, 0, arg); 762} 763 764void x87_fldz( struct x86_function *p ) 765{ 766 emit_2ub(p, 0xd9, 0xee); 767} 768 769 770void x87_fldcw( struct x86_function *p, struct x86_reg arg ) 771{ 772 assert(arg.file == file_REG32); 773 assert(arg.mod != mod_REG); 774 emit_1ub(p, 0xd9); 775 emit_modrm_noreg(p, 5, arg); 776} 777 778void x87_fld1( struct x86_function *p ) 779{ 780 emit_2ub(p, 0xd9, 0xe8); 781} 782 783void x87_fldl2e( struct x86_function *p ) 784{ 785 emit_2ub(p, 0xd9, 0xea); 786} 787 788void x87_fldln2( struct x86_function *p ) 789{ 790 emit_2ub(p, 0xd9, 0xed); 791} 792 793void x87_fwait( struct x86_function *p ) 794{ 795 emit_1ub(p, 0x9b); 796} 797 798void x87_fnclex( struct x86_function *p ) 799{ 800 emit_2ub(p, 0xdb, 0xe2); 801} 802 803void x87_fclex( struct x86_function *p ) 804{ 805 x87_fwait(p); 806 x87_fnclex(p); 807} 808 809 810static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, 811 unsigned char dst0ub0, 812 unsigned char dst0ub1, 813 unsigned char arg0ub0, 814 unsigned char arg0ub1, 815 unsigned char 
argmem_noreg) 816{ 817 assert(dst.file == file_x87); 818 819 if (arg.file == file_x87) { 820 if (dst.idx == 0) 821 emit_2ub(p, dst0ub0, dst0ub1+arg.idx); 822 else if (arg.idx == 0) 823 emit_2ub(p, arg0ub0, arg0ub1+arg.idx); 824 else 825 assert(0); 826 } 827 else if (dst.idx == 0) { 828 assert(arg.file == file_REG32); 829 emit_1ub(p, 0xd8); 830 emit_modrm_noreg(p, argmem_noreg, arg); 831 } 832 else 833 assert(0); 834} 835 836void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 837{ 838 x87_arith_op(p, dst, arg, 839 0xd8, 0xc8, 840 0xdc, 0xc8, 841 4); 842} 843 844void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 845{ 846 x87_arith_op(p, dst, arg, 847 0xd8, 0xe0, 848 0xdc, 0xe8, 849 4); 850} 851 852void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 853{ 854 x87_arith_op(p, dst, arg, 855 0xd8, 0xe8, 856 0xdc, 0xe0, 857 5); 858} 859 860void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 861{ 862 x87_arith_op(p, dst, arg, 863 0xd8, 0xc0, 864 0xdc, 0xc0, 865 0); 866} 867 868void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 869{ 870 x87_arith_op(p, dst, arg, 871 0xd8, 0xf0, 872 0xdc, 0xf8, 873 6); 874} 875 876void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 877{ 878 x87_arith_op(p, dst, arg, 879 0xd8, 0xf8, 880 0xdc, 0xf0, 881 7); 882} 883 884void x87_fmulp( struct x86_function *p, struct x86_reg dst ) 885{ 886 assert(dst.file == file_x87); 887 assert(dst.idx >= 1); 888 emit_2ub(p, 0xde, 0xc8+dst.idx); 889} 890 891void x87_fsubp( struct x86_function *p, struct x86_reg dst ) 892{ 893 assert(dst.file == file_x87); 894 assert(dst.idx >= 1); 895 emit_2ub(p, 0xde, 0xe8+dst.idx); 896} 897 898void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) 899{ 900 assert(dst.file == file_x87); 901 assert(dst.idx >= 1); 902 emit_2ub(p, 0xde, 0xe0+dst.idx); 903} 904 905void x87_faddp( struct x86_function *p, 
struct x86_reg dst ) 906{ 907 assert(dst.file == file_x87); 908 assert(dst.idx >= 1); 909 emit_2ub(p, 0xde, 0xc0+dst.idx); 910} 911 912void x87_fdivp( struct x86_function *p, struct x86_reg dst ) 913{ 914 assert(dst.file == file_x87); 915 assert(dst.idx >= 1); 916 emit_2ub(p, 0xde, 0xf8+dst.idx); 917} 918 919void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) 920{ 921 assert(dst.file == file_x87); 922 assert(dst.idx >= 1); 923 emit_2ub(p, 0xde, 0xf0+dst.idx); 924} 925 926void x87_fucom( struct x86_function *p, struct x86_reg arg ) 927{ 928 assert(arg.file == file_x87); 929 emit_2ub(p, 0xdd, 0xe0+arg.idx); 930} 931 932void x87_fucomp( struct x86_function *p, struct x86_reg arg ) 933{ 934 assert(arg.file == file_x87); 935 emit_2ub(p, 0xdd, 0xe8+arg.idx); 936} 937 938void x87_fucompp( struct x86_function *p ) 939{ 940 emit_2ub(p, 0xda, 0xe9); 941} 942 943void x87_fxch( struct x86_function *p, struct x86_reg arg ) 944{ 945 assert(arg.file == file_x87); 946 emit_2ub(p, 0xd9, 0xc8+arg.idx); 947} 948 949void x87_fabs( struct x86_function *p ) 950{ 951 emit_2ub(p, 0xd9, 0xe1); 952} 953 954void x87_fchs( struct x86_function *p ) 955{ 956 emit_2ub(p, 0xd9, 0xe0); 957} 958 959void x87_fcos( struct x86_function *p ) 960{ 961 emit_2ub(p, 0xd9, 0xff); 962} 963 964 965void x87_fprndint( struct x86_function *p ) 966{ 967 emit_2ub(p, 0xd9, 0xfc); 968} 969 970void x87_fscale( struct x86_function *p ) 971{ 972 emit_2ub(p, 0xd9, 0xfd); 973} 974 975void x87_fsin( struct x86_function *p ) 976{ 977 emit_2ub(p, 0xd9, 0xfe); 978} 979 980void x87_fsincos( struct x86_function *p ) 981{ 982 emit_2ub(p, 0xd9, 0xfb); 983} 984 985void x87_fsqrt( struct x86_function *p ) 986{ 987 emit_2ub(p, 0xd9, 0xfa); 988} 989 990void x87_fxtract( struct x86_function *p ) 991{ 992 emit_2ub(p, 0xd9, 0xf4); 993} 994 995/* st0 = (2^st0)-1 996 * 997 * Restrictions: -1.0 <= st0 <= 1.0 998 */ 999void x87_f2xm1( struct x86_function *p ) 1000{ 1001 emit_2ub(p, 0xd9, 0xf0); 1002} 1003 1004/* st1 = st1 * 
log2(st0);
 * pop_stack;
 */
void x87_fyl2x( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf1);
}

/* st1 = st1 * log2(st0 + 1.0);
 * pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 */
void x87_fyl2xp1( struct x86_function *p )
{
   emit_2ub(p, 0xd9, 0xf9);
}


/* fld st(i), or fld m32fp from memory (0xd9 /0). */
void x87_fld( struct x86_function *p, struct x86_reg arg )
{
   if (arg.file == file_x87)
      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 0, arg);
   }
}

/* fst st(i), or fst m32fp to memory (0xd9 /2); does not pop. */
void x87_fst( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 2, dst);
   }
}

/* fstp st(i), or fstp m32fp to memory (0xd9 /3); pops the stack. */
void x87_fstp( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
   else {
      emit_1ub(p, 0xd9);
      emit_modrm_noreg(p, 3, dst);
   }
}

void x87_fcom( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 2, dst);
   }
}

void x87_fcomp( struct x86_function *p, struct x86_reg dst )
{
   if (dst.file == file_x87)
      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
   else {
      emit_1ub(p, 0xd8);
      emit_modrm_noreg(p, 3, dst);
   }
}


/* Store the fp status word: the short AX form when dst is the AX
 * register, otherwise the memory form (0xdd /7).
 */
void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
{
   assert(dst.file == file_REG32);

   if (dst.idx == reg_AX &&
       dst.mod == mod_REG)
      emit_2ub(p, 0xdf, 0xe0);
   else {
      emit_1ub(p, 0xdd);
      emit_modrm_noreg(p, 7, dst);
   }
}




/***********************************************************************
 * MMX instructions
 */

/* emms: leave MMX state; only legal when an MMX op flagged the need. */
void mmx_emms( struct x86_function *p )
{
   assert(p->need_emms);
   emit_2ub(p, 0x0f, 0x77);
   p->need_emms = 0;
}

void mmx_packssdw( struct x86_function *p,
		   struct x86_reg dst,
		   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
	  (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x6b);
   emit_modrm( p, dst, src );
}

void mmx_packuswb( struct x86_function *p,
		   struct x86_reg dst,
		   struct x86_reg src )
{
   assert(dst.file == file_MMX &&
	  (src.file == file_MMX || src.mod != mod_REG));

   p->need_emms = 1;

   emit_2ub(p, X86_TWOB, 0x67);
   emit_modrm( p, dst, src );
}

void mmx_movd( struct x86_function *p,
	       struct x86_reg dst,
	       struct x86_reg src )
{
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
}

void mmx_movq( struct x86_function *p,
	       struct x86_reg dst,
	       struct x86_reg src )
{
   p->need_emms = 1;
   emit_1ub(p, X86_TWOB);
   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
}


/***********************************************************************
 * Helper functions
 */


/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity:
 */
struct x86_reg x86_fn_arg( struct x86_function *p,
			   unsigned arg )
{
   /* esp-relative: the tracked stack_offset compensates for pushes
    * since function entry; arg is 1-based (slot 0 is the return
    * address).  Marked "???" by the original author -- TODO confirm.
    */
   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
			p->stack_offset + arg * 4);	/* ??? */
}


/* Begin an empty function; the buffer is allocated lazily on first emit. */
void x86_init_func( struct x86_function *p )
{
   p->size = 0;
   p->store = NULL;
   p->csr = p->store;
}

/* Begin a function with a preallocated buffer of code_size bytes.
 * Returns non-zero on success.
 */
int x86_init_func_size( struct x86_function *p, unsigned code_size )
{
   p->size = code_size;
   p->store = _mesa_exec_malloc(code_size);
   p->csr = p->store;
   return p->store != NULL;
}

/* Free the code buffer and reset the function to the empty state. */
void x86_release_func( struct x86_function *p )
{
   _mesa_exec_free(p->store);
   p->store = NULL;
   p->csr = NULL;
   p->size = 0;
}


/* Return the generated code as a callable function pointer. */
void (*x86_get_func( struct x86_function *p ))(void)
{
   if (DISASSEM && p->store)
      printf("disassemble %p %p\n", p->store, p->csr);
   return (void (*)(void)) (unsigned long) p->store;
}

#else

void x86sse_dummy( void )
{
}

#endif

#else /* USE_X86_ASM */

int x86sse_c_dummy_var;	/* silence warning */

#endif /* USE_X86_ASM */