1#ifdef USE_X86_ASM 2#if defined(__i386__) || defined(__386__) 3 4#include "main/imports.h" 5#include "x86sse.h" 6 7#define DISASSEM 0 8#define X86_TWOB 0x0f 9 10#if 0 11static unsigned char *cptr( void (*label)() ) 12{ 13 return (unsigned char *)(unsigned long)label; 14} 15#endif 16 17 18static void do_realloc( struct x86_function *p ) 19{ 20 if (p->size == 0) { 21 p->size = 1024; 22 p->store = _mesa_exec_malloc(p->size); 23 p->csr = p->store; 24 } 25 else { 26 unsigned used = p->csr - p->store; 27 unsigned char *tmp = p->store; 28 p->size *= 2; 29 p->store = _mesa_exec_malloc(p->size); 30 memcpy(p->store, tmp, used); 31 p->csr = p->store + used; 32 _mesa_exec_free(tmp); 33 } 34} 35 36/* Emit bytes to the instruction stream: 37 */ 38static unsigned char *reserve( struct x86_function *p, int bytes ) 39{ 40 if (p->csr + bytes - p->store > p->size) 41 do_realloc(p); 42 43 { 44 unsigned char *csr = p->csr; 45 p->csr += bytes; 46 return csr; 47 } 48} 49 50 51 52static void emit_1b( struct x86_function *p, char b0 ) 53{ 54 char *csr = (char *)reserve(p, 1); 55 *csr = b0; 56} 57 58static void emit_1i( struct x86_function *p, int i0 ) 59{ 60 int *icsr = (int *)reserve(p, sizeof(i0)); 61 *icsr = i0; 62} 63 64static void emit_1ub( struct x86_function *p, unsigned char b0 ) 65{ 66 unsigned char *csr = reserve(p, 1); 67 *csr++ = b0; 68} 69 70static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) 71{ 72 unsigned char *csr = reserve(p, 2); 73 *csr++ = b0; 74 *csr++ = b1; 75} 76 77static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) 78{ 79 unsigned char *csr = reserve(p, 3); 80 *csr++ = b0; 81 *csr++ = b1; 82 *csr++ = b2; 83} 84 85 86/* Build a modRM byte + possible displacement. No treatment of SIB 87 * indexing. BZZT - no way to encode an absolute address. 88 */ 89static void emit_modrm( struct x86_function *p, 90 struct x86_reg reg, 91 struct x86_reg regmem ) 92{ 93 unsigned char val = 0; 94 95 assert(reg.mod == mod_REG); 96 97 val |= regmem.mod << 6; /* mod field */ 98 val |= reg.idx << 3; /* reg field */ 99 val |= regmem.idx; /* r/m field */ 100 101 emit_1ub(p, val); 102 103 /* Oh-oh we've stumbled into the SIB thing. 104 */ 105 if (regmem.file == file_REG32 && 106 regmem.idx == reg_SP) { 107 emit_1ub(p, 0x24); /* simplistic! */ 108 } 109 110 switch (regmem.mod) { 111 case mod_REG: 112 case mod_INDIRECT: 113 break; 114 case mod_DISP8: 115 emit_1b(p, regmem.disp); 116 break; 117 case mod_DISP32: 118 emit_1i(p, regmem.disp); 119 break; 120 default: 121 assert(0); 122 break; 123 } 124} 125 126 127static void emit_modrm_noreg( struct x86_function *p, 128 unsigned op, 129 struct x86_reg regmem ) 130{ 131 struct x86_reg dummy = x86_make_reg(file_REG32, op); 132 emit_modrm(p, dummy, regmem); 133} 134 135/* Many x86 instructions have two opcodes to cope with the situations 136 * where the destination is a register or memory reference 137 * respectively. This function selects the correct opcode based on 138 * the arguments presented. 139 */ 140static void emit_op_modrm( struct x86_function *p, 141 unsigned char op_dst_is_reg, 142 unsigned char op_dst_is_mem, 143 struct x86_reg dst, 144 struct x86_reg src ) 145{ 146 switch (dst.mod) { 147 case mod_REG: 148 emit_1ub(p, op_dst_is_reg); 149 emit_modrm(p, dst, src); 150 break; 151 case mod_INDIRECT: 152 case mod_DISP32: 153 case mod_DISP8: 154 assert(src.mod == mod_REG); 155 emit_1ub(p, op_dst_is_mem); 156 emit_modrm(p, src, dst); 157 break; 158 default: 159 assert(0); 160 break; 161 } 162} 163 164 165 166 167 168 169 170/* Create and manipulate registers and regmem values: 171 */ 172struct x86_reg x86_make_reg( enum x86_reg_file file, 173 enum x86_reg_name idx ) 174{ 175 struct x86_reg reg; 176 177 reg.file = file; 178 reg.idx = idx; 179 reg.mod = mod_REG; 180 reg.disp = 0; 181 182 return reg; 183} 184 185struct x86_reg x86_make_disp( struct x86_reg reg, 186 int disp ) 187{ 188 assert(reg.file == file_REG32); 189 190 if (reg.mod == mod_REG) 191 reg.disp = disp; 192 else 193 reg.disp += disp; 194 195 if (reg.disp == 0) 196 reg.mod = mod_INDIRECT; 197 else if (reg.disp <= 127 && reg.disp >= -128) 198 reg.mod = mod_DISP8; 199 else 200 reg.mod = mod_DISP32; 201 202 return reg; 203} 204 205struct x86_reg x86_deref( struct x86_reg reg ) 206{ 207 return x86_make_disp(reg, 0); 208} 209 210struct x86_reg x86_get_base_reg( struct x86_reg reg ) 211{ 212 return x86_make_reg( reg.file, reg.idx ); 213} 214 215unsigned char *x86_get_label( struct x86_function *p ) 216{ 217 return p->csr; 218} 219 220 221 222/*********************************************************************** 223 * x86 instructions 224 */ 225 226 227void x86_jcc( struct x86_function *p, 228 enum x86_cc cc, 229 unsigned char *label ) 230{ 231 int offset = label - (x86_get_label(p) + 2); 232 233 if (offset <= 127 && offset >= -128) { 234 emit_1ub(p, 0x70 + cc); 235 emit_1b(p, (char) offset); 236 } 237 else { 238 offset = label - (x86_get_label(p) + 6); 239 emit_2ub(p, 0x0f, 0x80 + cc); 240 emit_1i(p, offset); 241 } 242} 243 244/* Always use a 32bit offset for forward jumps: 245 */ 246unsigned char *x86_jcc_forward( struct x86_function *p, 247 enum x86_cc cc ) 248{ 249 emit_2ub(p, 0x0f, 0x80 + cc); 250 emit_1i(p, 0); 251 return x86_get_label(p); 252} 253 254unsigned char *x86_jmp_forward( struct x86_function *p) 255{ 256 emit_1ub(p, 0xe9); 257 emit_1i(p, 0); 258 return x86_get_label(p); 259} 260 261unsigned char *x86_call_forward( struct x86_function *p) 262{ 263 emit_1ub(p, 0xe8); 264 emit_1i(p, 0); 265 return x86_get_label(p); 266} 267 268/* Fixup offset from forward jump: 269 */ 270void x86_fixup_fwd_jump( struct x86_function *p, 271 unsigned char *fixup ) 272{ 273 *(int *)(fixup - 4) = x86_get_label(p) - fixup; 274} 275 276void x86_jmp( struct x86_function *p, unsigned char *label) 277{ 278 emit_1ub(p, 0xe9); 279 emit_1i(p, label - x86_get_label(p) - 4); 280} 281 282#if 0 283/* This doesn't work once we start reallocating & copying the 284 * generated code on buffer fills, because the call is relative to the 285 * current pc. 286 */ 287void x86_call( struct x86_function *p, void (*label)()) 288{ 289 emit_1ub(p, 0xe8); 290 emit_1i(p, cptr(label) - x86_get_label(p) - 4); 291} 292#else 293void x86_call( struct x86_function *p, struct x86_reg reg) 294{ 295 emit_1ub(p, 0xff); 296 emit_modrm_noreg(p, 2, reg); 297} 298#endif 299 300 301/* michal: 302 * Temporary. As I need immediate operands, and dont want to mess with the codegen, 303 * I load the immediate into general purpose register and use it. 304 */ 305void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) 306{ 307 assert(dst.mod == mod_REG); 308 emit_1ub(p, 0xb8 + dst.idx); 309 emit_1i(p, imm); 310} 311 312void x86_push( struct x86_function *p, 313 struct x86_reg reg ) 314{ 315 assert(reg.mod == mod_REG); 316 emit_1ub(p, 0x50 + reg.idx); 317 p->stack_offset += 4; 318} 319 320void x86_pop( struct x86_function *p, 321 struct x86_reg reg ) 322{ 323 assert(reg.mod == mod_REG); 324 emit_1ub(p, 0x58 + reg.idx); 325 p->stack_offset -= 4; 326} 327 328void x86_inc( struct x86_function *p, 329 struct x86_reg reg ) 330{ 331 assert(reg.mod == mod_REG); 332 emit_1ub(p, 0x40 + reg.idx); 333} 334 335void x86_dec( struct x86_function *p, 336 struct x86_reg reg ) 337{ 338 assert(reg.mod == mod_REG); 339 emit_1ub(p, 0x48 + reg.idx); 340} 341 342void x86_ret( struct x86_function *p ) 343{ 344 emit_1ub(p, 0xc3); 345} 346 347void x86_sahf( struct x86_function *p ) 348{ 349 emit_1ub(p, 0x9e); 350} 351 352void x86_mov( struct x86_function *p, 353 struct x86_reg dst, 354 struct x86_reg src ) 355{ 356 emit_op_modrm( p, 0x8b, 0x89, dst, src ); 357} 358 359void x86_xor( struct x86_function *p, 360 struct x86_reg dst, 361 struct x86_reg src ) 362{ 363 emit_op_modrm( p, 0x33, 0x31, dst, src ); 364} 365 366void x86_cmp( struct x86_function *p, 367 struct x86_reg dst, 368 struct x86_reg src ) 369{ 370 emit_op_modrm( p, 0x3b, 0x39, dst, src ); 371} 372 373void x86_lea( struct x86_function *p, 374 struct x86_reg dst, 375 struct x86_reg src ) 376{ 377 emit_1ub(p, 0x8d); 378 emit_modrm( p, dst, src ); 379} 380 381void x86_test( struct x86_function *p, 382 struct x86_reg dst, 383 struct x86_reg src ) 384{ 385 emit_1ub(p, 0x85); 386 emit_modrm( p, dst, src ); 387} 388 389void x86_add( struct x86_function *p, 390 struct x86_reg dst, 391 struct x86_reg src ) 392{ 393 emit_op_modrm(p, 0x03, 0x01, dst, src ); 394} 395 396void x86_mul( struct x86_function *p, 397 struct x86_reg src ) 398{ 399 assert (src.file == file_REG32 && src.mod == mod_REG); 400 emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); 401} 402 403void x86_sub( struct x86_function *p, 404 struct x86_reg dst, 405 struct x86_reg src ) 406{ 407 emit_op_modrm(p, 0x2b, 0x29, dst, src ); 408} 409 410void x86_or( struct x86_function *p, 411 struct x86_reg dst, 412 struct x86_reg src ) 413{ 414 emit_op_modrm( p, 0x0b, 0x09, dst, src ); 415} 416 417void x86_and( struct x86_function *p, 418 struct x86_reg dst, 419 struct x86_reg src ) 420{ 421 emit_op_modrm( p, 0x23, 0x21, dst, src ); 422} 423 424 425 426/*********************************************************************** 427 * SSE instructions 428 */ 429 430 431void sse_movss( struct x86_function *p, 432 struct x86_reg dst, 433 struct x86_reg src ) 434{ 435 emit_2ub(p, 0xF3, X86_TWOB); 436 emit_op_modrm( p, 0x10, 0x11, dst, src ); 437} 438 439void sse_movaps( struct x86_function *p, 440 struct x86_reg dst, 441 struct x86_reg src ) 442{ 443 emit_1ub(p, X86_TWOB); 444 emit_op_modrm( p, 0x28, 0x29, dst, src ); 445} 446 447void sse_movups( struct x86_function *p, 448 struct x86_reg dst, 449 struct x86_reg src ) 450{ 451 emit_1ub(p, X86_TWOB); 452 emit_op_modrm( p, 0x10, 0x11, dst, src ); 453} 454 455void sse_movhps( struct x86_function *p, 456 struct x86_reg dst, 457 struct x86_reg src ) 458{ 459 assert(dst.mod != mod_REG || src.mod != mod_REG); 460 emit_1ub(p, X86_TWOB); 461 emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ 462} 463 464void sse_movlps( struct x86_function *p, 465 struct x86_reg dst, 466 struct x86_reg src ) 467{ 468 assert(dst.mod != mod_REG || src.mod != mod_REG); 469 emit_1ub(p, X86_TWOB); 470 emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ 471} 472 473void sse_maxps( struct x86_function *p, 474 struct x86_reg dst, 475 struct x86_reg src ) 476{ 477 emit_2ub(p, X86_TWOB, 0x5F); 478 emit_modrm( p, dst, src ); 479} 480 481void sse_maxss( struct x86_function *p, 482 struct x86_reg dst, 483 struct x86_reg src ) 484{ 485 emit_3ub(p, 0xF3, X86_TWOB, 0x5F); 486 emit_modrm( p, dst, src ); 487} 488 489void sse_divss( struct x86_function *p, 490 struct x86_reg dst, 491 struct x86_reg src ) 492{ 493 emit_3ub(p, 0xF3, X86_TWOB, 0x5E); 494 emit_modrm( p, dst, src ); 495} 496 497void sse_minps( struct x86_function *p, 498 struct x86_reg dst, 499 struct x86_reg src ) 500{ 501 emit_2ub(p, X86_TWOB, 0x5D); 502 emit_modrm( p, dst, src ); 503} 504 505void sse_subps( struct x86_function *p, 506 struct x86_reg dst, 507 struct x86_reg src ) 508{ 509 emit_2ub(p, X86_TWOB, 0x5C); 510 emit_modrm( p, dst, src ); 511} 512 513void sse_mulps( struct x86_function *p, 514 struct x86_reg dst, 515 struct x86_reg src ) 516{ 517 emit_2ub(p, X86_TWOB, 0x59); 518 emit_modrm( p, dst, src ); 519} 520 521void sse_mulss( struct x86_function *p, 522 struct x86_reg dst, 523 struct x86_reg src ) 524{ 525 emit_3ub(p, 0xF3, X86_TWOB, 0x59); 526 emit_modrm( p, dst, src ); 527} 528 529void sse_addps( struct x86_function *p, 530 struct x86_reg dst, 531 struct x86_reg src ) 532{ 533 emit_2ub(p, X86_TWOB, 0x58); 534 emit_modrm( p, dst, src ); 535} 536 537void sse_addss( struct x86_function *p, 538 struct x86_reg dst, 539 struct x86_reg src ) 540{ 541 emit_3ub(p, 0xF3, X86_TWOB, 0x58); 542 emit_modrm( p, dst, src ); 543} 544 545void sse_andnps( struct x86_function *p, 546 struct x86_reg dst, 547 struct x86_reg src ) 548{ 549 emit_2ub(p, X86_TWOB, 0x55); 550 emit_modrm( p, dst, src ); 551} 552 553void sse_andps( struct x86_function *p, 554 struct x86_reg dst, 555 struct x86_reg src ) 556{ 557 emit_2ub(p, X86_TWOB, 0x54); 558 emit_modrm( p, dst, src ); 559} 560 561void sse_rsqrtps( struct x86_function *p, 562 struct x86_reg dst, 563 struct x86_reg src ) 564{ 565 emit_2ub(p, X86_TWOB, 0x52); 566 emit_modrm( p, dst, src ); 567} 568 569void sse_rsqrtss( struct x86_function *p, 570 struct x86_reg dst, 571 struct x86_reg src ) 572{ 573 emit_3ub(p, 0xF3, X86_TWOB, 0x52); 574 emit_modrm( p, dst, src ); 575 576} 577 578void sse_movhlps( struct x86_function *p, 579 struct x86_reg dst, 580 struct x86_reg src ) 581{ 582 assert(dst.mod == mod_REG && src.mod == mod_REG); 583 emit_2ub(p, X86_TWOB, 0x12); 584 emit_modrm( p, dst, src ); 585} 586 587void sse_movlhps( struct x86_function *p, 588 struct x86_reg dst, 589 struct x86_reg src ) 590{ 591 assert(dst.mod == mod_REG && src.mod == mod_REG); 592 emit_2ub(p, X86_TWOB, 0x16); 593 emit_modrm( p, dst, src ); 594} 595 596void sse_orps( struct x86_function *p, 597 struct x86_reg dst, 598 struct x86_reg src ) 599{ 600 emit_2ub(p, X86_TWOB, 0x56); 601 emit_modrm( p, dst, src ); 602} 603 604void sse_xorps( struct x86_function *p, 605 struct x86_reg dst, 606 struct x86_reg src ) 607{ 608 emit_2ub(p, X86_TWOB, 0x57); 609 emit_modrm( p, dst, src ); 610} 611 612void sse_cvtps2pi( struct x86_function *p, 613 struct x86_reg dst, 614 struct x86_reg src ) 615{ 616 assert(dst.file == file_MMX && 617 (src.file == file_XMM || src.mod != mod_REG)); 618 619 p->need_emms = 1; 620 621 emit_2ub(p, X86_TWOB, 0x2d); 622 emit_modrm( p, dst, src ); 623} 624 625 626/* Shufps can also be used to implement a reduced swizzle when dest == 627 * arg0. 628 */ 629void sse_shufps( struct x86_function *p, 630 struct x86_reg dest, 631 struct x86_reg arg0, 632 unsigned char shuf) 633{ 634 emit_2ub(p, X86_TWOB, 0xC6); 635 emit_modrm(p, dest, arg0); 636 emit_1ub(p, shuf); 637} 638 639void sse_cmpps( struct x86_function *p, 640 struct x86_reg dest, 641 struct x86_reg arg0, 642 unsigned char cc) 643{ 644 emit_2ub(p, X86_TWOB, 0xC2); 645 emit_modrm(p, dest, arg0); 646 emit_1ub(p, cc); 647} 648 649void sse_pmovmskb( struct x86_function *p, 650 struct x86_reg dest, 651 struct x86_reg src) 652{ 653 emit_3ub(p, 0x66, X86_TWOB, 0xD7); 654 emit_modrm(p, dest, src); 655} 656 657/*********************************************************************** 658 * SSE2 instructions 659 */ 660 661/** 662 * Perform a reduced swizzle: 663 */ 664void sse2_pshufd( struct x86_function *p, 665 struct x86_reg dest, 666 struct x86_reg arg0, 667 unsigned char shuf) 668{ 669 emit_3ub(p, 0x66, X86_TWOB, 0x70); 670 emit_modrm(p, dest, arg0); 671 emit_1ub(p, shuf); 672} 673 674void sse2_cvttps2dq( struct x86_function *p, 675 struct x86_reg dst, 676 struct x86_reg src ) 677{ 678 emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); 679 emit_modrm( p, dst, src ); 680} 681 682void sse2_cvtps2dq( struct x86_function *p, 683 struct x86_reg dst, 684 struct x86_reg src ) 685{ 686 emit_3ub(p, 0x66, X86_TWOB, 0x5B); 687 emit_modrm( p, dst, src ); 688} 689 690void sse2_packssdw( struct x86_function *p, 691 struct x86_reg dst, 692 struct x86_reg src ) 693{ 694 emit_3ub(p, 0x66, X86_TWOB, 0x6B); 695 emit_modrm( p, dst, src ); 696} 697 698void sse2_packsswb( struct x86_function *p, 699 struct x86_reg dst, 700 struct x86_reg src ) 701{ 702 emit_3ub(p, 0x66, X86_TWOB, 0x63); 703 emit_modrm( p, dst, src ); 704} 705 706void sse2_packuswb( struct x86_function *p, 707 struct x86_reg dst, 708 struct x86_reg src ) 709{ 710 emit_3ub(p, 0x66, X86_TWOB, 0x67); 711 emit_modrm( p, dst, src ); 712} 713 714void sse2_rcpps( struct x86_function *p, 715 struct x86_reg dst, 716 struct x86_reg src ) 717{ 718 emit_2ub(p, X86_TWOB, 0x53); 719 emit_modrm( p, dst, src ); 720} 721 722void sse2_rcpss( struct x86_function *p, 723 struct x86_reg dst, 724 struct x86_reg src ) 725{ 726 emit_3ub(p, 0xF3, X86_TWOB, 0x53); 727 emit_modrm( p, dst, src ); 728} 729 730void sse2_movd( struct x86_function *p, 731 struct x86_reg dst, 732 struct x86_reg src ) 733{ 734 emit_2ub(p, 0x66, X86_TWOB); 735 emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 736} 737 738 739 740 741/*********************************************************************** 742 * x87 instructions 743 */ 744void x87_fist( struct x86_function *p, struct x86_reg dst ) 745{ 746 emit_1ub(p, 0xdb); 747 emit_modrm_noreg(p, 2, dst); 748} 749 750void x87_fistp( struct x86_function *p, struct x86_reg dst ) 751{ 752 emit_1ub(p, 0xdb); 753 emit_modrm_noreg(p, 3, dst); 754} 755 756void x87_fild( struct x86_function *p, struct x86_reg arg ) 757{ 758 emit_1ub(p, 0xdf); 759 emit_modrm_noreg(p, 0, arg); 760} 761 762void x87_fldz( struct x86_function *p ) 763{ 764 emit_2ub(p, 0xd9, 0xee); 765} 766 767 768void x87_fldcw( struct x86_function *p, struct x86_reg arg ) 769{ 770 assert(arg.file == file_REG32); 771 assert(arg.mod != mod_REG); 772 emit_1ub(p, 0xd9); 773 emit_modrm_noreg(p, 5, arg); 774} 775 776void x87_fld1( struct x86_function *p ) 777{ 778 emit_2ub(p, 0xd9, 0xe8); 779} 780 781void x87_fldl2e( struct x86_function *p ) 782{ 783 emit_2ub(p, 0xd9, 0xea); 784} 785 786void x87_fldln2( struct x86_function *p ) 787{ 788 emit_2ub(p, 0xd9, 0xed); 789} 790 791void x87_fwait( struct x86_function *p ) 792{ 793 emit_1ub(p, 0x9b); 794} 795 796void x87_fnclex( struct x86_function *p ) 797{ 798 emit_2ub(p, 0xdb, 0xe2); 799} 800 801void x87_fclex( struct x86_function *p ) 802{ 803 x87_fwait(p); 804 x87_fnclex(p); 805} 806 807 808static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, 809 unsigned char dst0ub0, 810 unsigned char dst0ub1, 811 unsigned char arg0ub0, 812 unsigned char arg0ub1, 813 unsigned char argmem_noreg) 814{ 815 assert(dst.file == file_x87); 816 817 if (arg.file == file_x87) { 818 if (dst.idx == 0) 819 emit_2ub(p, dst0ub0, dst0ub1+arg.idx); 820 else if (arg.idx == 0) 821 emit_2ub(p, arg0ub0, arg0ub1+arg.idx); 822 else 823 assert(0); 824 } 825 else if (dst.idx == 0) { 826 assert(arg.file == file_REG32); 827 emit_1ub(p, 0xd8); 828 emit_modrm_noreg(p, argmem_noreg, arg); 829 } 830 else 831 assert(0); 832} 833 834void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 835{ 836 x87_arith_op(p, dst, arg, 837 0xd8, 0xc8, 838 0xdc, 0xc8, 839 4); 840} 841 842void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 843{ 844 x87_arith_op(p, dst, arg, 845 0xd8, 0xe0, 846 0xdc, 0xe8, 847 4); 848} 849 850void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 851{ 852 x87_arith_op(p, dst, arg, 853 0xd8, 0xe8, 854 0xdc, 0xe0, 855 5); 856} 857 858void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 859{ 860 x87_arith_op(p, dst, arg, 861 0xd8, 0xc0, 862 0xdc, 0xc0, 863 0); 864} 865 866void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 867{ 868 x87_arith_op(p, dst, arg, 869 0xd8, 0xf0, 870 0xdc, 0xf8, 871 6); 872} 873 874void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) 875{ 876 x87_arith_op(p, dst, arg, 877 0xd8, 0xf8, 878 0xdc, 0xf0, 879 7); 880} 881 882void x87_fmulp( struct x86_function *p, struct x86_reg dst ) 883{ 884 assert(dst.file == file_x87); 885 assert(dst.idx >= 1); 886 emit_2ub(p, 0xde, 0xc8+dst.idx); 887} 888 889void x87_fsubp( struct x86_function *p, struct x86_reg dst ) 890{ 891 assert(dst.file == file_x87); 892 assert(dst.idx >= 1); 893 emit_2ub(p, 0xde, 0xe8+dst.idx); 894} 895 896void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) 897{ 898 assert(dst.file == file_x87); 899 assert(dst.idx >= 1); 900 emit_2ub(p, 0xde, 0xe0+dst.idx); 901} 902 903void x87_faddp( struct x86_function *p, struct x86_reg dst ) 904{ 905 assert(dst.file == file_x87); 906 assert(dst.idx >= 1); 907 emit_2ub(p, 0xde, 0xc0+dst.idx); 908} 909 910void x87_fdivp( struct x86_function *p, struct x86_reg dst ) 911{ 912 assert(dst.file == file_x87); 913 assert(dst.idx >= 1); 914 emit_2ub(p, 0xde, 0xf8+dst.idx); 915} 916 917void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) 918{ 919 assert(dst.file == file_x87); 920 assert(dst.idx >= 1); 921 emit_2ub(p, 0xde, 0xf0+dst.idx); 922} 923 924void x87_fucom( struct x86_function *p, struct x86_reg arg ) 925{ 926 assert(arg.file == file_x87); 927 emit_2ub(p, 0xdd, 0xe0+arg.idx); 928} 929 930void x87_fucomp( struct x86_function *p, struct x86_reg arg ) 931{ 932 assert(arg.file == file_x87); 933 emit_2ub(p, 0xdd, 0xe8+arg.idx); 934} 935 936void x87_fucompp( struct x86_function *p ) 937{ 938 emit_2ub(p, 0xda, 0xe9); 939} 940 941void x87_fxch( struct x86_function *p, struct x86_reg arg ) 942{ 943 assert(arg.file == file_x87); 944 emit_2ub(p, 0xd9, 0xc8+arg.idx); 945} 946 947void x87_fabs( struct x86_function *p ) 948{ 949 emit_2ub(p, 0xd9, 0xe1); 950} 951 952void x87_fchs( struct x86_function *p ) 953{ 954 emit_2ub(p, 0xd9, 0xe0); 955} 956 957void x87_fcos( struct x86_function *p ) 958{ 959 emit_2ub(p, 0xd9, 0xff); 960} 961 962 963void x87_fprndint( struct x86_function *p ) 964{ 965 emit_2ub(p, 0xd9, 0xfc); 966} 967 968void x87_fscale( struct x86_function *p ) 969{ 970 emit_2ub(p, 0xd9, 0xfd); 971} 972 973void x87_fsin( struct x86_function *p ) 974{ 975 emit_2ub(p, 0xd9, 0xfe); 976} 977 978void x87_fsincos( struct x86_function *p ) 979{ 980 emit_2ub(p, 0xd9, 0xfb); 981} 982 983void x87_fsqrt( struct x86_function *p ) 984{ 985 emit_2ub(p, 0xd9, 0xfa); 986} 987 988void x87_fxtract( struct x86_function *p ) 989{ 990 emit_2ub(p, 0xd9, 0xf4); 991} 992 993/* st0 = (2^st0)-1 994 * 995 * Restrictions: -1.0 <= st0 <= 1.0 996 */ 997void x87_f2xm1( struct x86_function *p ) 998{ 999 emit_2ub(p, 0xd9, 0xf0); 1000} 1001 1002/* st1 = st1 * log2(st0); 1003 * pop_stack; 1004 */ 1005void x87_fyl2x( struct x86_function *p ) 1006{ 1007 emit_2ub(p, 0xd9, 0xf1); 1008} 1009 1010/* st1 = st1 * log2(st0 + 1.0); 1011 * pop_stack; 1012 * 1013 * A fast operation, with restrictions: -.29 < st0 < .29 1014 */ 1015void x87_fyl2xp1( struct x86_function *p ) 1016{ 1017 emit_2ub(p, 0xd9, 0xf9); 1018} 1019 1020 1021void x87_fld( struct x86_function *p, struct x86_reg arg ) 1022{ 1023 if (arg.file == file_x87) 1024 emit_2ub(p, 0xd9, 0xc0 + arg.idx); 1025 else { 1026 emit_1ub(p, 0xd9); 1027 emit_modrm_noreg(p, 0, arg); 1028 } 1029} 1030 1031void x87_fst( struct x86_function *p, struct x86_reg dst ) 1032{ 1033 if (dst.file == file_x87) 1034 emit_2ub(p, 0xdd, 0xd0 + dst.idx); 1035 else { 1036 emit_1ub(p, 0xd9); 1037 emit_modrm_noreg(p, 2, dst); 1038 } 1039} 1040 1041void x87_fstp( struct x86_function *p, struct x86_reg dst ) 1042{ 1043 if (dst.file == file_x87) 1044 emit_2ub(p, 0xdd, 0xd8 + dst.idx); 1045 else { 1046 emit_1ub(p, 0xd9); 1047 emit_modrm_noreg(p, 3, dst); 1048 } 1049} 1050 1051void x87_fcom( struct x86_function *p, struct x86_reg dst ) 1052{ 1053 if (dst.file == file_x87) 1054 emit_2ub(p, 0xd8, 0xd0 + dst.idx); 1055 else { 1056 emit_1ub(p, 0xd8); 1057 emit_modrm_noreg(p, 2, dst); 1058 } 1059} 1060 1061void x87_fcomp( struct x86_function *p, struct x86_reg dst ) 1062{ 1063 if (dst.file == file_x87) 1064 emit_2ub(p, 0xd8, 0xd8 + dst.idx); 1065 else { 1066 emit_1ub(p, 0xd8); 1067 emit_modrm_noreg(p, 3, dst); 1068 } 1069} 1070 1071 1072void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) 1073{ 1074 assert(dst.file == file_REG32); 1075 1076 if (dst.idx == reg_AX && 1077 dst.mod == mod_REG) 1078 emit_2ub(p, 0xdf, 0xe0); 1079 else { 1080 emit_1ub(p, 0xdd); 1081 emit_modrm_noreg(p, 7, dst); 1082 } 1083} 1084 1085 1086 1087 1088/*********************************************************************** 1089 * MMX instructions 1090 */ 1091 1092void mmx_emms( struct x86_function *p ) 1093{ 1094 assert(p->need_emms); 1095 emit_2ub(p, 0x0f, 0x77); 1096 p->need_emms = 0; 1097} 1098 1099void mmx_packssdw( struct x86_function *p, 1100 struct x86_reg dst, 1101 struct x86_reg src ) 1102{ 1103 assert(dst.file == file_MMX && 1104 (src.file == file_MMX || src.mod != mod_REG)); 1105 1106 p->need_emms = 1; 1107 1108 emit_2ub(p, X86_TWOB, 0x6b); 1109 emit_modrm( p, dst, src ); 1110} 1111 1112void mmx_packuswb( struct x86_function *p, 1113 struct x86_reg dst, 1114 struct x86_reg src ) 1115{ 1116 assert(dst.file == file_MMX && 1117 (src.file == file_MMX || src.mod != mod_REG)); 1118 1119 p->need_emms = 1; 1120 1121 emit_2ub(p, X86_TWOB, 0x67); 1122 emit_modrm( p, dst, src ); 1123} 1124 1125void mmx_movd( struct x86_function *p, 1126 struct x86_reg dst, 1127 struct x86_reg src ) 1128{ 1129 p->need_emms = 1; 1130 emit_1ub(p, X86_TWOB); 1131 emit_op_modrm( p, 0x6e, 0x7e, dst, src ); 1132} 1133 1134void mmx_movq( struct x86_function *p, 1135 struct x86_reg dst, 1136 struct x86_reg src ) 1137{ 1138 p->need_emms = 1; 1139 emit_1ub(p, X86_TWOB); 1140 emit_op_modrm( p, 0x6f, 0x7f, dst, src ); 1141} 1142 1143 1144/*********************************************************************** 1145 * Helper functions 1146 */ 1147 1148 1149/* Retreive a reference to one of the function arguments, taking into 1150 * account any push/pop activity: 1151 */ 1152struct x86_reg x86_fn_arg( struct x86_function *p, 1153 unsigned arg ) 1154{ 1155 return x86_make_disp(x86_make_reg(file_REG32, reg_SP), 1156 p->stack_offset + arg * 4); /* ??? */ 1157} 1158 1159 1160void x86_init_func( struct x86_function *p ) 1161{ 1162 p->size = 0; 1163 p->store = NULL; 1164 p->csr = p->store; 1165} 1166 1167int x86_init_func_size( struct x86_function *p, unsigned code_size ) 1168{ 1169 p->size = code_size; 1170 p->store = _mesa_exec_malloc(code_size); 1171 p->csr = p->store; 1172 return p->store != NULL; 1173} 1174 1175void x86_release_func( struct x86_function *p ) 1176{ 1177 _mesa_exec_free(p->store); 1178 p->store = NULL; 1179 p->csr = NULL; 1180 p->size = 0; 1181} 1182 1183 1184void (*x86_get_func( struct x86_function *p ))(void) 1185{ 1186 if (DISASSEM && p->store) 1187 printf("disassemble %p %p\n", p->store, p->csr); 1188 return (void (*)(void)) (unsigned long) p->store; 1189} 1190 1191#else 1192 1193void x86sse_dummy( void ) 1194{ 1195} 1196 1197#endif 1198 1199#else /* USE_X86_ASM */ 1200 1201int x86sse_c_dummy_var; /* silence warning */ 1202 1203#endif /* USE_X86_ASM */ 1204