1#ifdef USE_X86_ASM
2#if defined(__i386__) || defined(__386__)
3
4#include <stdio.h>
5
6#include "main/imports.h"
7#include "x86sse.h"
8
9#define DISASSEM 0
10#define X86_TWOB 0x0f
11
12#if 0
/* Convert a function pointer into a byte pointer by going through an
 * unsigned long.
 */
static unsigned char *cptr( void (*label)() )
{
   unsigned long address = (unsigned long)label;

   return (unsigned char *)address;
}
17#endif
18
19
20static void do_realloc( struct x86_function *p )
21{
22   if (p->size == 0) {
23      p->size = 1024;
24      p->store = _mesa_exec_malloc(p->size);
25      p->csr = p->store;
26   }
27   else {
28      unsigned used = p->csr - p->store;
29      unsigned char *tmp = p->store;
30      p->size *= 2;
31      p->store = _mesa_exec_malloc(p->size);
32      memcpy(p->store, tmp, used);
33      p->csr = p->store + used;
34      _mesa_exec_free(tmp);
35   }
36}
37
38/* Emit bytes to the instruction stream:
39 */
40static unsigned char *reserve( struct x86_function *p, int bytes )
41{
42   if (p->csr + bytes - p->store > p->size)
43      do_realloc(p);
44
45   {
46      unsigned char *csr = p->csr;
47      p->csr += bytes;
48      return csr;
49   }
50}
51
52
53
/* Append one (plain char) byte to the instruction stream. */
static void emit_1b( struct x86_function *p, char b0 )
{
   *(char *)reserve(p, 1) = b0;
}
59
/* Append an int to the instruction stream in host byte order.
 *
 * The destination is an arbitrary position inside a byte buffer, so the
 * value is copied with memcpy instead of being stored through a cast
 * int pointer (which is a misaligned, type-punned access in C).
 */
static void emit_1i( struct x86_function *p, int i0 )
{
   unsigned char *dst = reserve(p, sizeof(i0));

   memcpy(dst, &i0, sizeof(i0));
}
65
/* Append one unsigned byte to the instruction stream. */
static void emit_1ub( struct x86_function *p, unsigned char b0 )
{
   unsigned char *out = reserve(p, 1);

   out[0] = b0;
}
71
/* Append two unsigned bytes, b0 first, to the instruction stream. */
static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 )
{
   unsigned char *out = reserve(p, 2);

   out[0] = b0;
   out[1] = b1;
}
78
/* Append three unsigned bytes, in argument order, to the instruction
 * stream.
 */
static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 )
{
   unsigned char *out = reserve(p, 3);

   out[0] = b0;
   out[1] = b1;
   out[2] = b2;
}
86
87
88/* Build a modRM byte + possible displacement.  No treatment of SIB
89 * indexing.  BZZT - no way to encode an absolute address.
90 */
91static void emit_modrm( struct x86_function *p,
92			struct x86_reg reg,
93			struct x86_reg regmem )
94{
95   unsigned char val = 0;
96
97   assert(reg.mod == mod_REG);
98
99   val |= regmem.mod << 6;     	/* mod field */
100   val |= reg.idx << 3;		/* reg field */
101   val |= regmem.idx;		/* r/m field */
102
103   emit_1ub(p, val);
104
105   /* Oh-oh we've stumbled into the SIB thing.
106    */
107   if (regmem.file == file_REG32 &&
108       regmem.idx == reg_SP) {
109      emit_1ub(p, 0x24);		/* simplistic! */
110   }
111
112   switch (regmem.mod) {
113   case mod_REG:
114   case mod_INDIRECT:
115      break;
116   case mod_DISP8:
117      emit_1b(p, regmem.disp);
118      break;
119   case mod_DISP32:
120      emit_1i(p, regmem.disp);
121      break;
122   default:
123      assert(0);
124      break;
125   }
126}
127
128
129static void emit_modrm_noreg( struct x86_function *p,
130			      unsigned op,
131			      struct x86_reg regmem )
132{
133   struct x86_reg dummy = x86_make_reg(file_REG32, op);
134   emit_modrm(p, dummy, regmem);
135}
136
137/* Many x86 instructions have two opcodes to cope with the situations
138 * where the destination is a register or memory reference
139 * respectively.  This function selects the correct opcode based on
140 * the arguments presented.
141 */
142static void emit_op_modrm( struct x86_function *p,
143			   unsigned char op_dst_is_reg,
144			   unsigned char op_dst_is_mem,
145			   struct x86_reg dst,
146			   struct x86_reg src )
147{
148   switch (dst.mod) {
149   case mod_REG:
150      emit_1ub(p, op_dst_is_reg);
151      emit_modrm(p, dst, src);
152      break;
153   case mod_INDIRECT:
154   case mod_DISP32:
155   case mod_DISP8:
156      assert(src.mod == mod_REG);
157      emit_1ub(p, op_dst_is_mem);
158      emit_modrm(p, src, dst);
159      break;
160   default:
161      assert(0);
162      break;
163   }
164}
165
166
167
168
169
170
171
172/* Create and manipulate registers and regmem values:
173 */
174struct x86_reg x86_make_reg( enum x86_reg_file file,
175			     enum x86_reg_name idx )
176{
177   struct x86_reg reg;
178
179   reg.file = file;
180   reg.idx = idx;
181   reg.mod = mod_REG;
182   reg.disp = 0;
183
184   return reg;
185}
186
187struct x86_reg x86_make_disp( struct x86_reg reg,
188			      int disp )
189{
190   assert(reg.file == file_REG32);
191
192   if (reg.mod == mod_REG)
193      reg.disp = disp;
194   else
195      reg.disp += disp;
196
197   if (reg.disp == 0)
198      reg.mod = mod_INDIRECT;
199   else if (reg.disp <= 127 && reg.disp >= -128)
200      reg.mod = mod_DISP8;
201   else
202      reg.mod = mod_DISP32;
203
204   return reg;
205}
206
207struct x86_reg x86_deref( struct x86_reg reg )
208{
209   return x86_make_disp(reg, 0);
210}
211
212struct x86_reg x86_get_base_reg( struct x86_reg reg )
213{
214   return x86_make_reg( reg.file, reg.idx );
215}
216
217unsigned char *x86_get_label( struct x86_function *p )
218{
219   return p->csr;
220}
221
222
223
224/***********************************************************************
225 * x86 instructions
226 */
227
228
229void x86_jcc( struct x86_function *p,
230	      enum x86_cc cc,
231	      unsigned char *label )
232{
233   int offset = label - (x86_get_label(p) + 2);
234
235   if (offset <= 127 && offset >= -128) {
236      emit_1ub(p, 0x70 + cc);
237      emit_1b(p, (char) offset);
238   }
239   else {
240      offset = label - (x86_get_label(p) + 6);
241      emit_2ub(p, 0x0f, 0x80 + cc);
242      emit_1i(p, offset);
243   }
244}
245
246/* Always use a 32bit offset for forward jumps:
247 */
248unsigned char *x86_jcc_forward( struct x86_function *p,
249			  enum x86_cc cc )
250{
251   emit_2ub(p, 0x0f, 0x80 + cc);
252   emit_1i(p, 0);
253   return x86_get_label(p);
254}
255
/* Emit opcode 0xe9 with a zero 32bit offset as a placeholder and return
 * the position just after it.
 */
unsigned char *x86_jmp_forward( struct x86_function *p)
{
   const unsigned char jmp_rel32 = 0xe9;
   unsigned char *after;

   emit_1ub(p, jmp_rel32);
   emit_1i(p, 0);

   after = x86_get_label(p);
   return after;
}
262
/* Emit opcode 0xe8 with a zero 32bit offset as a placeholder and return
 * the position just after it.
 */
unsigned char *x86_call_forward( struct x86_function *p)
{
   const unsigned char call_rel32 = 0xe8;
   unsigned char *after;

   emit_1ub(p, call_rel32);
   emit_1i(p, 0);

   after = x86_get_label(p);
   return after;
}
269
270/* Fixup offset from forward jump:
271 */
272void x86_fixup_fwd_jump( struct x86_function *p,
273			 unsigned char *fixup )
274{
275   *(int *)(fixup - 4) = x86_get_label(p) - fixup;
276}
277
/* Emit opcode 0xe9 followed by the 32bit distance from the end of the
 * instruction to `label`.
 */
void x86_jmp( struct x86_function *p, unsigned char *label)
{
   int rel;

   emit_1ub(p, 0xe9);

   /* The cursor now points at the 4-byte offset field. */
   rel = label - x86_get_label(p) - 4;
   emit_1i(p, rel);
}
283
284#if 0
/* Disabled: a pc-relative call breaks as soon as the generated code is
 * reallocated and copied on a buffer fill, because the encoded offset
 * is relative to the pc at the time it was emitted.
 */
void x86_call( struct x86_function *p, void (*label)())
{
   int rel;

   emit_1ub(p, 0xe8);
   rel = cptr(label) - x86_get_label(p) - 4;
   emit_1i(p, rel);
}
294#else
295void x86_call( struct x86_function *p, struct x86_reg reg)
296{
297   emit_1ub(p, 0xff);
298   emit_modrm_noreg(p, 2, reg);
299}
300#endif
301
302
303/* michal:
304 * Temporary. As I need immediate operands, and dont want to mess with the codegen,
305 * I load the immediate into general purpose register and use it.
306 */
307void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm )
308{
309   assert(dst.mod == mod_REG);
310   emit_1ub(p, 0xb8 + dst.idx);
311   emit_1i(p, imm);
312}
313
314void x86_push( struct x86_function *p,
315	       struct x86_reg reg )
316{
317   assert(reg.mod == mod_REG);
318   emit_1ub(p, 0x50 + reg.idx);
319   p->stack_offset += 4;
320}
321
322void x86_pop( struct x86_function *p,
323	      struct x86_reg reg )
324{
325   assert(reg.mod == mod_REG);
326   emit_1ub(p, 0x58 + reg.idx);
327   p->stack_offset -= 4;
328}
329
330void x86_inc( struct x86_function *p,
331	      struct x86_reg reg )
332{
333   assert(reg.mod == mod_REG);
334   emit_1ub(p, 0x40 + reg.idx);
335}
336
337void x86_dec( struct x86_function *p,
338	      struct x86_reg reg )
339{
340   assert(reg.mod == mod_REG);
341   emit_1ub(p, 0x48 + reg.idx);
342}
343
/* Emit the single byte 0xc3. */
void x86_ret( struct x86_function *p )
{
   const unsigned char opcode = 0xc3;

   emit_1ub(p, opcode);
}
348
/* Emit the single byte 0x9e. */
void x86_sahf( struct x86_function *p )
{
   const unsigned char opcode = 0x9e;

   emit_1ub(p, opcode);
}
353
354void x86_mov( struct x86_function *p,
355	      struct x86_reg dst,
356	      struct x86_reg src )
357{
358   emit_op_modrm( p, 0x8b, 0x89, dst, src );
359}
360
361void x86_xor( struct x86_function *p,
362	      struct x86_reg dst,
363	      struct x86_reg src )
364{
365   emit_op_modrm( p, 0x33, 0x31, dst, src );
366}
367
368void x86_cmp( struct x86_function *p,
369	      struct x86_reg dst,
370	      struct x86_reg src )
371{
372   emit_op_modrm( p, 0x3b, 0x39, dst, src );
373}
374
375void x86_lea( struct x86_function *p,
376	      struct x86_reg dst,
377	      struct x86_reg src )
378{
379   emit_1ub(p, 0x8d);
380   emit_modrm( p, dst, src );
381}
382
383void x86_test( struct x86_function *p,
384	       struct x86_reg dst,
385	       struct x86_reg src )
386{
387   emit_1ub(p, 0x85);
388   emit_modrm( p, dst, src );
389}
390
391void x86_add( struct x86_function *p,
392	       struct x86_reg dst,
393	       struct x86_reg src )
394{
395   emit_op_modrm(p, 0x03, 0x01, dst, src );
396}
397
398void x86_mul( struct x86_function *p,
399	       struct x86_reg src )
400{
401   assert (src.file == file_REG32 && src.mod == mod_REG);
402   emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src );
403}
404
405void x86_sub( struct x86_function *p,
406	       struct x86_reg dst,
407	       struct x86_reg src )
408{
409   emit_op_modrm(p, 0x2b, 0x29, dst, src );
410}
411
412void x86_or( struct x86_function *p,
413             struct x86_reg dst,
414             struct x86_reg src )
415{
416   emit_op_modrm( p, 0x0b, 0x09, dst, src );
417}
418
419void x86_and( struct x86_function *p,
420              struct x86_reg dst,
421              struct x86_reg src )
422{
423   emit_op_modrm( p, 0x23, 0x21, dst, src );
424}
425
426
427
428/***********************************************************************
429 * SSE instructions
430 */
431
432
433void sse_movss( struct x86_function *p,
434		struct x86_reg dst,
435		struct x86_reg src )
436{
437   emit_2ub(p, 0xF3, X86_TWOB);
438   emit_op_modrm( p, 0x10, 0x11, dst, src );
439}
440
441void sse_movaps( struct x86_function *p,
442		 struct x86_reg dst,
443		 struct x86_reg src )
444{
445   emit_1ub(p, X86_TWOB);
446   emit_op_modrm( p, 0x28, 0x29, dst, src );
447}
448
449void sse_movups( struct x86_function *p,
450		 struct x86_reg dst,
451		 struct x86_reg src )
452{
453   emit_1ub(p, X86_TWOB);
454   emit_op_modrm( p, 0x10, 0x11, dst, src );
455}
456
457void sse_movhps( struct x86_function *p,
458		 struct x86_reg dst,
459		 struct x86_reg src )
460{
461   assert(dst.mod != mod_REG || src.mod != mod_REG);
462   emit_1ub(p, X86_TWOB);
463   emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */
464}
465
466void sse_movlps( struct x86_function *p,
467		 struct x86_reg dst,
468		 struct x86_reg src )
469{
470   assert(dst.mod != mod_REG || src.mod != mod_REG);
471   emit_1ub(p, X86_TWOB);
472   emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */
473}
474
475void sse_maxps( struct x86_function *p,
476		struct x86_reg dst,
477		struct x86_reg src )
478{
479   emit_2ub(p, X86_TWOB, 0x5F);
480   emit_modrm( p, dst, src );
481}
482
483void sse_maxss( struct x86_function *p,
484		struct x86_reg dst,
485		struct x86_reg src )
486{
487   emit_3ub(p, 0xF3, X86_TWOB, 0x5F);
488   emit_modrm( p, dst, src );
489}
490
491void sse_divss( struct x86_function *p,
492		struct x86_reg dst,
493		struct x86_reg src )
494{
495   emit_3ub(p, 0xF3, X86_TWOB, 0x5E);
496   emit_modrm( p, dst, src );
497}
498
499void sse_minps( struct x86_function *p,
500		struct x86_reg dst,
501		struct x86_reg src )
502{
503   emit_2ub(p, X86_TWOB, 0x5D);
504   emit_modrm( p, dst, src );
505}
506
507void sse_subps( struct x86_function *p,
508		struct x86_reg dst,
509		struct x86_reg src )
510{
511   emit_2ub(p, X86_TWOB, 0x5C);
512   emit_modrm( p, dst, src );
513}
514
515void sse_mulps( struct x86_function *p,
516		struct x86_reg dst,
517		struct x86_reg src )
518{
519   emit_2ub(p, X86_TWOB, 0x59);
520   emit_modrm( p, dst, src );
521}
522
523void sse_mulss( struct x86_function *p,
524		struct x86_reg dst,
525		struct x86_reg src )
526{
527   emit_3ub(p, 0xF3, X86_TWOB, 0x59);
528   emit_modrm( p, dst, src );
529}
530
531void sse_addps( struct x86_function *p,
532		struct x86_reg dst,
533		struct x86_reg src )
534{
535   emit_2ub(p, X86_TWOB, 0x58);
536   emit_modrm( p, dst, src );
537}
538
539void sse_addss( struct x86_function *p,
540		struct x86_reg dst,
541		struct x86_reg src )
542{
543   emit_3ub(p, 0xF3, X86_TWOB, 0x58);
544   emit_modrm( p, dst, src );
545}
546
547void sse_andnps( struct x86_function *p,
548                 struct x86_reg dst,
549                 struct x86_reg src )
550{
551   emit_2ub(p, X86_TWOB, 0x55);
552   emit_modrm( p, dst, src );
553}
554
555void sse_andps( struct x86_function *p,
556		struct x86_reg dst,
557		struct x86_reg src )
558{
559   emit_2ub(p, X86_TWOB, 0x54);
560   emit_modrm( p, dst, src );
561}
562
563void sse_rsqrtps( struct x86_function *p,
564                  struct x86_reg dst,
565                  struct x86_reg src )
566{
567   emit_2ub(p, X86_TWOB, 0x52);
568   emit_modrm( p, dst, src );
569}
570
571void sse_rsqrtss( struct x86_function *p,
572		  struct x86_reg dst,
573		  struct x86_reg src )
574{
575   emit_3ub(p, 0xF3, X86_TWOB, 0x52);
576   emit_modrm( p, dst, src );
577
578}
579
580void sse_movhlps( struct x86_function *p,
581		  struct x86_reg dst,
582		  struct x86_reg src )
583{
584   assert(dst.mod == mod_REG && src.mod == mod_REG);
585   emit_2ub(p, X86_TWOB, 0x12);
586   emit_modrm( p, dst, src );
587}
588
589void sse_movlhps( struct x86_function *p,
590		  struct x86_reg dst,
591		  struct x86_reg src )
592{
593   assert(dst.mod == mod_REG && src.mod == mod_REG);
594   emit_2ub(p, X86_TWOB, 0x16);
595   emit_modrm( p, dst, src );
596}
597
598void sse_orps( struct x86_function *p,
599               struct x86_reg dst,
600               struct x86_reg src )
601{
602   emit_2ub(p, X86_TWOB, 0x56);
603   emit_modrm( p, dst, src );
604}
605
606void sse_xorps( struct x86_function *p,
607                struct x86_reg dst,
608                struct x86_reg src )
609{
610   emit_2ub(p, X86_TWOB, 0x57);
611   emit_modrm( p, dst, src );
612}
613
614void sse_cvtps2pi( struct x86_function *p,
615		   struct x86_reg dst,
616		   struct x86_reg src )
617{
618   assert(dst.file == file_MMX &&
619	  (src.file == file_XMM || src.mod != mod_REG));
620
621   p->need_emms = 1;
622
623   emit_2ub(p, X86_TWOB, 0x2d);
624   emit_modrm( p, dst, src );
625}
626
627
628/* Shufps can also be used to implement a reduced swizzle when dest ==
629 * arg0.
630 */
631void sse_shufps( struct x86_function *p,
632		 struct x86_reg dest,
633		 struct x86_reg arg0,
634		 unsigned char shuf)
635{
636   emit_2ub(p, X86_TWOB, 0xC6);
637   emit_modrm(p, dest, arg0);
638   emit_1ub(p, shuf);
639}
640
641void sse_cmpps( struct x86_function *p,
642		struct x86_reg dest,
643		struct x86_reg arg0,
644		unsigned char cc)
645{
646   emit_2ub(p, X86_TWOB, 0xC2);
647   emit_modrm(p, dest, arg0);
648   emit_1ub(p, cc);
649}
650
651void sse_pmovmskb( struct x86_function *p,
652                   struct x86_reg dest,
653                   struct x86_reg src)
654{
655    emit_3ub(p, 0x66, X86_TWOB, 0xD7);
656    emit_modrm(p, dest, src);
657}
658
659/***********************************************************************
660 * SSE2 instructions
661 */
662
663/**
664 * Perform a reduced swizzle:
665 */
666void sse2_pshufd( struct x86_function *p,
667		  struct x86_reg dest,
668		  struct x86_reg arg0,
669		  unsigned char shuf)
670{
671   emit_3ub(p, 0x66, X86_TWOB, 0x70);
672   emit_modrm(p, dest, arg0);
673   emit_1ub(p, shuf);
674}
675
676void sse2_cvttps2dq( struct x86_function *p,
677                     struct x86_reg dst,
678                     struct x86_reg src )
679{
680   emit_3ub( p, 0xF3, X86_TWOB, 0x5B );
681   emit_modrm( p, dst, src );
682}
683
684void sse2_cvtps2dq( struct x86_function *p,
685		    struct x86_reg dst,
686		    struct x86_reg src )
687{
688   emit_3ub(p, 0x66, X86_TWOB, 0x5B);
689   emit_modrm( p, dst, src );
690}
691
692void sse2_packssdw( struct x86_function *p,
693		    struct x86_reg dst,
694		    struct x86_reg src )
695{
696   emit_3ub(p, 0x66, X86_TWOB, 0x6B);
697   emit_modrm( p, dst, src );
698}
699
700void sse2_packsswb( struct x86_function *p,
701		    struct x86_reg dst,
702		    struct x86_reg src )
703{
704   emit_3ub(p, 0x66, X86_TWOB, 0x63);
705   emit_modrm( p, dst, src );
706}
707
708void sse2_packuswb( struct x86_function *p,
709		    struct x86_reg dst,
710		    struct x86_reg src )
711{
712   emit_3ub(p, 0x66, X86_TWOB, 0x67);
713   emit_modrm( p, dst, src );
714}
715
716void sse2_rcpps( struct x86_function *p,
717                 struct x86_reg dst,
718                 struct x86_reg src )
719{
720   emit_2ub(p, X86_TWOB, 0x53);
721   emit_modrm( p, dst, src );
722}
723
724void sse2_rcpss( struct x86_function *p,
725		struct x86_reg dst,
726		struct x86_reg src )
727{
728   emit_3ub(p, 0xF3, X86_TWOB, 0x53);
729   emit_modrm( p, dst, src );
730}
731
732void sse2_movd( struct x86_function *p,
733		struct x86_reg dst,
734		struct x86_reg src )
735{
736   emit_2ub(p, 0x66, X86_TWOB);
737   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
738}
739
740
741
742
743/***********************************************************************
744 * x87 instructions
745 */
746void x87_fist( struct x86_function *p, struct x86_reg dst )
747{
748   emit_1ub(p, 0xdb);
749   emit_modrm_noreg(p, 2, dst);
750}
751
752void x87_fistp( struct x86_function *p, struct x86_reg dst )
753{
754   emit_1ub(p, 0xdb);
755   emit_modrm_noreg(p, 3, dst);
756}
757
758void x87_fild( struct x86_function *p, struct x86_reg arg )
759{
760   emit_1ub(p, 0xdf);
761   emit_modrm_noreg(p, 0, arg);
762}
763
/* fldz: emits D9 EE. */
void x87_fldz( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xee;

   emit_2ub(p, esc, op);
}
768
769
770void x87_fldcw( struct x86_function *p, struct x86_reg arg )
771{
772   assert(arg.file == file_REG32);
773   assert(arg.mod != mod_REG);
774   emit_1ub(p, 0xd9);
775   emit_modrm_noreg(p, 5, arg);
776}
777
/* fld1: emits D9 E8. */
void x87_fld1( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xe8;

   emit_2ub(p, esc, op);
}
782
/* fldl2e: emits D9 EA. */
void x87_fldl2e( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xea;

   emit_2ub(p, esc, op);
}
787
/* fldln2: emits D9 ED. */
void x87_fldln2( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xed;

   emit_2ub(p, esc, op);
}
792
/* fwait: emits the single byte 9B. */
void x87_fwait( struct x86_function *p )
{
   const unsigned char opcode = 0x9b;

   emit_1ub(p, opcode);
}
797
/* fnclex: emits DB E2. */
void x87_fnclex( struct x86_function *p )
{
   const unsigned char esc = 0xdb;
   const unsigned char op = 0xe2;

   emit_2ub(p, esc, op);
}
802
/* fclex: emitted as fwait immediately followed by fnclex. */
void x87_fclex( struct x86_function *p )
{
   x87_fwait(p);     /* 9B    */
   x87_fnclex(p);    /* DB E2 */
}
808
809
810static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg,
811			  unsigned char dst0ub0,
812			  unsigned char dst0ub1,
813			  unsigned char arg0ub0,
814			  unsigned char arg0ub1,
815			  unsigned char argmem_noreg)
816{
817   assert(dst.file == file_x87);
818
819   if (arg.file == file_x87) {
820      if (dst.idx == 0)
821	 emit_2ub(p, dst0ub0, dst0ub1+arg.idx);
822      else if (arg.idx == 0)
823	 emit_2ub(p, arg0ub0, arg0ub1+arg.idx);
824      else
825	 assert(0);
826   }
827   else if (dst.idx == 0) {
828      assert(arg.file == file_REG32);
829      emit_1ub(p, 0xd8);
830      emit_modrm_noreg(p, argmem_noreg, arg);
831   }
832   else
833      assert(0);
834}
835
836void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
837{
838   x87_arith_op(p, dst, arg,
839		0xd8, 0xc8,
840		0xdc, 0xc8,
841		4);
842}
843
844void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
845{
846   x87_arith_op(p, dst, arg,
847		0xd8, 0xe0,
848		0xdc, 0xe8,
849		4);
850}
851
852void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
853{
854   x87_arith_op(p, dst, arg,
855		0xd8, 0xe8,
856		0xdc, 0xe0,
857		5);
858}
859
860void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
861{
862   x87_arith_op(p, dst, arg,
863		0xd8, 0xc0,
864		0xdc, 0xc0,
865		0);
866}
867
868void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
869{
870   x87_arith_op(p, dst, arg,
871		0xd8, 0xf0,
872		0xdc, 0xf8,
873		6);
874}
875
876void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg )
877{
878   x87_arith_op(p, dst, arg,
879		0xd8, 0xf8,
880		0xdc, 0xf0,
881		7);
882}
883
884void x87_fmulp( struct x86_function *p, struct x86_reg dst )
885{
886   assert(dst.file == file_x87);
887   assert(dst.idx >= 1);
888   emit_2ub(p, 0xde, 0xc8+dst.idx);
889}
890
891void x87_fsubp( struct x86_function *p, struct x86_reg dst )
892{
893   assert(dst.file == file_x87);
894   assert(dst.idx >= 1);
895   emit_2ub(p, 0xde, 0xe8+dst.idx);
896}
897
898void x87_fsubrp( struct x86_function *p, struct x86_reg dst )
899{
900   assert(dst.file == file_x87);
901   assert(dst.idx >= 1);
902   emit_2ub(p, 0xde, 0xe0+dst.idx);
903}
904
905void x87_faddp( struct x86_function *p, struct x86_reg dst )
906{
907   assert(dst.file == file_x87);
908   assert(dst.idx >= 1);
909   emit_2ub(p, 0xde, 0xc0+dst.idx);
910}
911
912void x87_fdivp( struct x86_function *p, struct x86_reg dst )
913{
914   assert(dst.file == file_x87);
915   assert(dst.idx >= 1);
916   emit_2ub(p, 0xde, 0xf8+dst.idx);
917}
918
919void x87_fdivrp( struct x86_function *p, struct x86_reg dst )
920{
921   assert(dst.file == file_x87);
922   assert(dst.idx >= 1);
923   emit_2ub(p, 0xde, 0xf0+dst.idx);
924}
925
926void x87_fucom( struct x86_function *p, struct x86_reg arg )
927{
928   assert(arg.file == file_x87);
929   emit_2ub(p, 0xdd, 0xe0+arg.idx);
930}
931
932void x87_fucomp( struct x86_function *p, struct x86_reg arg )
933{
934   assert(arg.file == file_x87);
935   emit_2ub(p, 0xdd, 0xe8+arg.idx);
936}
937
/* fucompp: emits DA E9. */
void x87_fucompp( struct x86_function *p )
{
   const unsigned char esc = 0xda;
   const unsigned char op = 0xe9;

   emit_2ub(p, esc, op);
}
942
943void x87_fxch( struct x86_function *p, struct x86_reg arg )
944{
945   assert(arg.file == file_x87);
946   emit_2ub(p, 0xd9, 0xc8+arg.idx);
947}
948
/* fabs: emits D9 E1. */
void x87_fabs( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xe1;

   emit_2ub(p, esc, op);
}
953
/* fchs: emits D9 E0. */
void x87_fchs( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xe0;

   emit_2ub(p, esc, op);
}
958
/* fcos: emits D9 FF. */
void x87_fcos( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xff;

   emit_2ub(p, esc, op);
}
963
964
/* Emits D9 FC. */
void x87_fprndint( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xfc;

   emit_2ub(p, esc, op);
}
969
/* fscale: emits D9 FD. */
void x87_fscale( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xfd;

   emit_2ub(p, esc, op);
}
974
/* fsin: emits D9 FE. */
void x87_fsin( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xfe;

   emit_2ub(p, esc, op);
}
979
/* fsincos: emits D9 FB. */
void x87_fsincos( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xfb;

   emit_2ub(p, esc, op);
}
984
/* fsqrt: emits D9 FA. */
void x87_fsqrt( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xfa;

   emit_2ub(p, esc, op);
}
989
/* fxtract: emits D9 F4. */
void x87_fxtract( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xf4;

   emit_2ub(p, esc, op);
}
994
/* f2xm1 (emits D9 F0):
 *
 *    st0 = (2^st0)-1
 *
 * Restrictions: -1.0 <= st0 <= 1.0
 */
void x87_f2xm1( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xf0;

   emit_2ub(p, esc, op);
}
1003
/* fyl2x (emits D9 F1):
 *
 *    st1 = st1 * log2(st0);
 *    pop_stack;
 */
void x87_fyl2x( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xf1;

   emit_2ub(p, esc, op);
}
1011
/* fyl2xp1 (emits D9 F9):
 *
 *    st1 = st1 * log2(st0 + 1.0);
 *    pop_stack;
 *
 * A fast operation, with restrictions: -.29 < st0 < .29
 */
void x87_fyl2xp1( struct x86_function *p )
{
   const unsigned char esc = 0xd9;
   const unsigned char op = 0xf9;

   emit_2ub(p, esc, op);
}
1021
1022
1023void x87_fld( struct x86_function *p, struct x86_reg arg )
1024{
1025   if (arg.file == file_x87)
1026      emit_2ub(p, 0xd9, 0xc0 + arg.idx);
1027   else {
1028      emit_1ub(p, 0xd9);
1029      emit_modrm_noreg(p, 0, arg);
1030   }
1031}
1032
1033void x87_fst( struct x86_function *p, struct x86_reg dst )
1034{
1035   if (dst.file == file_x87)
1036      emit_2ub(p, 0xdd, 0xd0 + dst.idx);
1037   else {
1038      emit_1ub(p, 0xd9);
1039      emit_modrm_noreg(p, 2, dst);
1040   }
1041}
1042
1043void x87_fstp( struct x86_function *p, struct x86_reg dst )
1044{
1045   if (dst.file == file_x87)
1046      emit_2ub(p, 0xdd, 0xd8 + dst.idx);
1047   else {
1048      emit_1ub(p, 0xd9);
1049      emit_modrm_noreg(p, 3, dst);
1050   }
1051}
1052
1053void x87_fcom( struct x86_function *p, struct x86_reg dst )
1054{
1055   if (dst.file == file_x87)
1056      emit_2ub(p, 0xd8, 0xd0 + dst.idx);
1057   else {
1058      emit_1ub(p, 0xd8);
1059      emit_modrm_noreg(p, 2, dst);
1060   }
1061}
1062
1063void x87_fcomp( struct x86_function *p, struct x86_reg dst )
1064{
1065   if (dst.file == file_x87)
1066      emit_2ub(p, 0xd8, 0xd8 + dst.idx);
1067   else {
1068      emit_1ub(p, 0xd8);
1069      emit_modrm_noreg(p, 3, dst);
1070   }
1071}
1072
1073
1074void x87_fnstsw( struct x86_function *p, struct x86_reg dst )
1075{
1076   assert(dst.file == file_REG32);
1077
1078   if (dst.idx == reg_AX &&
1079       dst.mod == mod_REG)
1080      emit_2ub(p, 0xdf, 0xe0);
1081   else {
1082      emit_1ub(p, 0xdd);
1083      emit_modrm_noreg(p, 7, dst);
1084   }
1085}
1086
1087
1088
1089
1090/***********************************************************************
1091 * MMX instructions
1092 */
1093
1094void mmx_emms( struct x86_function *p )
1095{
1096   assert(p->need_emms);
1097   emit_2ub(p, 0x0f, 0x77);
1098   p->need_emms = 0;
1099}
1100
1101void mmx_packssdw( struct x86_function *p,
1102		   struct x86_reg dst,
1103		   struct x86_reg src )
1104{
1105   assert(dst.file == file_MMX &&
1106	  (src.file == file_MMX || src.mod != mod_REG));
1107
1108   p->need_emms = 1;
1109
1110   emit_2ub(p, X86_TWOB, 0x6b);
1111   emit_modrm( p, dst, src );
1112}
1113
1114void mmx_packuswb( struct x86_function *p,
1115		   struct x86_reg dst,
1116		   struct x86_reg src )
1117{
1118   assert(dst.file == file_MMX &&
1119	  (src.file == file_MMX || src.mod != mod_REG));
1120
1121   p->need_emms = 1;
1122
1123   emit_2ub(p, X86_TWOB, 0x67);
1124   emit_modrm( p, dst, src );
1125}
1126
1127void mmx_movd( struct x86_function *p,
1128	       struct x86_reg dst,
1129	       struct x86_reg src )
1130{
1131   p->need_emms = 1;
1132   emit_1ub(p, X86_TWOB);
1133   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
1134}
1135
1136void mmx_movq( struct x86_function *p,
1137	       struct x86_reg dst,
1138	       struct x86_reg src )
1139{
1140   p->need_emms = 1;
1141   emit_1ub(p, X86_TWOB);
1142   emit_op_modrm( p, 0x6f, 0x7f, dst, src );
1143}
1144
1145
1146/***********************************************************************
1147 * Helper functions
1148 */
1149
1150
1151/* Retreive a reference to one of the function arguments, taking into
1152 * account any push/pop activity:
1153 */
1154struct x86_reg x86_fn_arg( struct x86_function *p,
1155			   unsigned arg )
1156{
1157   return x86_make_disp(x86_make_reg(file_REG32, reg_SP),
1158			p->stack_offset + arg * 4);	/* ??? */
1159}
1160
1161
1162void x86_init_func( struct x86_function *p )
1163{
1164   p->size = 0;
1165   p->store = NULL;
1166   p->csr = p->store;
1167}
1168
1169int x86_init_func_size( struct x86_function *p, unsigned code_size )
1170{
1171   p->size = code_size;
1172   p->store = _mesa_exec_malloc(code_size);
1173   p->csr = p->store;
1174   return p->store != NULL;
1175}
1176
1177void x86_release_func( struct x86_function *p )
1178{
1179   _mesa_exec_free(p->store);
1180   p->store = NULL;
1181   p->csr = NULL;
1182   p->size = 0;
1183}
1184
1185
1186void (*x86_get_func( struct x86_function *p ))(void)
1187{
1188   if (DISASSEM && p->store)
1189      printf("disassemble %p %p\n", p->store, p->csr);
1190   return (void (*)(void)) (unsigned long) p->store;
1191}
1192
1193#else
1194
/* Does nothing; this is the only definition compiled when USE_X86_ASM
 * is set but the target is not i386.
 */
void x86sse_dummy( void )
{
   return;
}
1198
1199#endif
1200
1201#else  /* USE_X86_ASM */
1202
/* silence warning (presumably the one about an empty translation unit
 * when USE_X86_ASM is not defined)
 */
int x86sse_c_dummy_var;
1204
1205#endif /* USE_X86_ASM */
1206