t_vertex_sse.c revision dd4c1dd0382277b080fb4981e027250e10658ae8
1b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/*
2b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Copyright 2003 Tungsten Graphics, inc.
3b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * All Rights Reserved.
4b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *
5b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Permission is hereby granted, free of charge, to any person obtaining a
6b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * copy of this software and associated documentation files (the "Software"),
7b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * to deal in the Software without restriction, including without limitation
8b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * on the rights to use, copy, modify, merge, publish, distribute, sub
9b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * license, and/or sell copies of the Software, and to permit persons to whom
10b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * the Software is furnished to do so, subject to the following conditions:
11b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *
12b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * The above copyright notice and this permission notice (including the next
13b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * paragraph) shall be included in all copies or substantial portions of the
14b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Software.
15b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *
16b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
19b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * USE OR OTHER DEALINGS IN THE SOFTWARE.
23b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *
24b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Authors:
25b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *    Keith Whitwell <keithw@tungstengraphics.com>
26b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
27b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
28b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "glheader.h"
29b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "context.h"
30b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "colormac.h"
31b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "t_context.h"
32b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "t_vertex.h"
33b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include "simple_list.h"
34b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
35b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include <unistd.h>
36b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include <sys/types.h>
37b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include <sys/stat.h>
38b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell#include <fcntl.h>
39b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
/* Symbolic names for the indices 0..3 (presumably vector components;
 * their users are outside this part of the file).
 */
#define X 0
#define Y 1
#define Z 2
#define W 3

/* Non-zero compiles in the trace output produced by disassem(). */
#define DISASSEM 1
46dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
47b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstruct x86_reg {
48b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLuint file:3;
49b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLuint idx:3;
50b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLuint mod:2;		/* mod_REG if this is just a register */
51b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLint  disp:24;		/* only +/- 23bits of offset - should be enough... */
52b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell};
53b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
54b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstruct x86_program {
55b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLcontext *ctx;
56b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
57b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLubyte *store;
58b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLubyte *csr;
59b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
60b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLuint stack_offset;
61b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
62b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLboolean inputs_safe;
63b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLboolean outputs_safe;
64b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
65b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg identity;
66b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg vp0;
67b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg vp1;
68b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell};
69b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
70b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
/* Escape byte emitted in front of the two-byte opcodes used below. */
#define X86_TWOB   0x0f
72b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
/* Register files.  More exist, but these are the only ones used here. */
enum x86_reg_file {
   file_REG32 = 0,
   file_XMM   = 1
};
79b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
/* Values for the mod field of the modr/m byte.  The numeric values are
 * shifted directly into bits 7..6 of that byte by emit_modrm().
 */
enum x86_reg_mod {
   mod_INDIRECT = 0,
   mod_DISP8    = 1,
   mod_DISP32   = 2,
   mod_REG      = 3
};
88b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
/* Register numbers of the 32-bit register file.  The values are used
 * directly as register fields in the emitted instruction bytes.
 */
enum x86_reg_name {
   reg_AX = 0,
   reg_CX = 1,
   reg_DX = 2,
   reg_BX = 3,
   reg_SP = 4,
   reg_BP = 5,
   reg_SI = 6,
   reg_DI = 7
};
99b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
100b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
/* Condition codes.  The value is added to a base opcode by the jcc
 * emitters (0x70 for the short form, 0x0f 0x80 for the long form).
 */
enum x86_cc {
   cc_O   = 0,   /* overflow */
   cc_NO  = 1,   /* not overflow */
   cc_NAE = 2,   /* not above or equal / carry */
   cc_AE  = 3,   /* above or equal / not carry */
   cc_E   = 4,   /* equal / zero */
   cc_NE  = 5    /* not equal / not zero */
};

/* Alternative spellings of the equal / not-equal conditions. */
#define cc_Z   cc_E
#define cc_NZ  cc_NE
112b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
113b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
114b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Create and manipulate registers and regmem values:
115b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
116b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg make_reg( GLuint file,
117b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell				GLuint idx )
118b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
119b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg reg;
120b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
121b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   reg.file = file;
122b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   reg.idx = idx;
123b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   reg.mod = mod_REG;
124b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   reg.disp = 0;
125b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
126b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   return reg;
127b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
128b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
129b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg make_disp( struct x86_reg reg,
130b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell				 GLint disp )
131b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
132b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   assert(reg.file == file_REG32);
133b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
134b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   if (reg.mod == mod_REG)
135b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      reg.disp = disp;
136b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   else
137b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      reg.disp += disp;
138b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
139b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   if (reg.disp == 0)
140b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      reg.mod = mod_INDIRECT;
141b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   else if (reg.disp <= 127 && reg.disp >= -128)
142b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      reg.mod = mod_DISP8;
143b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   else
144b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      reg.mod = mod_DISP32;
145b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
146b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   return reg;
147b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
148b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
149dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic struct x86_reg deref( struct x86_reg reg )
150dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{
151dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   return make_disp(reg, 0);
152dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell}
153dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
154dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic struct x86_reg get_base_reg( struct x86_reg reg )
155dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{
156dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   return make_reg( reg.file, reg.idx );
157dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell}
158dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
159dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
160b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Retreive a reference to one of the function arguments, taking into
161b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * account any push/pop activity:
162b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
163b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg make_fn_arg( struct x86_program *p,
164b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell				   GLuint arg )
165b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
166b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   return make_disp(make_reg(file_REG32, reg_SP),
167b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		    p->stack_offset + arg * 4);	/* ??? */
168b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
169b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
170b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
171b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg get_identity( struct x86_program *p )
172b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
173b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   return p->identity;
174b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
175b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
176b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic struct x86_reg get_sse_temp( struct x86_program *p )
177b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
178b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   return make_reg(file_XMM, 7); /* hardwired */
179b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
180b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
181b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void release_temp( struct x86_program *p,
182b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			  struct x86_reg reg )
183b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
184b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   assert(reg.file == file_XMM &&
185b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	  reg.idx == 7);
186b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
187b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
188b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Emit bytes to the instruction stream:
189b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
190b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_1b( struct x86_program *p, GLbyte b0 )
191b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
192b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   *(GLbyte *)(p->csr++) = b0;
193b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
194b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
195dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_1i( struct x86_program *p, GLint i0 )
196dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{
197dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   *(GLint *)(p->csr) = i0;
198dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   p->csr += 4;
199dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell}
200dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
/* Debug trace: when DISASSEM is non-zero, print the current emit
 * address together with the name of the emitting function.
 *
 * 'fn' may be null, in which case nothing is printed.  Names are
 * compared by pointer, not by content, so a line is printed only when
 * the name pointer differs from the one printed last.
 */
static void disassem( struct x86_program *p, const char *fn )
{
#if DISASSEM
   static const char *last_fn;
   if (fn && fn != last_fn) {
      /* A pointer must be printed with %p; passing it to %x is
       * undefined and truncates where pointers are wider than int.
       */
      _mesa_printf("%p: %s\n", (void *) p->csr, fn);
      last_fn = fn;
   }
#else
   (void) p;
   (void) fn;
#endif
}
211dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
212dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_1ub_fn( struct x86_program *p, GLubyte b0, const char *fn )
213b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
214dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   disassem(p, fn);
215b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   *(p->csr++) = b0;
216b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
217b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
218dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_2ub_fn( struct x86_program *p, GLubyte b0, GLubyte b1, const char *fn )
219b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
220dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   disassem(p, fn);
221b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   *(p->csr++) = b0;
222b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   *(p->csr++) = b1;
223b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
224b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
225dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_3ub_fn( struct x86_program *p, GLubyte b0, GLubyte b1, GLubyte b2, const char *fn )
226b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
227dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   disassem(p, fn);
228b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   *(p->csr++) = b0;
229b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   *(p->csr++) = b1;
230b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   *(p->csr++) = b2;
231b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
232b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
/* Convenience wrappers that tag the emitted bytes with the name of
 * the calling function for the disassem() trace.
 */
#define emit_1ub(prog, byte0) \
   emit_1ub_fn(prog, byte0, __FUNCTION__)
#define emit_2ub(prog, byte0, byte1) \
   emit_2ub_fn(prog, byte0, byte1, __FUNCTION__)
#define emit_3ub(prog, byte0, byte1, byte2) \
   emit_3ub_fn(prog, byte0, byte1, byte2, __FUNCTION__)
236b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
237b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
238b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Labels, jumps and fixup:
239b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
240b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic GLubyte *get_label( struct x86_program *p )
241b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
242b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   return p->csr;
243b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
244b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
245b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_jcc( struct x86_program *p,
246b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		      GLuint cc,
247b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		      GLubyte *label )
248b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
249b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLint offset = label - (get_label(p) + 2);
250dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
251b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   if (offset <= 127 && offset >= -128) {
252b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_1ub(p, 0x70 + cc);
253b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_1b(p, (GLbyte) offset);
254b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
255b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   else {
256b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      offset = label - (get_label(p) + 5);
257b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_2ub(p, 0x0f, 0x80 + cc);
258b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_1i(p, offset);
259b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
260b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
261b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
262b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Always use a 32bit offset for forward jumps:
263b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
264b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic GLubyte *emit_jcc_forward( struct x86_program *p,
265b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			       GLuint cc )
266b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
267b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_2ub(p, 0x0f, 0x80 + cc);
268b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1i(p, 0);
269b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   return get_label(p);
270b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
271b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
272b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Fixup offset from forward jump:
273b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
274b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void do_fixup( struct x86_program *p,
275b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		      GLubyte *fixup )
276b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
277b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   *(int *)(fixup - 4) = get_label(p) - fixup;
278b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
279b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
280b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_push( struct x86_program *p,
281b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       struct x86_reg reg )
282b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
283b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   assert(reg.mod == mod_REG);
284b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1ub(p, 0x50 + reg.idx);
285b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   p->stack_offset += 4;
286b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
287b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
288b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_pop( struct x86_program *p,
289b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       struct x86_reg reg )
290b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
291b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   assert(reg.mod == mod_REG);
292b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1ub(p, 0x58 + reg.idx);
293b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   p->stack_offset -= 4;
294b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
295b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
296b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_inc( struct x86_program *p,
297b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       struct x86_reg reg )
298b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
299b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   assert(reg.mod == mod_REG);
300b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1ub(p, 0x40 + reg.idx);
301b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
302b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
303b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_dec( struct x86_program *p,
304b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       struct x86_reg reg )
305b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
306b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   assert(reg.mod == mod_REG);
307dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_1ub(p, 0x48 + reg.idx);
308b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
309b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
310b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_ret( struct x86_program *p )
311b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
312b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1ub(p, 0xc3);
313b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
314b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
315b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
316b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
317b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
318b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Build a modRM byte + possible displacement.  No treatment of SIB
319b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * indexing.  BZZT - no way to encode an absolute address.
320b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
321b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_modrm( struct x86_program *p,
322b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg reg,
323b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg regmem )
324b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
325b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLubyte val = 0;
326b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
327b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   assert(reg.mod == mod_REG);
328b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
329b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   val |= regmem.mod << 6;     	/* mod field */
330b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   val |= reg.idx << 3;		/* reg field */
331b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   val |= regmem.idx;		/* r/m field */
332b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
333dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_1ub_fn(p, val, 0);
334dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
335dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   /* Oh-oh we've stumbled into the SIB thing.
336dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell    */
337dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   if (regmem.idx == reg_SP) {
338dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell      emit_1ub_fn(p, 0x24, 0);		/* simplistic! */
339dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   }
340b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
341b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   switch (regmem.mod) {
342b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   case mod_REG:
343b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   case mod_INDIRECT:
344b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      break;
345b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   case mod_DISP8:
346b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_1b(p, regmem.disp);
347dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell      break;
348b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   case mod_DISP32:
349b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_1i(p, regmem.disp);
350dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell      break;
351b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
352b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
353b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
354b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Many x86 instructions have two opcodes to cope with the situations
355b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * where the destination is a register or memory reference
356b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * respectively.  This function selects the correct opcode based on
357b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * the arguments presented.
358b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
359b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_op_modrm( struct x86_program *p,
360b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   GLubyte op_dst_is_reg,
361b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   GLubyte op_dst_is_mem,
362b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dst,
363b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg src )
364b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
365b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   switch (dst.mod) {
366b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   case mod_REG:
367dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell      emit_1ub_fn(p, op_dst_is_reg, 0);
368b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_modrm(p, dst, src);
369b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      break;
370b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   case mod_INDIRECT:
371b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   case mod_DISP32:
372b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   case mod_DISP8:
373b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      assert(src.mod == mod_REG);
374dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell      emit_1ub_fn(p, op_dst_is_mem, 0);
375b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_modrm(p, src, dst);
376b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      break;
377b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
378b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
379b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
380b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_mov( struct x86_program *p,
381b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		      struct x86_reg dst,
382b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		      struct x86_reg src )
383b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
384b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_op_modrm( p, 0x8b, 0x89, dst, src );
385b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
386b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
387b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_xor( struct x86_program *p,
388b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		      struct x86_reg dst,
389b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		      struct x86_reg src )
390b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
391b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_op_modrm( p, 0x33, 0x31, dst, src );
392b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
393b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
394dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_cmp( struct x86_program *p,
395dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell		      struct x86_reg dst,
396dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell		      struct x86_reg src )
397dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{
398dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_op_modrm( p, 0x3b, 0x39, dst, src );
399dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell}
400dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
401b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movlps( struct x86_program *p,
402b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg dst,
403b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg src )
404b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
405b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1ub(p, X86_TWOB);
406b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_op_modrm( p, 0x12, 0x13, dst, src );
407b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
408b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
409b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movhps( struct x86_program *p,
410b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg dst,
411b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg src )
412b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
413b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1ub(p, X86_TWOB);
414b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_op_modrm( p, 0x16, 0x17, dst, src );
415b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
416b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
417b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movd( struct x86_program *p,
418b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       struct x86_reg dst,
419b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       struct x86_reg src )
420b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
421b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_2ub(p, 0x66, X86_TWOB);
422b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_op_modrm( p, 0x6e, 0x7e, dst, src );
423b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
424b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
425b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movss( struct x86_program *p,
426b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       struct x86_reg dst,
427b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       struct x86_reg src )
428b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
429b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_2ub(p, 0xF3, X86_TWOB);
430b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_op_modrm( p, 0x10, 0x11, dst, src );
431b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
432b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
433b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movaps( struct x86_program *p,
434b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg dst,
435b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg src )
436b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
437b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1ub(p, X86_TWOB);
438b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_op_modrm( p, 0x28, 0x29, dst, src );
439b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
440b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
441b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_movups( struct x86_program *p,
442b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg dst,
443b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg src )
444b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
445b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1ub(p, X86_TWOB);
446b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_op_modrm( p, 0x10, 0x11, dst, src );
447b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
448b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
449b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* SSE operations often only have one format, with dest constrained to
450b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * be a register:
451b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
452b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_mulps( struct x86_program *p,
453b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg dst,
454b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg src )
455b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
456b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_2ub(p, X86_TWOB, 0x59);
457b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_modrm( p, dst, src );
458b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
459b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
460b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_addps( struct x86_program *p,
461b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg dst,
462b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg src )
463b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
464b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_2ub(p, X86_TWOB, 0x58);
465b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_modrm( p, dst, src );
466b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
467b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
468b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_cvtps2dq( struct x86_program *p,
469b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg dst,
470b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg src )
471b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
472b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_3ub(p, 0x66, X86_TWOB, 0x5B);
473b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_modrm( p, dst, src );
474b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
475b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
476b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_packssdw( struct x86_program *p,
477b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg dst,
478b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg src )
479b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
480b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_3ub(p, 0x66, X86_TWOB, 0x6B);
481b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_modrm( p, dst, src );
482b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
483b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
484b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_packsswb( struct x86_program *p,
485b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg dst,
486b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg src )
487b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
488b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_3ub(p, 0x66, X86_TWOB, 0x63);
489b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_modrm( p, dst, src );
490b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
491b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
492dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_packuswb( struct x86_program *p,
493dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell			struct x86_reg dst,
494dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell			struct x86_reg src )
495dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{
496dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_3ub(p, 0x66, X86_TWOB, 0x67);
497dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_modrm( p, dst, src );
498dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell}
499dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
500b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Load effective address:
501b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
502b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_lea( struct x86_program *p,
503b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		      struct x86_reg dst,
504b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		      struct x86_reg src )
505b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
506b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1ub(p, 0x8d);
507b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_modrm( p, dst, src );
508b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
509b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
510b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_add_imm( struct x86_program *p,
511b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			  struct x86_reg dst,
512b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			  struct x86_reg src,
513b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			  GLint value )
514b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
515b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_lea(p, dst, make_disp(src, value));
516b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
517b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
518dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwellstatic void emit_test( struct x86_program *p,
519dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell		       struct x86_reg dst,
520dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell		       struct x86_reg src )
521dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell{
522dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_1ub(p, 0x85);
523dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_modrm( p, dst, src );
524dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell}
525dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
526b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
527b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
528b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
529b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/**
530b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * Perform a reduced swizzle:
531b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
532b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_pshufd( struct x86_program *p,
533b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg dest,
534b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg arg0,
535b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 GLubyte x,
536b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 GLubyte y,
537b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 GLubyte z,
538b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 GLubyte w)
539b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
540b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_3ub(p, 0x66, X86_TWOB, 0x70);
541b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_modrm(p, dest, arg0);
542b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_1ub(p, (x|(y<<2)|(z<<4)|w<<6));
543b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
544b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
545b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
546b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_pk4ub( struct x86_program *p,
547b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg dest,
548b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg arg0 )
549b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
550b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_cvtps2dq(p, dest, arg0);
551b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_packssdw(p, dest, dest);
552dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_packuswb(p, dest, dest);
553b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
554b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
555b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_4( struct x86_program *p,
556b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
557b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
558b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
559b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movups(p, dest, arg0);
560b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
561b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
562b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_3( struct x86_program *p,
563b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
564b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
565b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
566b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Have to jump through some hoops:
567b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    *
568b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * 0 0 0 1 -- skip if reg[3] preserved over loop iterations
569b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * c 0 0 1
570b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * 0 0 c 1
571b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * a b c 1
572b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
573b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movups(p, dest, get_identity(p));
574b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movss(p, dest, make_disp(arg0, 8));
575b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_pshufd(p, dest, dest, Y,Z,X,W );
576b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movlps(p, dest, arg0);
577b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
578b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
579b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_2( struct x86_program *p,
580b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
581b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
582b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
583b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Pull in 2 dwords, then copy the top 2 dwords with 0,1 from id.
584b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
585b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movlps(p, dest, arg0);
586b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movhps(p, dest, get_identity(p));
587b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
588b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
589b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load4f_1( struct x86_program *p,
590b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
591b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
592b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
593b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Initialized with [0,0,0,1] from id, then pull in the single low
594b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * word.
595b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
596b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movaps(p, dest, get_identity(p));
597b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movss(p, dest, arg0);
598b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
599b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
600b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
601b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
602b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load3f_3( struct x86_program *p,
603b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
604b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
605b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
606b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Over-reads by 1 dword - potential SEGV...  Deal with in
607b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * array_cache by treating size-3 arrays specially, copying to
608b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * temporary storage if last element (how can you tell?) falls on a
609b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * 4k boundary.
610b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
611b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   if (p->inputs_safe) {
612b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_movaps(p, dest, arg0);
613b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
614b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   else {
615b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      /* c . . .
616b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       * c c c c
617b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       * a b c c
618b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       */
619b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_movss(p, dest, make_disp(arg0, 8));
620b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_pshufd(p, dest, dest, X,X,X,X);
621b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_movlps(p, dest, arg0);
622b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
623b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
624b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
625b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load3f_2( struct x86_program *p,
626b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
627b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
628b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
629b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_load4f_2(p, dest, arg0);
630b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
631b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
632b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load3f_1( struct x86_program *p,
633b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
634b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
635b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
636b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_load4f_1(p, dest, arg0);
637b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
638b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
639b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load2f_2( struct x86_program *p,
640b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
641b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
642b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
643b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movlps(p, dest, arg0);
644b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
645b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
646b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load2f_1( struct x86_program *p,
647b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
648b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
649b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
650b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_load4f_1(p, dest, arg0);
651b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
652b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
653b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load1f_1( struct x86_program *p,
654b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
655b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
656b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
657b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movss(p, dest, arg0);
658b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
659b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
660b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void (*load[4][4])( struct x86_program *p,
661b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
662b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 ) = {
663b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   { emit_load1f_1,
664b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load1f_1,
665b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load1f_1,
666b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load1f_1 },
667b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
668b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   { emit_load2f_1,
669b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load2f_2,
670b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load2f_2,
671b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load2f_2 },
672b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
673b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   { emit_load3f_1,
674b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load3f_2,
675b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load3f_3,
676b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load3f_3 },
677b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
678b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   { emit_load4f_1,
679b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load4f_2,
680b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load4f_3,
681b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell     emit_load4f_4 }
682b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell};
683b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
684b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_load( struct x86_program *p,
685dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell		       struct x86_reg dest,
686b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       GLuint sz,
687b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       struct x86_reg src,
688b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell		       GLuint src_sz)
689b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
690dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   load[sz-1][src_sz-1](p, dest, src);
691b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
692b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
693b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
694b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store4f( struct x86_program *p,
695b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			  struct x86_reg dest,
696b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			  struct x86_reg arg0 )
697b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
698b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movups(p, dest, arg0);
699b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
700b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
701b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store3f( struct x86_program *p,
702b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			  struct x86_reg dest,
703b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			  struct x86_reg arg0 )
704b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
705b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   if (p->outputs_safe) {
706b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      /* Emit the extra dword anyway.  This may hurt writecombining,
707b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       * may cause other problems.
708b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       */
709b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_movups(p, dest, arg0);
710b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
711b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   else {
712b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      /* Alternate strategy - emit two, shuffle, emit one.
713b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       */
714b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      struct x86_reg tmp = get_sse_temp(p);
715b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_movlps(p, dest, arg0);
716b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
717b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_pshufd(p, tmp, arg0, Z, Z, Z, Z );
718b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_movss(p, make_disp(dest,8), tmp);
719b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      release_temp(p, tmp);
720b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
721b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
722b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
723b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store2f( struct x86_program *p,
724b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg dest,
725b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			   struct x86_reg arg0 )
726b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
727b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movlps(p, dest, arg0);
728b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
729b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
730b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store1f( struct x86_program *p,
731b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			  struct x86_reg dest,
732b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			  struct x86_reg arg0 )
733b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
734b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_movss(p, dest, arg0);
735b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
736b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
737b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
738b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void (*store[4])( struct x86_program *p,
739b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg dest,
740b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			 struct x86_reg arg0 ) =
741b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
742b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_store1f,
743b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_store2f,
744b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_store3f,
745b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_store4f
746b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell};
747b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
748b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic void emit_store( struct x86_program *p,
749b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg dest,
750b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			GLuint sz,
751b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell			struct x86_reg temp )
752b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
753b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
754b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   store[sz-1](p, dest, temp);
755b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
756b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
757b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
758b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic GLint get_offset( const void *a, const void *b )
759b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
760b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   return (const char *)b - (const char *)a;
761b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
762b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
763b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
764b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
765b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell/* Lots of hardcoding
766b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *
767b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * EAX -- pointer to current output vertex
768b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell * ECX -- pointer to current attribute
769b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell *
770b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell */
771b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellstatic GLboolean build_vertex_emit( struct x86_program *p )
772b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
773b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLcontext *ctx = p->ctx;
774b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   TNLcontext *tnl = TNL_CONTEXT(ctx);
775b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
776b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct tnl_clipspace_attr *a = vtx->attr;
777b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLuint j;
778b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
779b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg vertexEAX = make_reg(file_REG32, reg_AX);
780b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg srcEDI = make_reg(file_REG32, reg_CX);
781b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg countEBP = make_reg(file_REG32, reg_BP);
782b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg vtxESI = make_reg(file_REG32, reg_SI);
783b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg tmp = make_reg(file_XMM, 0);
784b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg vp0 = make_reg(file_XMM, 1);
785b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_reg vp1 = make_reg(file_XMM, 2);
786dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   struct x86_reg chan0 = make_reg(file_XMM, 3);
787b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   GLubyte *fixup, *label;
788b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
789b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   p->csr = p->store;
790b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
791b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Push a few regs?
792b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
793b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_push(p, srcEDI);
794b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_push(p, countEBP);
795b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_push(p, vtxESI);
796b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
797dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
798dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   /* Get vertex count, compare to zero
799dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell    */
800dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_xor(p, srcEDI, srcEDI);
801dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_mov(p, countEBP, make_fn_arg(p, 2));
802dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_cmp(p, countEBP, srcEDI);
803dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   fixup = emit_jcc_forward(p, cc_E);
804dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
805dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
806b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Initialize destination register.
807b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
808b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_mov(p, vertexEAX, make_fn_arg(p, 3));
809b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
810b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Dereference ctx to get tnl, then vtx:
811b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
812b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_mov(p, vtxESI, make_fn_arg(p, 1));
813b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_mov(p, vtxESI, make_disp(vtxESI, get_offset(ctx, &ctx->swtnl_context)));
814b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   vtxESI = make_disp(vtxESI, get_offset(tnl, &tnl->clipspace));
815b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
816b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
817b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Possibly load vp0, vp1 for viewport calcs:
818b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
819b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   if (vtx->need_viewport) {
820b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_movups(p, vp0, make_disp(vtxESI, get_offset(vtx, &vtx->vp_scale[0])));
821b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_movups(p, vp1, make_disp(vtxESI, get_offset(vtx, &vtx->vp_xlate[0])));
822b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
823b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
824dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   /* always load, needed or not:
825dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell    */
826dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_movups(p, chan0, make_disp(vtxESI, get_offset(vtx, &vtx->chan_scale[0])));
827dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell
828b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Note address for loop jump */
829b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   label = get_label(p);
830b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
831b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Emit code for each of the attributes.  Currently routes
832b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * everything through SSE registers, even when it might be more
833b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * efficient to stick with regular old x86.  No optimization or
834b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * other tricks - enough new ground to cover here just getting
835b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    * things working.
836b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
837b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   for (j = 0; j < vtx->attr_count; j++) {
838b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      struct x86_reg dest = make_disp(vertexEAX, vtx->attr[j].vertoffset);
839b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      struct x86_reg ptr_to_src = make_disp(vtxESI, get_offset(vtx, &vtx->attr[j].inputptr));
840b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
841b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      /* Load current a[j].inputptr
842b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       */
843b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_mov(p, srcEDI, ptr_to_src);
844b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
845b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      /* Now, load an XMM reg from src, perhaps transform, then save.
846b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       * Could be shortcircuited in specific cases:
847b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       */
848b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      switch (a[j].format) {
849b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_1F:
850dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 1, deref(srcEDI), vtx->attr[j].inputsize);
851b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 1, tmp);
852b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_2F:
853dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 2, deref(srcEDI), vtx->attr[j].inputsize);
854b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 2, tmp);
855b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_3F:
856b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 /* Potentially the worst case - hardcode 2+1 copying:
857b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	  */
858dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 3, deref(srcEDI), vtx->attr[j].inputsize);
859b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 3, tmp);
860b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_4F:
861dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize);
862b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 4, tmp);
863b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 break;
864b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_2F_VIEWPORT:
865dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 2, deref(srcEDI), vtx->attr[j].inputsize);
866dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_mulps(p, tmp, vp0);
867dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_addps(p, tmp, vp1);
868b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 2, tmp);
869b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 break;
870b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_3F_VIEWPORT:
871dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 3, deref(srcEDI), vtx->attr[j].inputsize);
872dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_mulps(p, tmp, vp0);
873dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_addps(p, tmp, vp1);
874b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 3, tmp);
875b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 break;
876b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_4F_VIEWPORT:
877dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize);
878dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_mulps(p, tmp, vp0);
879dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_addps(p, tmp, vp1);
880b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 4, tmp);
881b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 break;
882b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_3F_XYW:
883dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize);
884b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_pshufd(p, tmp, tmp, X, Y, W, Z);
885b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 3, tmp);
886b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 break;
887b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
888b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 /* Try and bond 3ub + 1ub pairs into a single 4ub operation?
889b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	  */
890b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_1UB_1F:
891b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_3UB_3F_RGB:
892b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_3UB_3F_BGR:
893dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 _mesa_printf("non-implemneted format %d\n", a[j].format);
894b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 return GL_FALSE;	/* add this later */
895b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
896b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_4UB_4F_RGBA:
897dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize);
898dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_mulps(p, tmp, chan0);
899b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_pk4ub(p, tmp, tmp);
900b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 1, tmp);
901b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 break;
902b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_4UB_4F_BGRA:
903dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize);
904b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_pshufd(p, tmp, tmp, Z, Y, X, W);
905dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_mulps(p, tmp, chan0);
906b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_pk4ub(p, tmp, tmp);
907b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 1, tmp);
908b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 break;
909b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_4UB_4F_ARGB:
910dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize);
911b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_pshufd(p, tmp, tmp, W, X, Y, Z);
912dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_mulps(p, tmp, chan0);
913b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_pk4ub(p, tmp, tmp);
914b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 1, tmp);
915b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 break;
916b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_4UB_4F_ABGR:
917dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize);
918b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_pshufd(p, tmp, tmp, W, Z, Y, X);
919dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 emit_mulps(p, tmp, chan0);
920b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_pk4ub(p, tmp, tmp);
921b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 emit_store(p, dest, 1, tmp);
922b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 break;
923b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      case EMIT_4CHAN_4F_RGBA:
924b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 switch (CHAN_TYPE) {
925b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 case GL_UNSIGNED_BYTE:
926dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	    emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize);
927dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	    emit_mulps(p, tmp, chan0);
928b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	    emit_pk4ub(p, tmp, tmp);
929b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	    emit_store(p, dest, 1, tmp);
930b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	    break;
931b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 case GL_FLOAT:
932dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	    emit_load(p, tmp, 4, deref(srcEDI), vtx->attr[j].inputsize);
933b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	    emit_store(p, dest, 4, tmp);
934b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	    break;
935dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 case GL_UNSIGNED_SHORT:
936b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 default:
937dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	    _mesa_printf("unknown CHAN_TYPE %s\n", _mesa_lookup_enum_by_nr(CHAN_TYPE));
938dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	    return GL_FALSE;
939b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 }
940dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 break;
941b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      default:
942dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 _mesa_printf("unknown a[%d].format %d\n", j, a[j].format);
943b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell	 return GL_FALSE;	/* catch any new opcodes */
944b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      }
945b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
946b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      /* add a[j].inputstride (hardcoded value - could just as easily
947b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       * pull the stride value from memory each time).
948b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       */
949b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_add_imm(p, srcEDI, srcEDI, a[j].inputstride);
950b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
951b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      /* save new value of a[j].inputptr
952b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell       */
953b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      emit_mov(p, ptr_to_src, srcEDI);
954b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
955b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
956b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
957b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Next vertex:
958b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
959b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_add_imm(p, vertexEAX, vertexEAX, vtx->vertex_size);
960b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
961b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* decr count, loop if not zero
962b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
963b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_dec(p, countEBP);
964dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_test(p, countEBP, countEBP);
965b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_jcc(p, cc_NZ, label);
966b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
967b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Land forward jump here:
968b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
969b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   do_fixup(p, fixup);
970b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
971b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   /* Pop regs and return
972b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell    */
973dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell   emit_pop(p, get_base_reg(vtxESI));
974b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_pop(p, countEBP);
975b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_pop(p, srcEDI);
976b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   emit_ret(p);
977b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
978b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   vtx->emit = (tnl_emit_func)p->store;
979b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   return GL_TRUE;
980b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
981b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
982b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwellvoid _tnl_generate_sse_emit( GLcontext *ctx )
983b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell{
984b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct tnl_clipspace *vtx = GET_VERTEX_STATE(ctx);
985b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   struct x86_program p;
986b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
987b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   memset(&p, 0, sizeof(p));
988b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   p.ctx = ctx;
989b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   p.store = MALLOC(1024);
990b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
991b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   p.inputs_safe = 1;		/* for now */
992b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   p.outputs_safe = 1;		/* for now */
993b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
994b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   if (build_vertex_emit(&p)) {
995b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      _tnl_register_fastpath( vtx, GL_TRUE );
996dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell      if (DISASSEM)
997dd4c1dd0382277b080fb4981e027250e10658ae8Keith Whitwell	 _mesa_printf("disassemble 0x%x 0x%x\n", p.store, p.csr);
998b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
999b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   else {
1000b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell      FREE(p.store);
1001b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   }
1002b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell
1003b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   (void)emit_movd;
1004b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   (void)emit_inc;
1005b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell   (void)emit_xor;
1006b745bf08cd5e772f86360267995a96e9b73384b0Keith Whitwell}
1007