1
2/*
3 * Mesa 3-D graphics library
4 *
5 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26/*
27 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
28 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
29 * in there will break the build on some platforms.
30 */
31
32#include "assyntax.h"
33#include "matypes.h"
34#include "xform_args.h"
35
36	SEG_TEXT
37
38#define FP_ONE		1065353216
39#define FP_ZERO		0
40
41#define SRC0		REGOFF(0, ESI)
42#define SRC1		REGOFF(4, ESI)
43#define SRC2		REGOFF(8, ESI)
44#define SRC3		REGOFF(12, ESI)
45#define DST0		REGOFF(0, EDI)
46#define DST1		REGOFF(4, EDI)
47#define DST2		REGOFF(8, EDI)
48#define DST3		REGOFF(12, EDI)
49#define MAT0		REGOFF(0, EDX)
50#define MAT1		REGOFF(4, EDX)
51#define MAT2		REGOFF(8, EDX)
52#define MAT3		REGOFF(12, EDX)
53#define MAT4		REGOFF(16, EDX)
54#define MAT5		REGOFF(20, EDX)
55#define MAT6		REGOFF(24, EDX)
56#define MAT7		REGOFF(28, EDX)
57#define MAT8		REGOFF(32, EDX)
58#define MAT9		REGOFF(36, EDX)
59#define MAT10		REGOFF(40, EDX)
60#define MAT11		REGOFF(44, EDX)
61#define MAT12		REGOFF(48, EDX)
62#define MAT13		REGOFF(52, EDX)
63#define MAT14		REGOFF(56, EDX)
64#define MAT15		REGOFF(60, EDX)
65
66
/*
 * _mesa_x86_transform_points3_general
 *
 * x87 transform of 3-component points by a full 16-entry matrix, writing
 * 4-component results.  For each input point (s0, s1, s2) the loop stores
 *
 *	DST0 = s0*MAT0 + s1*MAT4 + s2*MAT8  + MAT12
 *	DST1 = s0*MAT1 + s1*MAT5 + s2*MAT9  + MAT13
 *	DST2 = s0*MAT2 + s1*MAT6 + s2*MAT10 + MAT14
 *	DST3 = s0*MAT3 + s1*MAT7 + s2*MAT11 + MAT15
 *
 * ARG_SOURCE, ARG_DEST and ARG_MATRIX come from xform_args.h, which is not
 * visible here; presumably they are stack slots located through
 * FRAME_OFFSET, so FRAME_OFFSET has to match the bytes pushed below.
 *
 * Loop registers:
 *	ESI	current input point, advanced by the source stride
 *	EDI	current output point, advanced by 16 bytes
 *	EDX	matrix base
 *	EAX	source stride (byte count added to ESI)
 *	ECX	end address of the output: start + count*16
 *
 * A source count of zero returns at once and leaves the destination
 * header (count, size, flags) untouched.
 *
 * All source slots of a point are read before any destination slot is
 * written.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_general )
HIDDEN(_mesa_x86_transform_points3_general)
GLNAME( _mesa_x86_transform_points3_general ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_general_ret) )

	/* publish the result header: same count, four components */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* switch from vector headers to raw data cursors */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = output start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

/*
 * Stack pictures list ST0 first.  ax/ay/az/aw are the running sums for
 * DST0..DST3; tx/ty/tz/tw are the products about to be folded into them.
 */
ALIGNTEXT16
LLBL(x86_p3_general_next):

	/* products of s0 seed the four sums */
	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* ax */
	FLD_S( SRC0 )
	FMUL_S( MAT1 )			/* ay ax */
	FLD_S( SRC0 )
	FMUL_S( MAT2 )			/* az ay ax */
	FLD_S( SRC0 )
	FMUL_S( MAT3 )			/* aw az ay ax */

	/* products of s1; eight values are live after the last multiply */
	FLD_S( SRC1 )
	FMUL_S( MAT4 )			/* tx aw az ay ax */
	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* ty tx aw az ay ax */
	FLD_S( SRC1 )
	FMUL_S( MAT6 )			/* tz ty tx aw az ay ax */
	FLD_S( SRC1 )
	FMUL_S( MAT7 )			/* tw tz ty tx aw az ay ax */

	FXCH( ST(3) )			/* tx tz ty tw aw az ay ax */
	FADDP( ST0, ST(7) )		/* ax += tx: tz ty tw aw az ay ax */
	FXCH( ST(1) )			/* ty tz tw aw az ay ax */
	FADDP( ST0, ST(5) )		/* ay += ty: tz tw aw az ay ax */
	FADDP( ST0, ST(3) )		/* az += tz: tw aw az ay ax */
	FADDP( ST0, ST(1) )		/* aw += tw: aw az ay ax */

	/* products of s2, folded in the same way */
	FLD_S( SRC2 )
	FMUL_S( MAT8 )			/* tx aw az ay ax */
	FLD_S( SRC2 )
	FMUL_S( MAT9 )			/* ty tx aw az ay ax */
	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* tz ty tx aw az ay ax */
	FLD_S( SRC2 )
	FMUL_S( MAT11 )			/* tw tz ty tx aw az ay ax */

	FXCH( ST(3) )			/* tx tz ty tw aw az ay ax */
	FADDP( ST0, ST(7) )		/* ax += tx: tz ty tw aw az ay ax */
	FXCH( ST(1) )			/* ty tz tw aw az ay ax */
	FADDP( ST0, ST(5) )		/* ay += ty: tz tw aw az ay ax */
	FADDP( ST0, ST(3) )		/* az += tz: tw aw az ay ax */
	FADDP( ST0, ST(1) )		/* aw += tw: aw az ay ax */

	/* add matrix entries 12..15, rotating each sum through ST0 */
	FXCH( ST(3) )			/* ax az ay aw */
	FADD_S( MAT12 )
	FXCH( ST(2) )			/* ay az ax aw */
	FADD_S( MAT13 )
	FXCH( ST(1) )			/* az ay ax aw */
	FADD_S( MAT14 )
	FXCH( ST(3) )			/* aw ay ax az */
	FADD_S( MAT15 )

	/* write the four results and empty the x87 stack */
	FXCH( ST(2) )			/* ax ay aw az */
	FSTP_S( DST0 )			/* ay aw az */
	FSTP_S( DST1 )			/* aw az */
	FXCH( ST(1) )			/* az aw */
	FSTP_S( DST2 )			/* aw */
	FSTP_S( DST3 )			/* (empty) */

	/* step both cursors; stop when the output cursor reaches ECX */
	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_general_next) )

LLBL(x86_p3_general_ret):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
170
171
172
173
/*
 * _mesa_x86_transform_points3_perspective
 *
 * x87 transform of 3-component points that reads only matrix entries
 * 0, 5, 8, 9, 10 and 14, writing 4-component results.  For each input
 * point (s0, s1, s2) the loop stores
 *
 *	DST0 = s0*MAT0 + s2*MAT8
 *	DST1 = s1*MAT5 + s2*MAT9
 *	DST2 = s2*MAT10 + MAT14
 *	DST3 = s2 with its sign bit inverted (integer XOR, no FPU rounding)
 *
 * ARG_SOURCE, ARG_DEST and ARG_MATRIX come from xform_args.h, which is not
 * visible here; presumably they are stack slots located through
 * FRAME_OFFSET, so FRAME_OFFSET has to match the bytes pushed below.
 *
 * Loop registers:
 *	ESI	current input point, advanced by the source stride
 *	EDI	current output point, advanced by 16 bytes
 *	EDX	matrix base
 *	EAX	source stride (byte count added to ESI)
 *	EBX	sign-flipped bit pattern of s2
 *	ECX	end address of the output: start + count*16
 *
 * A source count of zero returns at once and leaves the destination
 * header untouched.  Every source slot is read before the first store.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
HIDDEN(_mesa_x86_transform_points3_perspective)
GLNAME( _mesa_x86_transform_points3_perspective ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_persp_ret) )

	/* publish the result header: same count, four components */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	/* switch from vector headers to raw data cursors */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = output start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

/*
 * Stack pictures list ST0 first.  px/py/pz are the values that end up in
 * DST0..DST2; tx/ty are the s2 products folded into px/py.
 */
ALIGNTEXT16
LLBL(x86_p3_persp_next):

	/* EBX = bits of s2 with bit 31 toggled, stored to DST3 further down */
	MOV_L( SRC2, EBX )
	XOR_L( CONST(-2147483648), EBX )

	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* px */

	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* py px */

	FLD_S( SRC2 )
	FMUL_S( MAT8 )			/* tx py px */
	FLD_S( SRC2 )
	FMUL_S( MAT9 )			/* ty tx py px */
	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* pz ty tx py px */

	FXCH( ST(2) )			/* tx ty pz py px */
	FADDP( ST0, ST(4) )		/* px += tx: ty pz py px */
	FADDP( ST0, ST(2) )		/* py += ty: pz py px */
	FLD_S( MAT14 )			/* m14 pz py px */
	FXCH( ST(1) )			/* pz m14 py px */
	FADDP( ST0, ST(1) )		/* m14 += pz, kept as pz: pz py px */

	FXCH( ST(2) )			/* px py pz */
	FSTP_S( DST0 )			/* py pz */
	FSTP_S( DST1 )			/* pz */
	FSTP_S( DST2 )			/* (empty) */
	MOV_L( EBX, DST3 )

	/* step both cursors; stop when the output cursor reaches ECX */
	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_persp_next) )

LLBL(x86_p3_persp_ret):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
251
252
253
254
/*
 * _mesa_x86_transform_points3_3d
 *
 * x87 transform of 3-component points that reads matrix entries 0-2, 4-6,
 * 8-10 and 12-14, writing 3-component results.  For each input point
 * (s0, s1, s2) the loop stores
 *
 *	DST0 = s0*MAT0 + s1*MAT4 + s2*MAT8  + MAT12
 *	DST1 = s0*MAT1 + s1*MAT5 + s2*MAT9  + MAT13
 *	DST2 = s0*MAT2 + s1*MAT6 + s2*MAT10 + MAT14
 *
 * DST3 is never written, although the output cursor still moves in
 * 16-byte steps.
 *
 * ARG_SOURCE, ARG_DEST and ARG_MATRIX come from xform_args.h, which is not
 * visible here; presumably they are stack slots located through
 * FRAME_OFFSET, so FRAME_OFFSET has to match the bytes pushed below.
 *
 * Loop registers:
 *	ESI	current input point, advanced by the source stride
 *	EDI	current output point, advanced by 16 bytes
 *	EDX	matrix base
 *	EAX	source stride (byte count added to ESI)
 *	ECX	end address of the output: start + count*16
 *
 * A source count of zero returns at once and leaves the destination
 * header untouched.  Every source slot is read before the first store.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d )
HIDDEN(_mesa_x86_transform_points3_3d)
GLNAME( _mesa_x86_transform_points3_3d ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_3d_ret) )

	/* publish the result header: same count, three components */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* switch from vector headers to raw data cursors */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = output start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

/*
 * Stack pictures list ST0 first.  ax/ay/az are the running sums for
 * DST0..DST2; tx/ty/tz are the products about to be folded into them.
 */
ALIGNTEXT16
LLBL(x86_p3_3d_next):

	/* products of s0 seed the three sums */
	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* ax */
	FLD_S( SRC0 )
	FMUL_S( MAT1 )			/* ay ax */
	FLD_S( SRC0 )
	FMUL_S( MAT2 )			/* az ay ax */

	/* products of s1 */
	FLD_S( SRC1 )
	FMUL_S( MAT4 )			/* tx az ay ax */
	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* ty tx az ay ax */
	FLD_S( SRC1 )
	FMUL_S( MAT6 )			/* tz ty tx az ay ax */

	FXCH( ST(2) )			/* tx ty tz az ay ax */
	FADDP( ST0, ST(5) )		/* ax += tx: ty tz az ay ax */
	FADDP( ST0, ST(3) )		/* ay += ty: tz az ay ax */
	FADDP( ST0, ST(1) )		/* az += tz: az ay ax */

	/* products of s2, folded in the same way */
	FLD_S( SRC2 )
	FMUL_S( MAT8 )			/* tx az ay ax */
	FLD_S( SRC2 )
	FMUL_S( MAT9 )			/* ty tx az ay ax */
	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* tz ty tx az ay ax */

	FXCH( ST(2) )			/* tx ty tz az ay ax */
	FADDP( ST0, ST(5) )		/* ax += tx: ty tz az ay ax */
	FADDP( ST0, ST(3) )		/* ay += ty: tz az ay ax */
	FADDP( ST0, ST(1) )		/* az += tz: az ay ax */

	/* add matrix entries 12..14, rotating each sum through ST0 */
	FXCH( ST(2) )			/* ax ay az */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* ay ax az */
	FADD_S( MAT13 )
	FXCH( ST(2) )			/* az ax ay */
	FADD_S( MAT14 )

	/* write the three results and empty the x87 stack */
	FXCH( ST(1) )			/* ax az ay */
	FSTP_S( DST0 )			/* az ay */
	FXCH( ST(1) )			/* ay az */
	FSTP_S( DST1 )			/* az */
	FSTP_S( DST2 )			/* (empty) */

	/* step both cursors; stop when the output cursor reaches ECX */
	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_3d_next) )

LLBL(x86_p3_3d_ret):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
345
346
347
348
/*
 * _mesa_x86_transform_points3_3d_no_rot
 *
 * x87 transform of 3-component points that reads only matrix entries
 * 0, 5, 10 and 12-14, writing 3-component results.  For each input point
 * (s0, s1, s2) the loop stores
 *
 *	DST0 = s0*MAT0  + MAT12
 *	DST1 = s1*MAT5  + MAT13
 *	DST2 = s2*MAT10 + MAT14
 *
 * DST3 is never written, although the output cursor still moves in
 * 16-byte steps.
 *
 * ARG_SOURCE, ARG_DEST and ARG_MATRIX come from xform_args.h, which is not
 * visible here; presumably they are stack slots located through
 * FRAME_OFFSET, so FRAME_OFFSET has to match the bytes pushed below.
 *
 * Loop registers:
 *	ESI	current input point, advanced by the source stride
 *	EDI	current output point, advanced by 16 bytes
 *	EDX	matrix base
 *	EAX	source stride (byte count added to ESI)
 *	ECX	end address of the output: start + count*16
 *
 * A source count of zero returns at once and leaves the destination
 * header untouched.  Every source slot is read before the first store.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
GLNAME( _mesa_x86_transform_points3_3d_no_rot ):

#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_3d_norot_ret) )

	/* publish the result header: same count, three components */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* switch from vector headers to raw data cursors */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = output start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

/*
 * Stack pictures list ST0 first.  sx/sy/sz are the scaled inputs and
 * ax/ay/az the finished values for DST0..DST2.
 */
ALIGNTEXT16
LLBL(x86_p3_3d_norot_next):

	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* sx */

	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* sy sx */

	FLD_S( SRC2 )
	FMUL_S( MAT10 )			/* sz sy sx */

	FXCH( ST(2) )			/* sx sy sz */
	FADD_S( MAT12 )			/* ax sy sz */
	FLD_S( MAT13 )			/* m13 ax sy sz */
	FXCH( ST(2) )			/* sy ax m13 sz */
	FADDP( ST0, ST(2) )		/* m13 += sy, kept as ay: ax ay sz */
	FLD_S( MAT14 )			/* m14 ax ay sz */
	FXCH( ST(3) )			/* sz ax ay m14 */
	FADDP( ST0, ST(3) )		/* m14 += sz, kept as az: ax ay az */

	FSTP_S( DST0 )			/* ay az */
	FSTP_S( DST1 )			/* az */
	FSTP_S( DST2 )			/* (empty) */

	/* step both cursors; stop when the output cursor reaches ECX */
	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_3d_norot_next) )

LLBL(x86_p3_3d_norot_ret):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
418
419
420
421
/*
 * _mesa_x86_transform_points3_2d
 *
 * x87 transform of 3-component points that reads only matrix entries
 * 0, 1, 4, 5, 12 and 13, writing 3-component results.  For each input
 * point (s0, s1, s2) the loop stores
 *
 *	DST0 = s0*MAT0 + s1*MAT4 + MAT12
 *	DST1 = s0*MAT1 + s1*MAT5 + MAT13
 *	DST2 = s2, copied as a raw 32-bit word through EBX
 *
 * DST3 is never written, although the output cursor still moves in
 * 16-byte steps.
 *
 * ARG_SOURCE, ARG_DEST and ARG_MATRIX come from xform_args.h, which is not
 * visible here; presumably they are stack slots located through
 * FRAME_OFFSET, so FRAME_OFFSET has to match the bytes pushed below.
 *
 * Loop registers:
 *	ESI	current input point, advanced by the source stride
 *	EDI	current output point, advanced by 16 bytes
 *	EDX	matrix base
 *	EAX	source stride (byte count added to ESI)
 *	EBX	bit pattern of s2
 *	ECX	end address of the output: start + count*16
 *
 * A source count of zero returns at once and leaves the destination
 * header untouched.  Every source slot is read before the first store.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d )
HIDDEN(_mesa_x86_transform_points3_2d)
GLNAME( _mesa_x86_transform_points3_2d ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_2d_ret) )

	/* publish the result header: same count, three components */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* switch from vector headers to raw data cursors */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = output start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

/*
 * Stack pictures list ST0 first.  ax/ay are the running sums for DST0 and
 * DST1; tx/ty are the s1 products folded into them.
 */
ALIGNTEXT16
LLBL(x86_p3_2d_next):

	/* fetch s2 up front; it is stored unchanged after the FPU results */
	MOV_L( SRC2, EBX )

	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* ax */
	FLD_S( SRC0 )
	FMUL_S( MAT1 )			/* ay ax */

	FLD_S( SRC1 )
	FMUL_S( MAT4 )			/* tx ay ax */
	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* ty tx ay ax */

	FXCH( ST(1) )			/* tx ty ay ax */
	FADDP( ST0, ST(3) )		/* ax += tx: ty ay ax */
	FADDP( ST0, ST(1) )		/* ay += ty: ay ax */

	FXCH( ST(1) )			/* ax ay */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* ay ax */
	FADD_S( MAT13 )

	FXCH( ST(1) )			/* ax ay */
	FSTP_S( DST0 )			/* ay */
	FSTP_S( DST1 )			/* (empty) */
	MOV_L( EBX, DST2 )

	/* step both cursors; stop when the output cursor reaches ECX */
	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_2d_next) )

LLBL(x86_p3_2d_ret):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
496
497
498
499
/*
 * _mesa_x86_transform_points3_2d_no_rot
 *
 * x87 transform of 3-component points that reads only matrix entries
 * 0, 5, 12 and 13, writing 3-component results.  For each input point
 * (s0, s1, s2) the loop stores
 *
 *	DST0 = s0*MAT0 + MAT12
 *	DST1 = s1*MAT5 + MAT13
 *	DST2 = s2, copied as a raw 32-bit word through EBX
 *
 * DST3 is never written, although the output cursor still moves in
 * 16-byte steps.
 *
 * ARG_SOURCE, ARG_DEST and ARG_MATRIX come from xform_args.h, which is not
 * visible here; presumably they are stack slots located through
 * FRAME_OFFSET, so FRAME_OFFSET has to match the bytes pushed below.
 *
 * Loop registers:
 *	ESI	current input point, advanced by the source stride
 *	EDI	current output point, advanced by 16 bytes
 *	EDX	matrix base
 *	EAX	source stride (byte count added to ESI)
 *	EBX	bit pattern of s2
 *	ECX	end address of the output: start + count*16
 *
 * A source count of zero returns at once and leaves the destination
 * header untouched.  Every source slot is read before the first store.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
GLNAME( _mesa_x86_transform_points3_2d_no_rot ):

#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( ARG_DEST, EDI )
	MOV_L( ARG_SOURCE, ESI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_2d_norot_ret) )

	/* publish the result header: same count, three components */
	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	/* switch from vector headers to raw data cursors */
	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )
	MOV_L( REGOFF(V4F_START, EDI), EDI )

	/* ECX = output start + count * 16 */
	SHL_L( CONST(4), ECX )
	ADD_L( EDI, ECX )

/*
 * Stack pictures list ST0 first.  sx/sy are the scaled inputs and ax/ay
 * the finished values for DST0 and DST1.
 */
ALIGNTEXT16
LLBL(x86_p3_2d_norot_next):

	/* fetch s2 up front; it is stored unchanged after the FPU results */
	MOV_L( SRC2, EBX )

	FLD_S( SRC0 )
	FMUL_S( MAT0 )			/* sx */

	FLD_S( SRC1 )
	FMUL_S( MAT5 )			/* sy sx */

	FXCH( ST(1) )			/* sx sy */
	FADD_S( MAT12 )			/* ax sy */
	FLD_S( MAT13 )			/* m13 ax sy */

	FXCH( ST(2) )			/* sy ax m13 */
	FADDP( ST0, ST(2) )		/* m13 += sy, kept as ay: ax ay */

	FSTP_S( DST0 )			/* ay */
	FSTP_S( DST1 )			/* (empty) */
	MOV_L( EBX, DST2 )

	/* step both cursors; stop when the output cursor reaches ECX */
	ADD_L( EAX, ESI )
	ADD_L( CONST(16), EDI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_2d_norot_next) )

LLBL(x86_p3_2d_norot_ret):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
567
568
569
570
/*
 * _mesa_x86_transform_points3_identity
 *
 * Copies 3-component points from the source to the destination without
 * any arithmetic: for each point, the 32-bit words SRC0..SRC2 are moved to
 * DST0..DST2 through integer registers.  DST3 is never written, although
 * the output cursor still moves in 16-byte steps.
 *
 * The matrix argument is not read.  The copy needs EDX as a third scratch
 * register, so no matrix pointer is ever loaded.
 *
 * ARG_SOURCE and ARG_DEST come from xform_args.h, which is not visible
 * here; presumably they are stack slots located through FRAME_OFFSET, so
 * FRAME_OFFSET has to match the bytes pushed below.
 *
 * Loop registers:
 *	ESI	current input point, advanced by the source stride
 *	EDI	current output point, advanced by 16 bytes
 *	EAX	source stride (byte count added to ESI)
 *	EBX, EBP, EDX	scratch for the three words being copied
 *	ECX	end address of the output: start + count*16
 *
 * A source count of zero returns at once and leaves the destination
 * header untouched.  When both vectors start at the same data address the
 * header is still updated but the copy loop is skipped.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_identity )
HIDDEN(_mesa_x86_transform_points3_identity)
GLNAME(_mesa_x86_transform_points3_identity ):

#define FRAME_OFFSET 16
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )
	PUSH_L( EBP )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )
	JZ( LLBL(x86_p3_identity_ret) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	/* ECX = output start + count * 16 */
	SHL_L( CONST(4), ECX )
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )

	/* same data address on both sides: nothing to copy */
	CMP_L( ESI, EDI )
	JE( LLBL(x86_p3_identity_ret) )

ALIGNTEXT16
LLBL(x86_p3_identity_next):

	/* read all three words before writing any of them */
	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EBP )
	MOV_L( SRC2, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EBP, DST1 )
	MOV_L( EDX, DST2 )

	/* step both cursors; stop when the output cursor reaches ECX */
	ADD_L( CONST(16), EDI )
	ADD_L( EAX, ESI )
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p3_identity_next) )

LLBL(x86_p3_identity_ret):

	POP_L( EBP )
	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET
641
/*
 * ELF objects built for Linux get an empty .note.GNU-stack section; by
 * GNU toolchain convention this records that the code in this file does
 * not need an executable stack.
 */
#if defined(__ELF__) && defined(__linux__)
	.section .note.GNU-stack,"",%progbits
#endif
645