/*
 * Mesa 3-D graphics library
 * Version:  3.5
 *
 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
 * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
 * in there will break the build on some platforms.
 */

#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"

	SEG_TEXT

/*
 * Raw 32-bit patterns of IEEE-754 single-precision constants, for storing
 * a float with an integer move.  1065353216 is 0x3F800000, i.e. 1.0f.
 * FP_ONE is not used by the routines in this file.
 */
#define FP_ONE		1065353216
#define FP_ZERO		0

/*
 * SRCn / DSTn: 32-bit component n of the point addressed by ESI / EDI.
 */
#define SRC0		REGOFF(0, ESI)
#define SRC1		REGOFF(4, ESI)
#define SRC2		REGOFF(8, ESI)
#define SRC3		REGOFF(12, ESI)
#define DST0		REGOFF(0, EDI)
#define DST1		REGOFF(4, EDI)
#define DST2		REGOFF(8, EDI)
#define DST3		REGOFF(12, EDI)

/*
 * MATn: 32-bit element n (byte offset 4*n) of the 16-element matrix
 * addressed by EDX.
 */
#define MAT0		REGOFF(0, EDX)
#define MAT1		REGOFF(4, EDX)
#define MAT2		REGOFF(8, EDX)
#define MAT3		REGOFF(12, EDX)
#define MAT4		REGOFF(16, EDX)
#define MAT5		REGOFF(20, EDX)
#define MAT6		REGOFF(24, EDX)
#define MAT7		REGOFF(28, EDX)
#define MAT8		REGOFF(32, EDX)
#define MAT9		REGOFF(36, EDX)
#define MAT10		REGOFF(40, EDX)
#define MAT11		REGOFF(44, EDX)
#define MAT12		REGOFF(48, EDX)
#define MAT13		REGOFF(52, EDX)
#define MAT14		REGOFF(56, EDX)
#define MAT15		REGOFF(60, EDX)


ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_general )
HIDDEN(_mesa_x86_transform_points2_general)
/*
 * _mesa_x86_transform_points2_general
 *
 * Transform 2-component points by a full matrix, producing 4-component
 * results.  With x = SRC0, y = SRC1 and mN = MATN, each point yields:
 *
 *	DST0 = x*m0 + y*m4 + m12
 *	DST1 = x*m1 + y*m5 + m13
 *	DST2 = x*m2 + y*m6 + m14
 *	DST3 = x*m3 + y*m7 + m15
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (defined outside this file; presumably stack slots located with the
 * help of FRAME_OFFSET -- confirm in xform_args.h).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	dest descriptor, then pointer to the current dest point
 *	EDX	matrix pointer
 *	EAX	source stride, added to ESI once per point
 *	ECX	point count, then end-of-output pointer (start + count*16)
 *
 * ESI and EDI are saved and restored; EAX, ECX, EDX and the flags are
 * clobbered.  Every x87 value pushed in an iteration is popped again
 * before the iteration ends.
 *
 * NOTE(review): when the source count is zero the routine returns before
 * writing the destination's count, size or flags -- confirm that callers
 * do not rely on those fields being refreshed for empty input.
 */
GLNAME( _mesa_x86_transform_points2_general ):

/* Two 4-byte pushes follow, so 8 bytes sit on the stack above entry ESP. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p2_gr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

ALIGNTEXT16
LLBL(x86_p2_gr_loop):

	/* x times matrix elements 0..3; comments show the x87 stack, top first */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )
	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
	FMUL_S( MAT3 )

	/* y times matrix elements 4..7 */
	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT6 )
	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
	FMUL_S( MAT7 )

	/* fold each y product into the matching x product */
	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */

	/* add matrix elements 12..15 */
	FXCH( ST(3) )			/* F4 F6 F5 F7 */
	FADD_S( MAT12 )
	FXCH( ST(2) )			/* F5 F6 F4 F7 */
	FADD_S( MAT13 )
	FXCH( ST(1) )			/* F6 F5 F4 F7 */
	FADD_S( MAT14 )
	FXCH( ST(3) )			/* F7 F5 F4 F6 */
	FADD_S( MAT15 )

	/* store the four results and empty the x87 stack */
	FXCH( ST(2) )			/* F4 F5 F7 F6 */
	FSTP_S( DST0 )			/* F5 F7 F6 */
	FSTP_S( DST1 )			/* F7 F6 */
	FXCH( ST(1) )			/* F6 F7 */
	FSTP_S( DST2 )			/* F7 */
	FSTP_S( DST3 )			/* */

LLBL(x86_p2_gr_skip):

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_gr_loop) )

LLBL(x86_p2_gr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
HIDDEN(_mesa_x86_transform_points2_perspective)
/*
 * _mesa_x86_transform_points2_perspective
 *
 * Transform 2-component points using only matrix elements 0, 5 and 14,
 * producing 4-component results.  With x = SRC0, y = SRC1 and mN = MATN:
 *
 *	DST0 = x*m0
 *	DST1 = y*m5
 *	DST2 = m14		(copied as a raw 32-bit value)
 *	DST3 = FP_ZERO		(all-zero bit pattern)
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (defined outside this file; presumably stack slots located with the
 * help of FRAME_OFFSET -- confirm in xform_args.h).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	dest descriptor, then pointer to the current dest point
 *	EDX	matrix pointer
 *	EBX	raw bits of m14, loaded once before the loop
 *	EAX	source stride, added to ESI once per point
 *	ECX	point count, then end-of-output pointer (start + count*16)
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and the flags
 * are clobbered.  Every x87 value pushed in an iteration is popped again
 * before the iteration ends.
 *
 * NOTE(review): when the source count is zero the routine returns before
 * writing the destination's count, size or flags -- confirm that callers
 * do not rely on those fields being refreshed for empty input.
 */
GLNAME( _mesa_x86_transform_points2_perspective ):

/* Three 4-byte pushes follow, so 12 bytes sit above entry ESP. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p2_pr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

	MOV_L( MAT14, EBX )		/* loop-invariant third component */

ALIGNTEXT16
LLBL(x86_p2_pr_loop):

	/* comments show the x87 stack, top first */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FSTP_S( DST0   )		/* F1 */
	FSTP_S( DST1   )		/* */
	MOV_L( EBX, DST2 )		/* integer copy of m14 */
	MOV_L( CONST(FP_ZERO), DST3 )

LLBL(x86_p2_pr_skip):

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_pr_loop) )

LLBL(x86_p2_pr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
HIDDEN(_mesa_x86_transform_points2_3d)
/*
 * _mesa_x86_transform_points2_3d
 *
 * Transform 2-component points, producing 3-component results.  With
 * x = SRC0, y = SRC1 and mN = MATN, each point yields:
 *
 *	DST0 = x*m0 + y*m4 + m12
 *	DST1 = x*m1 + y*m5 + m13
 *	DST2 = x*m2 + y*m6 + m14
 *
 * DST3 is not written.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (defined outside this file; presumably stack slots located with the
 * help of FRAME_OFFSET -- confirm in xform_args.h).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	dest descriptor, then pointer to the current dest point
 *	EDX	matrix pointer
 *	EAX	source stride, added to ESI once per point
 *	ECX	point count, then end-of-output pointer (start + count*16)
 *
 * ESI and EDI are saved and restored; EAX, ECX, EDX and the flags are
 * clobbered.  Every x87 value pushed in an iteration is popped again
 * before the iteration ends.
 *
 * NOTE(review): when the source count is zero the routine returns before
 * writing the destination's count, size or flags -- confirm that callers
 * do not rely on those fields being refreshed for empty input.
 */
GLNAME( _mesa_x86_transform_points2_3d ):

/* Two 4-byte pushes follow, so 8 bytes sit on the stack above entry ESP. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p2_3dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

ALIGNTEXT16
LLBL(x86_p2_3dr_loop):

	/* x times matrix elements 0..2; comments show the x87 stack, top first */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )
	FLD_S( SRC0 )			/* F6 F5 F4 */
	FMUL_S( MAT2 )

	/* y times matrix elements 4..6 */
	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
	FMUL_S( MAT5 )
	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
	FMUL_S( MAT6 )

	/* fold each y product into the matching x product */
	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
	FADDP( ST0, ST(1) )		/* F6 F5 F4 */

	/* add matrix elements 12..14 */
	FXCH( ST(2) )			/* F4 F5 F6 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* F5 F4 F6 */
	FADD_S( MAT13 )
	FXCH( ST(2) )			/* F6 F4 F5 */
	FADD_S( MAT14 )

	/* store the three results and empty the x87 stack */
	FXCH( ST(1) )			/* F4 F6 F5 */
	FSTP_S( DST0 )			/* F6 F5 */
	FXCH( ST(1) )			/* F5 F6 */
	FSTP_S( DST1 )			/* F6 */
	FSTP_S( DST2 )			/* */

LLBL(x86_p2_3dr_skip):

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dr_loop) )

LLBL(x86_p2_3dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * Transform 2-component points using only matrix elements 0, 5, 12, 13
 * and 14, producing 3-component results.  With x = SRC0, y = SRC1 and
 * mN = MATN:
 *
 *	DST0 = x*m0 + m12
 *	DST1 = y*m5 + m13
 *	DST2 = m14		(copied as a raw 32-bit value)
 *
 * DST3 is not written.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (defined outside this file; presumably stack slots located with the
 * help of FRAME_OFFSET -- confirm in xform_args.h).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	dest descriptor, then pointer to the current dest point
 *	EDX	matrix pointer
 *	EBX	raw bits of m14, loaded once before the loop
 *	EAX	source stride, added to ESI once per point
 *	ECX	point count, then end-of-output pointer (start + count*16)
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and the flags
 * are clobbered.  Every x87 value pushed in an iteration is popped again
 * before the iteration ends.
 *
 * NOTE(review): when the source count is zero the routine returns before
 * writing the destination's count, size or flags -- confirm that callers
 * do not rely on those fields being refreshed for empty input.
 */
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):

/* Three 4-byte pushes follow, so 12 bytes sit above entry ESP. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p2_3dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

	MOV_L( MAT14, EBX )		/* loop-invariant third component */

ALIGNTEXT16
LLBL(x86_p2_3dnrr_loop):

	/* comments show the x87 stack, top first */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )		/* F5 F4 F1 */
	FXCH( ST(2) )			/* F1 F4 F5 */
	FADDP( ST0, ST(2) )		/* F4 F5 */

	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */
	MOV_L( EBX, DST2 )		/* integer copy of m14 */

LLBL(x86_p2_3dnrr_skip):

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_3dnrr_loop) )

LLBL(x86_p2_3dnrr_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
HIDDEN(_mesa_x86_transform_points2_2d)
/*
 * _mesa_x86_transform_points2_2d
 *
 * Transform 2-component points, producing 2-component results.  With
 * x = SRC0, y = SRC1 and mN = MATN, each point yields:
 *
 *	DST0 = x*m0 + y*m4 + m12
 *	DST1 = x*m1 + y*m5 + m13
 *
 * DST2 and DST3 are not written.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (defined outside this file; presumably stack slots located with the
 * help of FRAME_OFFSET -- confirm in xform_args.h).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	dest descriptor, then pointer to the current dest point
 *	EDX	matrix pointer
 *	EAX	source stride, added to ESI once per point
 *	ECX	point count, then end-of-output pointer (start + count*16)
 *
 * ESI and EDI are saved and restored; EAX, ECX, EDX and the flags are
 * clobbered.  Every x87 value pushed in an iteration is popped again
 * before the iteration ends.
 *
 * NOTE(review): when the source count is zero the routine returns before
 * writing the destination's count, size or flags -- confirm that callers
 * do not rely on those fields being refreshed for empty input.
 */
GLNAME( _mesa_x86_transform_points2_2d ):

/* Two 4-byte pushes follow, so 8 bytes sit on the stack above entry ESP. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p2_2dr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

ALIGNTEXT16
LLBL(x86_p2_2dr_loop):

	/* x times matrix elements 0..1; comments show the x87 stack, top first */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )
	FLD_S( SRC0 )			/* F5 F4 */
	FMUL_S( MAT1 )

	/* y times matrix elements 4..5 */
	FLD_S( SRC1 )			/* F0 F5 F4 */
	FMUL_S( MAT4 )
	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
	FMUL_S( MAT5 )

	/* fold each y product into the matching x product */
	FXCH( ST(1) )			/* F0 F1 F5 F4 */
	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
	FADDP( ST0, ST(1) )		/* F5 F4 */

	/* add matrix elements 12..13 */
	FXCH( ST(1) )			/* F4 F5 */
	FADD_S( MAT12 )
	FXCH( ST(1) )			/* F5 F4 */
	FADD_S( MAT13 )

	/* store both results and empty the x87 stack */
	FXCH( ST(1) )			/* F4 F5 */
	FSTP_S( DST0 )		/* F5 */
	FSTP_S( DST1 )		/* */

LLBL(x86_p2_2dr_skip):

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dr_loop) )

LLBL(x86_p2_2dr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * Transform 2-component points using only matrix elements 0, 5, 12 and
 * 13, producing 2-component results.  With x = SRC0, y = SRC1 and
 * mN = MATN:
 *
 *	DST0 = x*m0 + m12
 *	DST1 = y*m5 + m13
 *
 * DST2 and DST3 are not written.
 *
 * The entry point is aligned with ALIGNTEXT16 like the other routines in
 * this file.
 *
 * Arguments are fetched through ARG_SOURCE, ARG_DEST and ARG_MATRIX
 * (defined outside this file; presumably stack slots located with the
 * help of FRAME_OFFSET -- confirm in xform_args.h).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	dest descriptor, then pointer to the current dest point
 *	EDX	matrix pointer
 *	EAX	source stride, added to ESI once per point
 *	ECX	point count, then end-of-output pointer (start + count*16)
 *
 * ESI and EDI are saved and restored; EAX, ECX, EDX and the flags are
 * clobbered.  Every x87 value pushed in an iteration is popped again
 * before the iteration ends.
 *
 * NOTE(review): when the source count is zero the routine returns before
 * writing the destination's count, size or flags -- confirm that callers
 * do not rely on those fields being refreshed for empty input.
 */
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):

/* Two 4-byte pushes follow, so 8 bytes sit on the stack above entry ESP. */
#define FRAME_OFFSET 8
	PUSH_L( ESI )
	PUSH_L( EDI )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( ARG_MATRIX, EDX )
	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to transform? */
	JZ( LLBL(x86_p2_2dnrr_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

ALIGNTEXT16
LLBL(x86_p2_2dnrr_loop):

	/* comments show the x87 stack, top first */
	FLD_S( SRC0 )			/* F4 */
	FMUL_S( MAT0 )

	FLD_S( SRC1 )			/* F1 F4 */
	FMUL_S( MAT5 )

	FXCH( ST(1) )			/* F4 F1 */
	FADD_S( MAT12 )
	FLD_S( MAT13 )		/* F5 F4 F1 */
	FXCH( ST(2) )			/* F1 F4 F5 */
	FADDP( ST0, ST(2) )		/* F4 F5 */

	FSTP_S( DST0   )		/* F5 */
	FSTP_S( DST1   )		/* */

LLBL(x86_p2_2dnrr_skip):

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_2dnrr_loop) )

LLBL(x86_p2_2dnrr_done):

	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET



ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
HIDDEN(_mesa_x86_transform_points2_identity)
/*
 * _mesa_x86_transform_points2_identity
 *
 * Copy the first two 32-bit components of every source point to the
 * destination unchanged:
 *
 *	DST0 = SRC0
 *	DST1 = SRC1
 *
 * DST2 and DST3 are not written.  The copy uses integer moves, so the
 * values are transferred bit for bit.  If the source and destination
 * data start at the same address the copy loop is skipped; the
 * destination's count, size and flags have already been updated by then.
 *
 * The matrix argument is not read: no matrix element is needed, and EDX
 * serves as copy scratch instead.
 *
 * Arguments are fetched through ARG_SOURCE and ARG_DEST (defined outside
 * this file; presumably stack slots located with the help of
 * FRAME_OFFSET -- confirm in xform_args.h).
 *
 * Register use:
 *	ESI	source descriptor, then pointer to the current source point
 *	EDI	dest descriptor, then pointer to the current dest point
 *	EBX	scratch for component 0
 *	EDX	scratch for component 1
 *	EAX	source stride, added to ESI once per point
 *	ECX	point count, then end-of-output pointer (start + count*16)
 *
 * ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and the flags
 * are clobbered.  The x87 unit is not used.
 *
 * NOTE(review): when the source count is zero the routine returns before
 * writing the destination's count, size or flags -- confirm that callers
 * do not rely on those fields being refreshed for empty input.
 */
GLNAME( _mesa_x86_transform_points2_identity ):

/* Three 4-byte pushes follow, so 12 bytes sit above entry ESP. */
#define FRAME_OFFSET 12
	PUSH_L( ESI )
	PUSH_L( EDI )
	PUSH_L( EBX )

	MOV_L( ARG_SOURCE, ESI )
	MOV_L( ARG_DEST, EDI )

	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )

	TEST_L( ECX, ECX )		/* nothing to copy? */
	JZ( LLBL(x86_p2_ir_done) )

	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
	OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )

	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
	MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )

	SHL_L( CONST(4), ECX )		/* count * 16 bytes per output point */
	MOV_L( REGOFF(V4F_START, ESI), ESI )

	MOV_L( REGOFF(V4F_START, EDI), EDI )
	ADD_L( EDI, ECX )		/* ECX = one past the last output point */

	CMP_L( ESI, EDI )		/* same start address: data already in place */
	JE( LLBL(x86_p2_ir_done) )

ALIGNTEXT16
LLBL(x86_p2_ir_loop):

	MOV_L( SRC0, EBX )
	MOV_L( SRC1, EDX )

	MOV_L( EBX, DST0 )
	MOV_L( EDX, DST1 )

LLBL(x86_p2_ir_skip):

	ADD_L( CONST(16), EDI )		/* next output point */
	ADD_L( EAX, ESI )		/* next source point */
	CMP_L( ECX, EDI )
	JNE( LLBL(x86_p2_ir_loop) )

LLBL(x86_p2_ir_done):

	POP_L( EBX )
	POP_L( EDI )
	POP_L( ESI )
	RET
#undef FRAME_OFFSET

/*
 * On Linux/ELF, emit an empty .note.GNU-stack section so the linker is
 * told this object does not require an executable stack.
 */
#if defined (__ELF__) && defined (__linux__)
	.section .note.GNU-stack,"",%progbits
#endif