1
2/*
3 * Mesa 3-D graphics library
4 * Version:  3.5
5 *
6 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26#ifdef USE_3DNOW_ASM
27#include "assyntax.h"
28#include "matypes.h"
29#include "xform_args.h"
30
31    SEG_TEXT
32
33#define FRAME_OFFSET	4
34
35
/*
 * _mesa_3dnow_transform_points2_general
 *
 * Transforms two-component points (x0, x1) and writes four floats per point:
 *     r0 = x0*m00 + x1*m10 + m30
 *     r1 = x0*m01 + x1*m11 + m31
 *     r2 = x0*m02 + x1*m12 + m32
 *     r3 = x0*m03 + x1*m13 + m33
 * mCR denotes the float at byte offset 16*C + 4*R from the matrix base.
 *
 * The destination gets V4F_SIZE = 4, VEC_SIZE_4 OR-ed into V4F_FLAGS and
 * V4F_COUNT copied from the source.  A count of zero skips the loop.
 *
 * Loop registers:
 *     EAX = current source point        EDI = source stride in bytes
 *     EDX = current result (16 bytes)   ESI = points still to process
 *     MM0..MM3 = matrix pairs, MM4/MM5 = m3x terms, MM6/MM7 = scratch
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_general )
HIDDEN(_mesa_3dnow_transform_points2_general)
GLNAME( _mesa_3dnow_transform_points2_general ):

    PUSH_L    ( ESI )

    /* NOTE(review): ARG_* come from xform_args.h and are presumably
     * ESP-relative with FRAME_OFFSET covering the single push above;
     * keep these three loads between the two pushes. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )

    /* Fill in the destination header. */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Switch to the loop register assignment. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )   /* EDI = source stride    */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )    /* EDX = first result     */
    MOV_L     ( ESI, ECX )                       /* ECX = matrix base      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )    /* ESI = point count      */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )    /* EAX = first point      */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPGR_DONE ) )            /* count == 0 -> leave    */

    /* Preload the matrix terms used by the loop. */
    MOVQ      ( REGOFF(48, ECX), MM4 )           /* MM4 = m31 | m30        */
    MOVQ      ( REGOFF(56, ECX), MM5 )           /* MM5 = m33 | m32        */

    MOVD      ( REGIND(ECX), MM0 )               /* MM0 =  0  | m00        */
    MOVD      ( REGOFF(4, ECX), MM1 )            /* MM1 =  0  | m01        */
    MOVD      ( REGOFF(8, ECX), MM2 )            /* MM2 =  0  | m02        */
    MOVD      ( REGOFF(12, ECX), MM3 )           /* MM3 =  0  | m03        */

    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )           /* MM0 = m10 | m00        */
    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )           /* MM1 = m11 | m01        */
    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )           /* MM2 = m12 | m02        */
    PUNPCKLDQ ( REGOFF(28, ECX), MM3 )           /* MM3 = m13 | m03        */

ALIGNTEXT16
LLBL( G3TPGR_LOOP ):

    /* First half: r0 and r1. */
    MOVQ      ( REGIND(EAX), MM7 )               /* MM7 = x1 | x0          */
    MOVQ      ( MM7, MM6 )                       /* MM6 = x1 | x0          */
    PFMUL     ( MM1, MM7 )                       /* MM7 = x1*m11 | x0*m01  */
    PFMUL     ( MM0, MM6 )                       /* MM6 = x1*m10 | x0*m00  */
    PFACC     ( MM7, MM6 )                       /* x0*m01+x1*m11 | x0*m00+x1*m10 */
    PFADD     ( MM4, MM6 )                       /* MM6 = r1 | r0          */
    MOVQ      ( MM6, REGIND(EDX) )               /* store r0, r1           */

    /* Second half: r2 and r3.  Both scratch registers were consumed
     * above, so the point is read from memory again after the store. */
    MOVQ      ( REGIND(EAX), MM7 )               /* MM7 = x1 | x0          */
    ADD_L     ( EDI, EAX )                       /* advance source pointer */
    MOVQ      ( MM7, MM6 )                       /* MM6 = x1 | x0          */
    PFMUL     ( MM3, MM7 )                       /* MM7 = x1*m13 | x0*m03  */
    PFMUL     ( MM2, MM6 )                       /* MM6 = x1*m12 | x0*m02  */
    PFACC     ( MM7, MM6 )                       /* x0*m03+x1*m13 | x0*m02+x1*m12 */
    PFADD     ( MM5, MM6 )                       /* MM6 = r3 | r2          */
    MOVQ      ( MM6, REGOFF(8, EDX) )            /* store r2, r3           */

    ADD_L     ( CONST(16), EDX )                 /* advance result pointer */
    DEC_L     ( ESI )                            /* one point done         */
    JNZ       ( LLBL( G3TPGR_LOOP ) )            /* more points left?      */

LLBL( G3TPGR_DONE ):

    POP_L     ( EDI )
    POP_L     ( ESI )
    FEMMS                                        /* leave MMX/3DNow! state */
    RET
113
114
115
116
/*
 * _mesa_3dnow_transform_points2_perspective
 *
 * Transforms two-component points (x0, x1) and writes four floats per point:
 *     r0 = x0*m00
 *     r1 = x1*m11
 *     r2 = m32
 *     r3 = 0
 * mCR denotes the float at byte offset 16*C + 4*R from the matrix base.
 *
 * The destination gets V4F_SIZE = 4, VEC_SIZE_4 OR-ed into V4F_FLAGS and
 * V4F_COUNT copied from the source.  A count of zero skips the loop.
 *
 * Loop registers:
 *     EAX = current source point        EDI = source stride in bytes
 *     EDX = current result (16 bytes)   ESI = points still to process
 *     MM0 = m11 | m00, MM3 = 0 | m32, MM4 = scratch
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective )
HIDDEN(_mesa_3dnow_transform_points2_perspective)
GLNAME( _mesa_3dnow_transform_points2_perspective ):

    PUSH_L    ( ESI )

    /* NOTE(review): ARG_* come from xform_args.h and are presumably
     * ESP-relative with FRAME_OFFSET covering the single push above;
     * keep these three loads between the two pushes. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )

    /* Fill in the destination header. */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Switch to the loop register assignment. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )   /* EDI = source stride    */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )    /* EDX = first result     */
    MOV_L     ( ESI, ECX )                       /* ECX = matrix base      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )    /* ESI = point count      */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )    /* EAX = first point      */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPPR_DONE ) )            /* count == 0 -> leave    */

    /* MOVD clears the upper dword, which is what yields r3 = 0 below. */
    MOVD      ( REGOFF(56, ECX), MM3 )           /* MM3 =  0  | m32        */

    MOVD      ( REGIND(ECX), MM0 )               /* MM0 =  0  | m00        */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )           /* MM0 = m11 | m00        */

ALIGNTEXT16
LLBL( G3TPPR_LOOP ):

    MOVQ      ( REGIND(EAX), MM4 )               /* MM4 = x1 | x0          */
    ADD_L     ( EDI, EAX )                       /* advance source pointer */
    PFMUL     ( MM0, MM4 )                       /* MM4 = x1*m11 | x0*m00  */

    MOVQ      ( MM4, REGIND(EDX) )               /* store r0, r1           */
    MOVQ      ( MM3, REGOFF(8, EDX) )            /* store r2 = m32, r3 = 0 */

    ADD_L     ( CONST(16), EDX )                 /* advance result pointer */
    DEC_L     ( ESI )                            /* one point done         */
    JNZ       ( LLBL( G3TPPR_LOOP ) )            /* more points left?      */

LLBL( G3TPPR_DONE ):

    POP_L     ( EDI )
    POP_L     ( ESI )
    FEMMS                                        /* leave MMX/3DNow! state */
    RET
169
170
171
172
/*
 * _mesa_3dnow_transform_points2_3d
 *
 * Transforms two-component points (x0, x1) and writes three floats per point:
 *     r0 = x0*m00 + x1*m10 + m30
 *     r1 = x0*m01 + x1*m11 + m31
 *     r2 = x0*m02 + x1*m12 + m32
 * mCR denotes the float at byte offset 16*C + 4*R from the matrix base.
 * The fourth float of each 16-byte result slot is not written.
 *
 * The destination gets V4F_SIZE = 3, VEC_SIZE_3 OR-ed into V4F_FLAGS and
 * V4F_COUNT copied from the source.  A count of zero skips the loop.
 *
 * Loop registers:
 *     EAX = current source point        EDI = source stride in bytes
 *     EDX = current result (16 bytes)   ESI = points still to process
 *     MM0..MM2 = matrix pairs, MM4 = m31 | m30, MM5 = 0 | m32,
 *     MM6/MM7 = scratch
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d )
HIDDEN(_mesa_3dnow_transform_points2_3d)
GLNAME( _mesa_3dnow_transform_points2_3d ):

    PUSH_L    ( ESI )

    /* NOTE(review): ARG_* come from xform_args.h and are presumably
     * ESP-relative with FRAME_OFFSET covering the single push above;
     * keep these three loads between the two pushes. */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first result        */
    MOV_L     ( ESI, ECX )			/* ECX = matrix base         */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = point count         */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first point         */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3R_3 ) )		/* count == 0 -> leave       */

    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */

    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */

    MOVD      ( REGOFF(8, ECX), MM2 )	/*                 | m02             */
    PUNPCKLDQ ( REGOFF(24, ECX), MM2 )	/* m12             | m02             */

    MOVQ      ( REGOFF(48, ECX), MM4 )	/* m31             | m30             */
    MOVD      ( REGOFF(56, ECX), MM5 )	/* 0               | m32             */

ALIGNTEXT16
LLBL( G3TP3R_2 ):

    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0              */
    MOVQ      ( MM6, MM7 )		/* x1              | x0              */

    PFMUL     ( MM0, MM6 )		/* x1*m10          | x0*m00          */
    PFMUL     ( MM1, MM7 )		/* x1*m11          | x0*m01          */

    PFACC     ( MM7, MM6 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
    PFADD     ( MM4, MM6 )		/* x0*...*m11+m31  | x0*...*m10+m30  */

    MOVQ      ( MM6, REGIND(EDX) )	/* write r1, r0                      */
    MOVQ      ( REGIND(EAX), MM6 )	/* x1              | x0              */

    PFMUL     ( MM2, MM6 )		/* x1*m12          | x0*m02          */

    /* Horizontal add of MM6 with itself: both halves become
     * x0*m02 + x1*m12.  This replaces the former copy into MM7, whose
     * only effect was on the discarded upper half. */
    PFACC     ( MM6, MM6 )		/* x0*m02+x1*m12   | x0*m02+x1*m12   */
    PFADD     ( MM5, MM6 )		/* (unused)        | x0*...*m12+m32  */

    MOVD      ( MM6, REGOFF(8, EDX) )	/* write r2 (low dword only)         */
    ADD_L     ( EDI, EAX )		/* next vertex                       */

    ADD_L     ( CONST(16), EDX )	/* next r                            */
    DEC_L     ( ESI )			/* decrement vertex counter          */

    JNZ       ( LLBL( G3TP3R_2 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TP3R_3 ):

    FEMMS				/* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
246
247
248
249
/*
 * _mesa_3dnow_transform_points2_3d_no_rot
 *
 * Transforms two-component points (x0, x1) and writes three floats per point:
 *     r0 = x0*m00 + m30
 *     r1 = x1*m11 + m31
 *     r2 = m32
 * mCR denotes the float at byte offset 16*C + 4*R from the matrix base.
 * The fourth float of each 16-byte result slot is not written.
 *
 * The destination gets V4F_SIZE = 3, VEC_SIZE_3 OR-ed into V4F_FLAGS and
 * V4F_COUNT copied from the source.  A count of zero skips the loop.
 *
 * Loop registers:
 *     EAX = current source point        EDI = source stride in bytes
 *     EDX = current result (16 bytes)   ESI = points still to process
 *     MM0 = m11 | m00, MM2 = m31 | m30, MM3 = 0 | m32, MM4 = scratch
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points2_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ):

    PUSH_L    ( ESI )

    /* NOTE(review): ARG_* come from xform_args.h and are presumably
     * ESP-relative with FRAME_OFFSET covering the single push above;
     * keep these three loads between the two pushes. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )

    /* Fill in the destination header. */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Switch to the loop register assignment. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )   /* EDI = source stride    */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )    /* EDX = first result     */
    MOV_L     ( ESI, ECX )                       /* ECX = matrix base      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )    /* ESI = point count      */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )    /* EAX = first point      */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP3NRR_DONE ) )          /* count == 0 -> leave    */

    MOVQ      ( REGOFF(48, ECX), MM2 )           /* MM2 = m31 | m30        */
    MOVD      ( REGOFF(56, ECX), MM3 )           /* MM3 =  0  | m32        */

    MOVD      ( REGIND(ECX), MM0 )               /* MM0 =  0  | m00        */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )           /* MM0 = m11 | m00        */

ALIGNTEXT16
LLBL( G3TP3NRR_LOOP ):

    MOVQ      ( REGIND(EAX), MM4 )               /* MM4 = x1 | x0          */
    ADD_L     ( EDI, EAX )                       /* advance source pointer */
    PFMUL     ( MM0, MM4 )                       /* MM4 = x1*m11 | x0*m00  */
    PFADD     ( MM2, MM4 )                       /* MM4 = r1 | r0          */

    MOVQ      ( MM4, REGIND(EDX) )               /* store r0, r1           */
    MOVD      ( MM3, REGOFF(8, EDX) )            /* store r2 = m32         */

    ADD_L     ( CONST(16), EDX )                 /* advance result pointer */
    DEC_L     ( ESI )                            /* one point done         */
    JNZ       ( LLBL( G3TP3NRR_LOOP ) )          /* more points left?      */

LLBL( G3TP3NRR_DONE ):

    POP_L     ( EDI )
    POP_L     ( ESI )
    FEMMS                                        /* leave MMX/3DNow! state */
    RET
305
306
307
308
/*
 * _mesa_3dnow_transform_points2_2d
 *
 * Transforms two-component points (x0, x1) and writes two floats per point:
 *     r0 = (x0*m00 + m30) + x1*m10
 *     r1 = (x0*m01 + m31) + x1*m11
 * mCR denotes the float at byte offset 16*C + 4*R from the matrix base.
 * Only the first 8 bytes of each 16-byte result slot are written.
 *
 * The destination gets V4F_SIZE = 2, VEC_SIZE_2 OR-ed into V4F_FLAGS and
 * V4F_COUNT copied from the source.  A count of zero skips the loop.
 *
 * Loop registers:
 *     EAX = current source point        EDI = source stride in bytes
 *     EDX = current result (16 bytes)   ESI = points still to process
 *     MM0 = m01 | m00, MM1 = m11 | m10, MM2 = m31 | m30,
 *     MM4/MM5 = scratch
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d )
HIDDEN(_mesa_3dnow_transform_points2_2d)
GLNAME( _mesa_3dnow_transform_points2_2d ):

    PUSH_L    ( ESI )

    /* NOTE(review): ARG_* come from xform_args.h and are presumably
     * ESP-relative with FRAME_OFFSET covering the single push above;
     * keep these three loads between the two pushes. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )

    /* Fill in the destination header. */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Switch to the loop register assignment. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )   /* EDI = source stride    */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )    /* EDX = first result     */
    MOV_L     ( ESI, ECX )                       /* ECX = matrix base      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )    /* ESI = point count      */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )    /* EAX = first point      */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2R_DONE ) )            /* count == 0 -> leave    */

    MOVQ      ( REGOFF(48, ECX), MM2 )           /* MM2 = m31 | m30        */
    MOVQ      ( REGOFF(16, ECX), MM1 )           /* MM1 = m11 | m10        */
    MOVQ      ( REGIND(ECX), MM0 )               /* MM0 = m01 | m00        */

ALIGNTEXT16
LLBL( G3TP2R_LOOP ):

    /* x0 contribution plus the m3x terms. */
    MOVD      ( REGIND(EAX), MM4 )               /* MM4 =  0 | x0          */
    PUNPCKLDQ ( MM4, MM4 )                       /* MM4 = x0 | x0          */
    PFMUL     ( MM0, MM4 )                       /* MM4 = x0*m01 | x0*m00  */
    PFADD     ( MM2, MM4 )                       /* x0*m01+m31 | x0*m00+m30 */

    /* x1 contribution. */
    MOVD      ( REGOFF(4, EAX), MM5 )            /* MM5 =  0 | x1          */
    ADD_L     ( EDI, EAX )                       /* advance source pointer */
    PUNPCKLDQ ( MM5, MM5 )                       /* MM5 = x1 | x1          */
    PFMUL     ( MM1, MM5 )                       /* MM5 = x1*m11 | x1*m10  */

    PFADD     ( MM5, MM4 )                       /* MM4 = r1 | r0          */
    MOVQ      ( MM4, REGIND(EDX) )               /* store r0, r1           */

    ADD_L     ( CONST(16), EDX )                 /* advance result pointer */
    DEC_L     ( ESI )                            /* one point done         */
    JNZ       ( LLBL( G3TP2R_LOOP ) )            /* more points left?      */

LLBL( G3TP2R_DONE ):

    POP_L     ( EDI )
    POP_L     ( ESI )
    FEMMS                                        /* leave MMX/3DNow! state */
    RET
369
370
371
372
/*
 * _mesa_3dnow_transform_points2_2d_no_rot
 *
 * Transforms two-component points (x0, x1) and writes two floats per point:
 *     r0 = x0*m00 + m30
 *     r1 = x1*m11 + m31
 * mCR denotes the float at byte offset 16*C + 4*R from the matrix base.
 * Only the first 8 bytes of each 16-byte result slot are written.
 *
 * The destination gets V4F_SIZE = 2, VEC_SIZE_2 OR-ed into V4F_FLAGS and
 * V4F_COUNT copied from the source.  A count of zero skips the loop.
 *
 * Loop registers:
 *     EAX = current source point        EDI = source stride in bytes
 *     EDX = current result (16 bytes)   ESI = points still to process
 *     MM0 = m11 | m00, MM2 = m31 | m30, MM4 = scratch
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points2_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ):

    PUSH_L    ( ESI )

    /* NOTE(review): ARG_* come from xform_args.h and are presumably
     * ESP-relative with FRAME_OFFSET covering the single push above;
     * keep these three loads between the two pushes. */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )

    /* Fill in the destination header. */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    /* Switch to the loop register assignment. */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )   /* EDI = source stride    */
    MOV_L     ( REGOFF(V4F_START, ECX), EDX )    /* EDX = first result     */
    MOV_L     ( ESI, ECX )                       /* ECX = matrix base      */
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )    /* ESI = point count      */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )    /* EAX = first point      */

    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TP2NRR_DONE ) )          /* count == 0 -> leave    */

    MOVQ      ( REGOFF(48, ECX), MM2 )           /* MM2 = m31 | m30        */

    MOVD      ( REGIND(ECX), MM0 )               /* MM0 =  0  | m00        */
    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )           /* MM0 = m11 | m00        */

ALIGNTEXT16
LLBL( G3TP2NRR_LOOP ):

    MOVQ      ( REGIND(EAX), MM4 )               /* MM4 = x1 | x0          */
    PFMUL     ( MM0, MM4 )                       /* MM4 = x1*m11 | x0*m00  */
    PFADD     ( MM2, MM4 )                       /* x1*m11+m31 | x0*m00+m30 */
    MOVQ      ( MM4, REGIND(EDX) )               /* store r0, r1           */

    ADD_L     ( EDI, EAX )                       /* advance source pointer */
    ADD_L     ( CONST(16), EDX )                 /* advance result pointer */
    DEC_L     ( ESI )                            /* one point done         */
    JNZ       ( LLBL( G3TP2NRR_LOOP ) )          /* more points left?      */

LLBL( G3TP2NRR_DONE ):

    POP_L     ( EDI )
    POP_L     ( ESI )
    FEMMS                                        /* leave MMX/3DNow! state */
    RET
425
426
427
428
/*
 * _mesa_3dnow_transform_points2_identity
 *
 * Copies two-component points (x0, x1) unchanged into the destination:
 *     r0 = x0
 *     r1 = x1
 * Only the first 8 bytes of each 16-byte result slot are written.
 *
 * The destination gets V4F_SIZE = 2, VEC_SIZE_2 OR-ed into V4F_FLAGS and
 * V4F_COUNT copied from the source.  A count of zero skips the copy loop
 * and goes straight to the epilogue.
 *
 * Loop registers:
 *     EAX = current source point        EDI = source stride in bytes
 *     EDX = current result (16 bytes)   ESI = points still to process
 *     MM0 = scratch
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_3dnow_transform_points2_identity )
HIDDEN(_mesa_3dnow_transform_points2_identity)
GLNAME( _mesa_3dnow_transform_points2_identity ):

    PUSH_L    ( ESI )

    /* NOTE(review): ARG_* come from xform_args.h and are presumably
     * ESP-relative with FRAME_OFFSET covering the single push above;
     * keep these three loads between the two pushes. */
    MOV_L     ( ARG_DEST, ECX )
    MOV_L     ( ARG_MATRIX, ESI )	/* loaded but never dereferenced here */
    MOV_L     ( ARG_SOURCE, EAX )
    MOV_L     ( CONST(2), REGOFF(V4F_SIZE, ECX) )
    OR_B      ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )

    PUSH_L    ( EDI )

    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = first result        */
    MOV_L     ( ESI, ECX )
    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = point count         */
    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride       */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = first point         */

    /* An empty vector must bypass the loop entirely: the loop body runs
     * before its counter test, so entering it with ESI == 0 would wrap the
     * counter and copy far past both buffers. */
    TEST_L    ( ESI, ESI )
    JZ        ( LLBL( G3TPIR_4 ) )		/* count == 0 -> leave       */

ALIGNTEXT16
LLBL( G3TPIR_3 ):

    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
    ADD_L     ( EDI, EAX )		/* next vertex                       */

    MOVQ      ( MM0, REGIND(EDX) )	/* r1              | r0              */
    ADD_L     ( CONST(16), EDX )	/* next r                            */

    DEC_L     ( ESI )			/* decrement vertex counter          */
    JNZ       ( LLBL( G3TPIR_3 ) )	/* cnt > 0 ? -> process next vertex  */

LLBL( G3TPIR_4 ):

    FEMMS				/* leave MMX/3DNow! state            */
    POP_L     ( EDI )
    POP_L     ( ESI )
    RET
473#endif
474
475#if defined (__ELF__) && defined (__linux__)
476	.section .note.GNU-stack,"",%progbits
477#endif
478