1
2/*
3 * Mesa 3-D graphics library
4 * Version:  3.5
5 *
6 * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included
16 * in all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
21 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26#ifdef USE_3DNOW_ASM
27#include "assyntax.h"
28#include "matypes.h"
29#include "xform_args.h"
30
31    SEG_TEXT
32
/*
 * Number of stack bytes pushed ahead of the ARG_* reads: each routine
 * visible in this file pushes ESI (one 4-byte PUSH_L) before it touches
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE, and pushes EDI only afterwards.
 * NOTE(review): presumably consumed by the ARG_* macros from
 * xform_args.h, which is not visible here -- confirm.
 */
33#define FRAME_OFFSET	4
34
35
/*
 * _mesa_3dnow_transform_points3_general
 *
 * Multiplies every 3-component source vertex (x0, x1, x2) by the full
 * 4x4 matrix, with the translation row added unscaled, and stores a
 * 4-component result:
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *     r3 = x0*m3 + x1*m7 + x2*m11 + m15
 *
 * m<i> is the float at byte offset 4*i from the matrix pointer.
 *
 * Operands come from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The routine
 * stores 4 into the destination's V4F_SIZE, ORs VEC_SIZE_4 into its
 * V4F_FLAGS and copies V4F_COUNT from the source.
 *
 * Register roles inside the loop:
 *     EAX  source cursor, advanced by the source V4F_STRIDE (held in EDI)
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ECX  matrix base
 *     ESI  vertices still to process
 *
 * ESI and EDI are saved and restored.  FEMMS is executed on every exit
 * path, including the one taken when the count is zero.
 */
36ALIGNTEXT16
37GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
38HIDDEN(_mesa_3dnow_transform_points3_general)
39GLNAME( _mesa_3dnow_transform_points3_general ):
40
41    PUSH_L    ( ESI )
42
43    MOV_L     ( ARG_DEST, ECX )
44    MOV_L     ( ARG_MATRIX, ESI )
45    MOV_L     ( ARG_SOURCE, EAX )
46    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* results carry 4 components        */
47    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
48    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
49    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */
50
51    PUSH_L    ( EDI )			/* saved only after the ARG_* reads  */
52
53    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = destination cursor          */
54    MOV_L     ( ESI, ECX )		/* ECX = matrix base                 */
55    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter              */
56    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
57    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source cursor               */
58
59    TEST_L    ( ESI, ESI )
60    JZ        ( LLBL( G3TPGR_2 ) )	/* nothing to transform              */
61
62    PREFETCHW ( REGIND(EDX) )		/* first output line, will be written */
63
64ALIGNTEXT16
65LLBL( G3TPGR_1 ):
66
67    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
68
69    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
70    MOVD      ( REGOFF(8, EAX), MM2 )	/*                 | x2              */
71
72    ADD_L     ( EDI, EAX )		/* next vertex                       */
73    PREFETCH  ( REGIND(EAX) )		/* source is only read               */
74
75    MOVQ      ( MM0, MM1 )		/* x1              | x0              */
76    PUNPCKLDQ ( MM2, MM2 )		/* x2              | x2              */
77
78    PUNPCKLDQ ( MM0, MM0 )		/* x0              | x0              */
79    MOVQ      ( MM2, MM5 )		/* x2              | x2              */
80
81    PUNPCKHDQ ( MM1, MM1 )		/* x1              | x1              */
82    PFMUL     ( REGOFF(32, ECX), MM2 )	/* x2*m9           | x2*m8           */
83
84    MOVQ      ( MM0, MM3 )		/* x0              | x0              */
85    PFMUL     ( REGOFF(40, ECX), MM5 )	/* x2*m11          | x2*m10          */
86
87    MOVQ      ( MM1, MM4 )		/* x1              | x1              */
88    PFMUL     ( REGIND(ECX), MM0 )	/* x0*m1           | x0*m0           */
89
90    PFADD     ( REGOFF(48, ECX), MM2 )	/* x2*m9+m13       | x2*m8+m12       */
91    PFMUL     ( REGOFF(16, ECX), MM1 )	/* x1*m5           | x1*m4           */
92
93    PFADD     ( REGOFF(56, ECX), MM5 )	/* x2*m11+m15      | x2*m10+m14      */
94    PFADD     ( MM0, MM1 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */
95
96    PFMUL     ( REGOFF(8, ECX), MM3 )	/* x0*m3           | x0*m2           */
97    PFADD     ( MM1, MM2 )		/* r1              | r0              */
98
99    PFMUL     ( REGOFF(24, ECX), MM4 )	/* x1*m7           | x1*m6           */
100    ADD_L     ( CONST(16), EDX )	/* next output vertex                */
101
102    PFADD     ( MM3, MM4 )		/* x0*m3+x1*m7     | x0*m2+x1*m6     */
103    MOVQ      ( MM2, REGOFF(-16, EDX) )	/* write r0, r1                      */
104
105    PFADD     ( MM4, MM5 )		/* r3              | r2              */
106    MOVQ      ( MM5, REGOFF(-8, EDX) )	/* write r2, r3                      */
107
108    DEC_L     ( ESI )			/* decrement vertex counter          */
109    JNZ       ( LLBL( G3TPGR_1 ) )	/* cnt > 0 ? -> process next vertex  */
110
111LLBL( G3TPGR_2 ):
112
113    FEMMS				/* leave MMX/3DNow! state            */
114    POP_L     ( EDI )
115    POP_L     ( ESI )
116    RET
117
118
119
120
/*
 * _mesa_3dnow_transform_points3_perspective
 *
 * Transforms 3-component source vertices (x0, x1, x2) using only the
 * matrix entries m00, m11, m20, m21, m22 and m32, and stores a
 * 4-component result:
 *
 *     r0 = x0*m00 + x2*m20
 *     r1 = x1*m11 + x2*m21
 *     r2 = x2*m22 + m32
 *     r3 = -x2
 *
 * m<r><c> is the float at byte offset 4*(4*r + c) from the matrix
 * pointer.  The six entries are loaded into MM0-MM3 once, before the
 * loop.
 *
 * Operands come from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The routine
 * stores 4 into the destination's V4F_SIZE, ORs VEC_SIZE_4 into its
 * V4F_FLAGS and copies V4F_COUNT from the source.
 *
 * Register roles inside the loop:
 *     EAX  source cursor, advanced by the source V4F_STRIDE (held in EDI)
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ESI  vertices still to process
 *
 * ESI and EDI are saved and restored.  FEMMS is executed on every exit
 * path, including the one taken when the count is zero.
 */
121ALIGNTEXT16
122GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
123HIDDEN(_mesa_3dnow_transform_points3_perspective)
124GLNAME( _mesa_3dnow_transform_points3_perspective ):
125
126    PUSH_L    ( ESI )
127
128    MOV_L     ( ARG_DEST, ECX )
129    MOV_L     ( ARG_MATRIX, ESI )
130    MOV_L     ( ARG_SOURCE, EAX )
131    MOV_L     ( CONST(4), REGOFF(V4F_SIZE, ECX) )	/* results carry 4 components        */
132    OR_B      ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
133    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
134    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */
135
136    PUSH_L    ( EDI )			/* saved only after the ARG_* reads  */
137
138    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = destination cursor          */
139    MOV_L     ( ESI, ECX )		/* ECX = matrix base                 */
140    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter              */
141    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
142    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source cursor               */
143
144    TEST_L    ( ESI, ESI )
145    JZ        ( LLBL( G3TPPR_2 ) )	/* nothing to transform              */
146
147    PREFETCH  ( REGIND(EAX) )
148    PREFETCHW ( REGIND(EDX) )
149
150    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
151    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
152
153    MOVQ      ( REGOFF(32, ECX), MM1 )	/* m21             | m20             */
154    MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
155
156    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
157
158ALIGNTEXT16
159LLBL( G3TPPR_1 ):
160
161    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
162
163    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
164    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
165
166    ADD_L     ( EDI, EAX )		/* next vertex                       */
167    PREFETCH  ( REGIND(EAX) )
168
169    PXOR      ( MM7, MM7 )		/* 0               | 0               */
170    MOVQ      ( MM5, MM6 )		/*                 | x2              */
171
172    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
173    PFSUB     ( MM5, MM7 )		/*                 | -x2   (0 - x2)  */
174
175    PFMUL     ( MM2, MM6 )		/*                 | x2*m22          */
176    PUNPCKLDQ ( MM5, MM5 )		/* x2              | x2              */
177
178    ADD_L     ( CONST(16), EDX )	/* next r                            */
179    PFMUL     ( MM1, MM5 )		/* x2*m21          | x2*m20          */
180
181    PFADD     ( MM3, MM6 )		/*                 | x2*m22+m32      */
182    PFADD     ( MM4, MM5 )		/* x1*m11+x2*m21   | x0*m00+x2*m20   */
183
184    MOVQ      ( MM5, REGOFF(-16, EDX) )	/* write r0, r1                      */
185    MOVD      ( MM6, REGOFF(-8, EDX) )	/* write r2                          */
186
187    MOVD      ( MM7, REGOFF(-4, EDX) )	/* write r3 (= -x2)                  */
188
189    DEC_L     ( ESI )			/* decrement vertex counter          */
190    JNZ       ( LLBL( G3TPPR_1 ) )	/* cnt > 0 ? -> process next vertex  */
191
192LLBL( G3TPPR_2 ):
193
194    FEMMS				/* leave MMX/3DNow! state            */
195    POP_L     ( EDI )
196    POP_L     ( ESI )
197    RET
198
199
200
201
/*
 * _mesa_3dnow_transform_points3_3d
 *
 * Transforms 3-component source vertices (x0, x1, x2) by the upper three
 * columns of the matrix plus its translation row and stores a
 * 3-component result:
 *
 *     r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *     r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *     r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *
 * m<i> is the float at byte offset 4*i from the matrix pointer.  Only
 * 12 bytes of each 16-byte destination slot are written; the fourth
 * float is left untouched.
 *
 * Operands come from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The routine
 * stores 3 into the destination's V4F_SIZE, ORs VEC_SIZE_3 into its
 * V4F_FLAGS and copies V4F_COUNT from the source.
 *
 * Register roles inside the loop:
 *     EAX  source cursor, advanced by the source V4F_STRIDE (held in EDI)
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ECX  matrix base
 *     ESI  vertices still to process
 *     MM7  m6 | m2, loaded once before the loop
 *
 * ESI and EDI are saved and restored.  FEMMS is executed on every exit
 * path, including the one taken when the count is zero.
 */
202ALIGNTEXT16
203GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
204HIDDEN(_mesa_3dnow_transform_points3_3d)
205GLNAME( _mesa_3dnow_transform_points3_3d ):
206
207    PUSH_L    ( ESI )
208
209    MOV_L     ( ARG_DEST, ECX )
210    MOV_L     ( ARG_MATRIX, ESI )
211    MOV_L     ( ARG_SOURCE, EAX )
212    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* results carry 3 components        */
213    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
214    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
215    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */
216
217    PUSH_L    ( EDI )			/* saved only after the ARG_* reads  */
218
219    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = destination cursor          */
220    MOV_L     ( ESI, ECX )		/* ECX = matrix base                 */
221    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter              */
222    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
223    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source cursor               */
224
225    TEST_L    ( ESI, ESI )
226    JZ        ( LLBL( G3TP3R_2 ) )	/* nothing to transform              */
227
228    PREFETCH  ( REGIND(EAX) )		/* source is only read               */
229    PREFETCHW ( REGIND(EDX) )		/* destination is written: use the   */
					/* write-intent hint, as in the loop */
230
231    MOVD      ( REGOFF(8, ECX), MM7 )	/*                 | m2              */
232    PUNPCKLDQ ( REGOFF(24, ECX), MM7 )	/* m6              | m2              */
233
234
235ALIGNTEXT16
236LLBL( G3TP3R_1 ):
237
238    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
239
240    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
241    MOVD      ( REGOFF(8, EAX), MM1 )	/*                 | x2              */
242
243    ADD_L     ( EDI, EAX )		/* next vertex                       */
244    PREFETCH  ( REGIND(EAX) )
245
246    MOVQ      ( MM0, MM2 )		/* x1              | x0              */
247    ADD_L     ( CONST(16), EDX )	/* next r                            */
248
249    PUNPCKLDQ ( MM2, MM2 )		/* x0              | x0              */
250    MOVQ      ( MM0, MM3 )		/* x1              | x0              */
251
252    PFMUL     ( REGIND(ECX), MM2 )	/* x0*m1           | x0*m0           */
253    PUNPCKHDQ ( MM3, MM3 )		/* x1              | x1              */
254
255    MOVQ      ( MM1, MM4 )		/*                 | x2              */
256    PFMUL     ( REGOFF(16, ECX), MM3 )	/* x1*m5           | x1*m4           */
257
258    PUNPCKLDQ ( MM4, MM4 )		/* x2              | x2              */
259    PFADD     ( MM2, MM3 )		/* x0*m1+x1*m5     | x0*m0+x1*m4     */
260
261    PFMUL     ( REGOFF(32, ECX), MM4 )	/* x2*m9           | x2*m8           */
262    PFADD     ( REGOFF(48, ECX), MM3 )	/* x0*m1+...+m13   | x0*m0+x1*m4+m12 */
263
264    PFMUL     ( MM7, MM0 )		/* x1*m6           | x0*m2           */
265    PFADD     ( MM4, MM3 )		/* r1              | r0              */
266
267    PFMUL     ( REGOFF(40, ECX), MM1 )	/*                 | x2*m10          */
268    PUNPCKLDQ ( REGOFF(56, ECX), MM1 )	/* m14             | x2*m10          */
269
270    PFACC     ( MM0, MM1 )		/* x0*m2+x1*m6     | x2*m10+m14      */
271
272    MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */
273    PFACC     ( MM1, MM1 )		/*                 | r2              */
274
275    MOVD      ( MM1, REGOFF(-8, EDX) )	/* write r2                          */
276
277    DEC_L     ( ESI )			/* decrement vertex counter          */
278    JNZ       ( LLBL( G3TP3R_1 ) )	/* cnt > 0 ? -> process next vertex  */
279
280LLBL( G3TP3R_2 ):
281
282    FEMMS				/* leave MMX/3DNow! state            */
283    POP_L     ( EDI )
284    POP_L     ( ESI )
285    RET
286
287
288
289
/*
 * _mesa_3dnow_transform_points3_3d_no_rot
 *
 * Scales and translates 3-component source vertices (x0, x1, x2) using
 * only the matrix entries m00, m11, m22, m30, m31 and m32, and stores a
 * 3-component result:
 *
 *     r0 = x0*m00 + m30
 *     r1 = x1*m11 + m31
 *     r2 = x2*m22 + m32
 *
 * m<r><c> is the float at byte offset 4*(4*r + c) from the matrix
 * pointer.  The entries are loaded into MM0-MM3 once, before the loop.
 * Only 12 bytes of each 16-byte destination slot are written; the
 * fourth float is left untouched.
 *
 * Operands come from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The routine
 * stores 3 into the destination's V4F_SIZE, ORs VEC_SIZE_3 into its
 * V4F_FLAGS and copies V4F_COUNT from the source.
 *
 * Register roles inside the loop:
 *     EAX  source cursor, advanced by the source V4F_STRIDE (held in EDI)
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ESI  vertices still to process
 *
 * ESI and EDI are saved and restored.  FEMMS is executed on every exit
 * path, including the one taken when the count is zero.
 */
290ALIGNTEXT16
291GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
292HIDDEN(_mesa_3dnow_transform_points3_3d_no_rot)
293GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):
294
295    PUSH_L    ( ESI )
296
297    MOV_L     ( ARG_DEST, ECX )
298    MOV_L     ( ARG_MATRIX, ESI )
299    MOV_L     ( ARG_SOURCE, EAX )
300    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* results carry 3 components        */
301    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
302    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
303    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */
304
305    PUSH_L    ( EDI )			/* saved only after the ARG_* reads  */
306
307    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = destination cursor          */
308    MOV_L     ( ESI, ECX )		/* ECX = matrix base                 */
309    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter              */
310    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
311    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source cursor               */
312
313    TEST_L    ( ESI, ESI )
314    JZ        ( LLBL( G3TP3NRR_2 ) )	/* nothing to transform              */
315
316    PREFETCH  ( REGIND(EAX) )
317    PREFETCHW ( REGIND(EDX) )
318
319    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
320    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
321
322    MOVD      ( REGOFF(40, ECX), MM2 )	/*                 | m22             */
323    PUNPCKLDQ ( MM2, MM2 )		/* m22             | m22             */
324
325    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
326    MOVD      ( REGOFF(56, ECX), MM3 )	/*                 | m32             */
327
328    PUNPCKLDQ ( MM3, MM3 )		/* m32             | m32             */
329
330
331ALIGNTEXT16
332LLBL( G3TP3NRR_1 ):
333
334    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
335
336    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
337    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
338
339    ADD_L     ( EDI, EAX )		/* next vertex                       */
340    PREFETCH  ( REGIND(EAX) )		/* source is only read: plain        */
					/* prefetch, no write-intent hint    */
341
342    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
343
344    PFADD     ( MM1, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
345    PFMUL     ( MM2, MM5 )		/*                 | x2*m22          */
346
347    PFADD     ( MM3, MM5 )		/*                 | x2*m22+m32      */
348    MOVQ      ( MM4, REGIND(EDX) )	/* write r0, r1                      */
349
350    ADD_L     ( CONST(16), EDX )	/* next r                            */
351    DEC_L     ( ESI )			/* decrement vertex counter          */
352
353    MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2 (flags from DEC_L kept)  */
354    JNZ       ( LLBL( G3TP3NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */
355
356LLBL( G3TP3NRR_2 ):
357
358    FEMMS				/* leave MMX/3DNow! state            */
359    POP_L     ( EDI )
360    POP_L     ( ESI )
361    RET
362
363
364
365
/*
 * _mesa_3dnow_transform_points3_2d
 *
 * Applies a 2D transform to the first two components of each
 * 3-component source vertex (x0, x1, x2) and passes the third through:
 *
 *     r0 = x0*m00 + x1*m10 + m30
 *     r1 = x0*m01 + x1*m11 + m31
 *     r2 = x2
 *
 * m<r><c> is the float at byte offset 4*(4*r + c) from the matrix
 * pointer.  The entries are loaded into MM0-MM2 once, before the loop.
 * Only 12 bytes of each 16-byte destination slot are written; the
 * fourth float is left untouched.
 *
 * Operands come from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The routine
 * stores 3 into the destination's V4F_SIZE, ORs VEC_SIZE_3 into its
 * V4F_FLAGS and copies V4F_COUNT from the source.
 *
 * Register roles inside the loop:
 *     EAX  source cursor, advanced by the source V4F_STRIDE (held in EDI)
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ESI  vertices still to process
 *
 * ESI and EDI are saved and restored.  FEMMS is executed on every exit
 * path, including the one taken when the count is zero.
 */
366ALIGNTEXT16
367GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
368HIDDEN(_mesa_3dnow_transform_points3_2d)
369GLNAME( _mesa_3dnow_transform_points3_2d ):
370
371    PUSH_L    ( ESI )
372
373    MOV_L     ( ARG_DEST, ECX )
374    MOV_L     ( ARG_MATRIX, ESI )
375    MOV_L     ( ARG_SOURCE, EAX )
376    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* results carry 3 components        */
377    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
378    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
379    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */
380
381    PUSH_L    ( EDI )			/* saved only after the ARG_* reads  */
382
383    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = destination cursor          */
384    MOV_L     ( ESI, ECX )		/* ECX = matrix base                 */
385    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter              */
386    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
387    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source cursor               */
388
389    TEST_L    ( ESI, ESI )
390    JZ        ( LLBL( G3TP2R_3) )	/* nothing to transform              */
391
392    PREFETCH  ( REGIND(EAX) )
393    PREFETCHW ( REGIND(EDX) )
394
395    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
396    PUNPCKLDQ ( REGOFF(16, ECX), MM0 )	/* m10             | m00             */
397
398    MOVD      ( REGOFF(4, ECX), MM1 )	/*                 | m01             */
399    PUNPCKLDQ ( REGOFF(20, ECX), MM1 )	/* m11             | m01             */
400
401    MOVQ      ( REGOFF(48, ECX), MM2 )	/* m31             | m30             */
402
403ALIGNTEXT16
404LLBL( G3TP2R_2 ):
405
406    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
407
408    MOVQ      ( REGIND(EAX), MM3 )	/* x1              | x0              */
409    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
410
411    ADD_L     ( EDI, EAX )		/* next vertex                       */
412    PREFETCH  ( REGIND(EAX) )
413
414    MOVQ      ( MM3, MM4 )		/* x1              | x0              */
415    PFMUL     ( MM0, MM3 )		/* x1*m10          | x0*m00          */
416
417    ADD_L     ( CONST(16), EDX )	/* next r                            */
418    PFMUL     ( MM1, MM4 )		/* x1*m11          | x0*m01          */
419
420    PFACC     ( MM4, MM3 )		/* x0*m01+x1*m11   | x0*m00+x1*m10   */
421    MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2 (=x2)                    */
422
423    PFADD     ( MM2, MM3 )		/* x0*...*m11+m31  | x0*...*m10+m30  */
424    MOVQ      ( MM3, REGOFF(-16, EDX) )	/* write r0, r1                      */
425
426    DEC_L     ( ESI )			/* decrement vertex counter          */
427    JNZ       ( LLBL( G3TP2R_2 ) )	/* cnt > 0 ? -> process next vertex  */
428
429LLBL( G3TP2R_3 ):
430
431    FEMMS				/* leave MMX/3DNow! state            */
432    POP_L     ( EDI )
433    POP_L     ( ESI )
434    RET
435
436
437
438
/*
 * _mesa_3dnow_transform_points3_2d_no_rot
 *
 * Scales and translates the first two components of each 3-component
 * source vertex (x0, x1, x2) and passes the third through:
 *
 *     r0 = x0*m00 + m30
 *     r1 = x1*m11 + m31
 *     r2 = x2
 *
 * m<r><c> is the float at byte offset 4*(4*r + c) from the matrix
 * pointer.  The entries are loaded into MM0 and MM1 once, before the
 * loop.  Only 12 bytes of each 16-byte destination slot are written;
 * the fourth float is left untouched.
 *
 * Operands come from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The routine
 * stores 3 into the destination's V4F_SIZE, ORs VEC_SIZE_3 into its
 * V4F_FLAGS and copies V4F_COUNT from the source.
 *
 * Register roles inside the loop:
 *     EAX  source cursor, advanced by the source V4F_STRIDE (held in EDI)
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ESI  vertices still to process
 *
 * ESI and EDI are saved and restored.  FEMMS is executed on every exit
 * path, including the one taken when the count is zero.
 */
439ALIGNTEXT16
440GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
441HIDDEN(_mesa_3dnow_transform_points3_2d_no_rot)
442GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):
443
444    PUSH_L    ( ESI )
445
446    MOV_L     ( ARG_DEST, ECX )
447    MOV_L     ( ARG_MATRIX, ESI )
448    MOV_L     ( ARG_SOURCE, EAX )
449    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* results carry 3 components        */
450    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
451    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
452    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */
453
454    PUSH_L    ( EDI )			/* saved only after the ARG_* reads  */
455
456    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = destination cursor          */
457    MOV_L     ( ESI, ECX )		/* ECX = matrix base                 */
458    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter              */
459    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
460    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source cursor               */
461
462    TEST_L    ( ESI, ESI )
463    JZ        ( LLBL( G3TP2NRR_2 ) )	/* nothing to transform              */
464
465    PREFETCH  ( REGIND(EAX) )
466    PREFETCHW ( REGIND(EDX) )
467
468    MOVD      ( REGIND(ECX), MM0 )	/*                 | m00             */
469    PUNPCKLDQ ( REGOFF(20, ECX), MM0 )	/* m11             | m00             */
470
471    MOVQ      ( REGOFF(48, ECX), MM1 )	/* m31             | m30             */
472
473
474ALIGNTEXT16
475LLBL( G3TP2NRR_1 ):
476
477    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
478
479    MOVQ      ( REGIND(EAX), MM4 )	/* x1              | x0              */
480    MOVD      ( REGOFF(8, EAX), MM5 )	/*                 | x2              */
481
482    ADD_L     ( EDI, EAX )		/* next vertex                       */
483    PREFETCH  ( REGIND(EAX) )
484
485    PFMUL     ( MM0, MM4 )		/* x1*m11          | x0*m00          */
486    ADD_L     ( CONST(16), EDX )	/* next r                            */
487
488    PFADD     ( MM1, MM4 )		/* x1*m11+m31      | x0*m00+m30      */
489
490    MOVQ      ( MM4, REGOFF(-16, EDX) )	/* write r0, r1                      */
491    MOVD      ( MM5, REGOFF(-8, EDX) )	/* write r2 (=x2)                    */
492
493    DEC_L     ( ESI )			/* decrement vertex counter          */
494    JNZ       ( LLBL( G3TP2NRR_1 ) )	/* cnt > 0 ? -> process next vertex  */
495
496LLBL( G3TP2NRR_2 ):
497
498    FEMMS				/* leave MMX/3DNow! state            */
499    POP_L     ( EDI )
500    POP_L     ( ESI )
501    RET
502
503
504
505
/*
 * _mesa_3dnow_transform_points3_identity
 *
 * Copies the three components (x0, x1, x2) of every source vertex to
 * the destination unchanged:
 *
 *     r0 = x0,  r1 = x1,  r2 = x2
 *
 * The matrix pointer is fetched from ARG_MATRIX and moved into ECX but
 * is never dereferenced.  Only 12 bytes of each 16-byte destination
 * slot are written; the fourth float is left untouched.
 *
 * Operands come from ARG_DEST, ARG_MATRIX and ARG_SOURCE.  The routine
 * stores 3 into the destination's V4F_SIZE, ORs VEC_SIZE_3 into its
 * V4F_FLAGS and copies V4F_COUNT from the source.
 *
 * Register roles inside the loop:
 *     EAX  source cursor, advanced by the source V4F_STRIDE (held in EDI)
 *     EDX  destination cursor, advanced by 16 bytes per vertex
 *     ESI  vertices still to process
 *
 * ESI and EDI are saved and restored.  FEMMS is executed on every exit
 * path, including the one taken when the count is zero.
 */
506ALIGNTEXT16
507GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
508HIDDEN(_mesa_3dnow_transform_points3_identity)
509GLNAME( _mesa_3dnow_transform_points3_identity ):
510
511    PUSH_L    ( ESI )
512
513    MOV_L     ( ARG_DEST, ECX )
514    MOV_L     ( ARG_MATRIX, ESI )
515    MOV_L     ( ARG_SOURCE, EAX )
516    MOV_L     ( CONST(3), REGOFF(V4F_SIZE, ECX) )	/* results carry 3 components        */
517    OR_B      ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
518    MOV_L     ( REGOFF(V4F_COUNT, EAX), EDX )
519    MOV_L     ( EDX, REGOFF(V4F_COUNT, ECX) )	/* dest count = source count         */
520
521    PUSH_L    ( EDI )			/* saved only after the ARG_* reads  */
522
523    MOV_L     ( REGOFF(V4F_START, ECX), EDX )	/* EDX = destination cursor          */
524    MOV_L     ( ESI, ECX )		/* ECX = matrix base (unused below)  */
525    MOV_L     ( REGOFF(V4F_COUNT, EAX), ESI )	/* ESI = vertex counter              */
526    MOV_L     ( REGOFF(V4F_STRIDE, EAX), EDI )	/* EDI = source stride               */
527    MOV_L     ( REGOFF(V4F_START, EAX), EAX )	/* EAX = source cursor               */
528
529    TEST_L    ( ESI, ESI )
530    JZ        ( LLBL( G3TPIR_2 ) )	/* nothing to copy                   */
531
532    PREFETCHW ( REGIND(EDX) )
533
534ALIGNTEXT16
535LLBL( G3TPIR_1 ):
536
537    PREFETCHW ( REGOFF(32, EDX) )	/* prefetch 2 vertices ahead         */
538
539    MOVQ      ( REGIND(EAX), MM0 )	/* x1              | x0              */
540    MOVD      ( REGOFF(8, EAX), MM1 )	/*                 | x2              */
541
542    ADD_L     ( EDI, EAX )		/* next vertex                       */
543    ADD_L     ( CONST(16), EDX )	/* next r                            */
544
545    DEC_L     ( ESI )			/* decrement vertex counter          */
546    MOVQ      ( MM0, REGOFF(-16, EDX) )	/* r1              | r0              */
547
548    MOVD      ( MM1, REGOFF(-8, EDX) )	/*                 | r2              */
549    JNZ       ( LLBL( G3TPIR_1 ) )	/* tests DEC_L's flags; the MMX      */
					/* stores in between leave them alone */
550
551LLBL( G3TPIR_2 ):
552
553    FEMMS				/* leave MMX/3DNow! state            */
554    POP_L     ( EDI )
555    POP_L     ( ESI )
556    RET
557#endif
558
559#if defined (__ELF__) && defined (__linux__)
560	.section .note.GNU-stack,"",%progbits
561#endif
562