3dnow_normal.S revision 946ad2720a00696a59a253d81110ea96d397a463
1/* $Id: 3dnow_normal.S,v 1.3 2002/08/08 15:36:50 brianp Exp $ */
2
3/*
4 * Mesa 3-D graphics library
5 * Version:  4.1
6 *
7 * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27/*
28 * 3Dnow assembly code by Holger Waechtler
29 */
30
31#include "matypes.h"
32#include "norm_args.h"
33
        SEG_TEXT

/* M(i): the i-th 32-bit element (byte offset i*4) of the array addressed by
 * ECX.  The routines below that use M() load mat->inv into ECX first. */
#define M(i)    REGOFF(i * 4, ECX)
/* STRIDE: the dword at byte offset 12 from ESI.  Every routine below loads
 * ARG_IN into ESI and adds STRIDE to its input pointer to step to the next
 * normal. */
#define STRIDE  REGOFF(12, ESI)
38
39
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Works in two passes over in->count normals:
 *   pass 1: multiply every input normal by the 3x3 part of mat->inv
 *           (elements m0-m2, m4-m6, m8-m10) and store the result in the
 *           destination array, 16 bytes per destination entry;
 *   pass 2: normalize the stored results in place, either by multiplying
 *           with the per-normal factor read from `lengths`, or, when
 *           lengths == 0, by computing 1/sqrt(x0*x0 + x1*x1 + x2*x2) with
 *           PFRSQRT refined by PFRSQIT1/PFRCPIT2.
 *
 * Arguments are read from the stack through the ARG_* macros (ARG_MAT,
 * ARG_SCALE, ARG_IN, ARG_LENGTHS, ARG_DEST); FRAME_OFFSET must always
 * equal the number of bytes pushed since entry.
 *
 * Register use:
 *   ESI      in (input vector descriptor, also the base for STRIDE)
 *   EDI      lengths (0 = lengths have to be computed)
 *   ECX      mat->inv
 *   EDX      current input normal
 *   EAX      current destination entry
 *   EBP      normals left to process
 *   MM3-MM7  matrix elements during pass 1
 *   MM0-MM2  scratch
 *
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX, the flags and
 * the MMX registers are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

 #define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, ESI), EDX ) /*  in->start    */
    MOV_L      ( REGOFF(V3F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */

    CMP_L      ( CONST(0), EBP )        /*  count == 0 ? nothing to do     */
    JE         ( LLBL (G3TN_end) )

    FEMMS

    PUSH_L     ( EBP )
    PUSH_L     ( EAX )
    PUSH_L     ( EDX )                  /*  save counter & pointers for    */
                                        /*  the normalize pass             */
 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 24

    MOVQ       ( M(0), MM3 )            /*  m1              | m0           */
    MOVQ       ( M(4), MM4 )            /*  m5              | m4           */

    MOVD       ( M(2), MM5 )            /*                  | m2           */
    PUNPCKLDQ  ( M(6), MM5 )            /*  m6              | m2           */

    MOVQ       ( M(8), MM6 )            /*  m9              | m8           */
    MOVD       ( M(10), MM7 )           /*                  | m10          */

    /* NOTE(review): the scale factor is folded into the matrix only when
     * lengths == 0, i.e. exactly when pass 2 recomputes the length and so
     * cancels the magnitude of the scale again.  Confirm against the C
     * reference implementation whether this test is inverted. */
    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?                 */
    JNE        ( LLBL (G3TN_scale_end ) )

    MOVD       ( ARG_SCALE, MM0 )       /*               | scale           */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale           */

    PFMUL      ( MM0, MM3 )             /* scale * m1    | scale * m0      */
    PFMUL      ( MM0, MM4 )             /* scale * m5    | scale * m4      */
    PFMUL      ( MM0, MM5 )             /* scale * m6    | scale * m2      */
    PFMUL      ( MM0, MM6 )             /* scale * m9    | scale * m8      */
    PFMUL      ( MM0, MM7 )             /*               | scale * m10     */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):                  /*  pass 1: transform              */
    MOVQ       ( REGIND (EDX), MM0 )    /*  x1              | x0           */
    MOVD       ( REGOFF (8, EDX), MM2 ) /*                  | x2           */

    MOVQ       ( MM0, MM1 )             /*  x1              | x0           */
    PUNPCKLDQ  ( MM2, MM2 )             /*  x2              | x2           */

    PFMUL      ( MM3, MM0 )             /*  x1*m1           | x0*m0        */
    ADD_L      ( CONST(16), EAX )       /*  next r                         */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )             /*  x1*m5           | x0*m4        */
    PFACC      ( MM1, MM0 )             /*  x0*m4+x1*m5     | x0*m0+x1*m1  */

    PFMUL      ( MM5, MM2 )             /*  x2*m6           | x2*m2        */
    PFADD      ( MM2, MM0 )             /*  r1              | r0           */

    MOVQ       ( REGIND (EDX), MM1 )    /*  x1              | x0           */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                   */

    PFMUL      ( MM6, MM1 )             /*  x1*m9           | x0*m8        */
    MOVD       ( REGOFF (8, EDX), MM2 ) /*                  | x2           */

    PFMUL      ( MM7, MM2 )             /*                  | x2*m10       */
    PFACC      ( MM1, MM1 )             /*  *not used*      | x0*m8+x1*m9  */

    PFADD      ( MM2, MM1 )             /*  *not used*      | r2           */
    ADD_L      ( STRIDE, EDX )          /*  next normal                    */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) ) /*  write r2                       */
    DEC_L      ( EBP )                  /*  decrement normal counter       */
    JNE        ( LLBL (G3TN_transform) ) /* DEC leaves CF alone: test ZF only */


    POP_L      ( EDX )                  /*  end of transform ---           */
    POP_L      ( EAX )                  /*    now normalizing ...          */
    POP_L      ( EBP )

    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?                 */
    JE         ( LLBL (G3TN_norm ) )    /*  calculate lengths              */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):             /*  pass 2a: use given lengths     */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )     /*  x1              | x0           */
    MOVD       ( REGOFF(8, EAX), MM1 )  /*                  | x2           */

    MOVD       ( REGIND (EDI), MM3 )    /*                  | length (x)   */
    PFMUL      ( MM3, MM1 )             /*                  | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )             /*  length (x)      | length (x)   */
    PFMUL      ( MM3, MM0 )             /*  x1 (normalized) | x0 (normalized) */

    ADD_L      ( CONST(4), EDI )        /*  next length                    */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )     /*  write new x0, x1               */
    MOVD       ( MM1, REGOFF(8, EAX) )  /*  write new x2                   */

    ADD_L      ( CONST(16), EAX )       /*  next r                         */
    DEC_L      ( EBP )                  /*  decrement normal counter       */

    JNE        ( LLBL (G3TN_norm_w_lengths) )
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                       /*  pass 2b: compute lengths       */

    PREFETCHW  ( REGIND(EAX) )

    /* Load the entry at the top of every iteration: MM0/MM1 still hold
     * pass-1 leftovers on entry, and a tail preload would read past the
     * last destination entry. */
    MOVQ       ( REGIND (EAX), MM0 )    /*  x1              | x0           */
    MOVD       ( REGOFF(8, EAX), MM1 )  /*                  | x2           */

    MOVQ       ( MM0, MM3 )             /*  x1              | x0           */
    MOVQ       ( MM1, MM4 )             /*                  | x2           */

    PFMUL      ( MM0, MM3 )             /*  x1*x1           | x0*x0        */
    ADD_L      ( CONST(16), EAX )       /*  next r                         */

    PFMUL      ( MM1, MM4 )             /*                  | x2*x2        */
    PFADD      ( MM4, MM3 )             /*  x1*x1           | x0*x0+x2*x2  */

    PFACC      ( MM3, MM3 )             /*  both halves: x0*x0+x1*x1+x2*x2 */
    PFRSQRT    ( MM3, MM5 )             /*  ~1/sqrt (x0*x0+x1*x1+x2*x2)    */

    MOVQ       ( MM5, MM4 )             /*  keep the first approximation   */
    PUNPCKLDQ  ( MM3, MM3 )

    PFMUL      ( MM5, MM5 )             /*  approximation squared          */

    PFRSQIT1   ( MM3, MM5 )             /*  refinement step 1              */
    PFRCPIT2   ( MM4, MM5 )             /*  refinement step 2              */

    PFMUL      ( MM5, MM0 )             /*  x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM0, REGOFF(-16, EAX) ) /*  write new x0, x1              */
    PFMUL      ( MM5, MM1 )             /*                  | x2 (normalized) */

    MOVD       ( MM1, REGOFF(-8, EAX) ) /*  write new x2                   */

    DEC_L      ( EBP )                  /*  decrement normal counter       */
    JNE        ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
214
215
216
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * For each of in->count normals: multiply the components by the diagonal
 * elements m0, m5, m10 of mat->inv, normalize, and store the result in
 * the destination array (16 bytes per destination entry).  Normalization
 * multiplies with the factor read from `lengths`, or, when lengths == 0,
 * computes 1/sqrt(x0*x0 + x1*x1 + x2*x2) with PFRSQRT refined by
 * PFRSQIT1/PFRCPIT2.
 *
 * Arguments are read through the ARG_* macros (ARG_MAT, ARG_SCALE,
 * ARG_IN, ARG_LENGTHS, ARG_DEST).
 *
 * Register use:
 *   ESI  in (base for STRIDE)     EDI  lengths (0 = compute)
 *   ECX  mat->inv                 EDX  current input normal
 *   EAX  current destination      EBP  normals left to process
 *   MM0  m5 | m0                  MM2  m10 | m10
 *
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX, the flags and
 * the MMX registers are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

 #undef FRAME_OFFSET
 #define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX ) /*  in->start    */

    CMP_L      ( CONST(0), EBP )        /*  count == 0 ? nothing to do        */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )            /*               | m0                 */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5            | m0                 */

    MOVD       ( M(10), MM2 )           /*               | m10                */
    PUNPCKLDQ  ( MM2, MM2 )             /* m10           | m10                */

    /* NOTE(review): the scale factor is folded into the matrix only when
     * lengths == 0, i.e. exactly when the length is recomputed below and
     * the magnitude of the scale cancels again.  Confirm against the C
     * reference implementation whether this test is inverted. */
    CMP_L      ( CONST(0), EDI )        /*  lengths == 0 ?                    */
    JNE        ( LLBL (G3TNNR_scale_end ) )

    MOVD       ( ARG_SCALE, MM7 )       /*               | scale              */
    PUNPCKLDQ  ( MM7, MM7 )             /* scale         | scale              */

    PFMUL      ( MM7, MM0 )             /* scale * m5    | scale * m0         */
    PFMUL      ( MM7, MM2 )             /* scale * m10   | scale * m10        */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    CMP_L      ( CONST(0), EDI )        /* lengths == 0 ?                     */
    JE         ( LLBL (G3TNNR_norm) )   /* need to calculate lengths          */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):           /* use precalculated lengths          */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM7 )  /*               | x2                 */

    /* The length is loaded at the top of each iteration so that nothing
     * is read beyond the last element of the lengths array. */
    MOVD       ( REGIND(EDI), MM3 )     /*               | length (x)         */

    PFMUL      ( MM0, MM6 )             /* x1*m5         | x0*m0              */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )             /*               | x2*m10             */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    PFMUL      ( MM3, MM7 )             /*               | x2 (normalized)    */
    PUNPCKLDQ  ( MM3, MM3 )             /* length (x)    | length (x)         */

    ADD_L      ( CONST(4), EDI )        /* next length                        */
    PFMUL      ( MM3, MM6 )             /* x1 (normalized) | x0 (normalized)  */

    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                      */
    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                           */

    DEC_L      ( EBP )                  /* decrement normal counter           */
    JNE        ( LLBL (G3TNNR_norm_w_lengths) ) /* DEC leaves CF alone: test ZF only */
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                     /* need to calculate lengths          */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )     /* x1              | x0               */
    MOVD       ( REGOFF(8, EDX), MM7 )  /*                 | x2               */

    PFMUL      ( MM0, MM6 )             /* x1*m5           | x0*m0            */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    PFMUL      ( MM2, MM7 )             /*                 | x2*m10           */
    MOVQ       ( MM6, MM3 )             /* x1 (transformed)| x0 (transformed) */

    MOVQ       ( MM7, MM4 )             /*                 | x2 (transformed) */
    PFMUL      ( MM6, MM3 )             /* x1*x1           | x0*x0            */


    PFMUL      ( MM7, MM4 )             /*                 | x2*x2            */
    PFACC      ( MM3, MM3 )             /* **not used**    | x0*x0+x1*x1      */

    PFADD      ( MM4, MM3 )             /*                 | x0*x0+x1*x1+x2*x2*/
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )             /* ~1/sqrt (x0*x0+x1*x1+x2*x2)        */
    MOVQ       ( MM5, MM4 )             /* keep the first approximation       */

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )             /* approximation squared              */

    PFRSQIT1   ( MM3, MM5 )             /* refinement step 1                  */
    PFRCPIT2   ( MM4, MM5 )             /* refinement step 2                  */

    PFMUL      ( MM5, MM6 )             /* x1 (normalized) | x0 (normalized)  */

    MOVQ       ( MM6, REGOFF(-16, EAX) ) /* write r0, r1                      */
    PFMUL      ( MM5, MM7 )             /*                 | x2 (normalized)  */

    MOVD       ( MM7, REGOFF(-8, EAX) ) /* write r2                           */

    DEC_L      ( EBP )                  /* decrement normal counter           */
    JNE        ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
350
351
352
353
354
355
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * For each of in->count normals: multiply the components by
 * scale*m0, scale*m5 and scale*m10 (diagonal of mat->inv) and store the
 * result in the destination array (16 bytes per destination entry).
 *
 * Arguments are read through the ARG_* macros (ARG_MAT, ARG_SCALE,
 * ARG_IN, ARG_DEST).
 *
 * Register use:
 *   ESI  in (base for STRIDE)     ECX  mat->inv
 *   EDX  current input normal     EAX  current destination entry
 *   EBP  normals left to process
 *   MM0  scale*m5 | scale*m0      MM2  scale*m10 (low half)
 *
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX, the flags and
 * the MMX registers are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

 #undef FRAME_OFFSET
 #define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */
    MOV_L      ( REGOFF(V3F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX ) /*  in->start    */

    CMP_L      ( CONST(0), EBP )        /* count == 0 ? nothing to do         */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )       /*               | scale              */
    PUNPCKLDQ  ( MM6, MM6 )             /* scale         | scale              */

    MOVD       ( M(0), MM0 )            /*               | m0                 */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5            | m0                 */

    PFMUL      ( MM6, MM0 )             /* scale*m5      | scale*m0           */
    MOVD       ( M(10), MM2 )           /*               | m10                */

    PFMUL      ( MM6, MM2 )             /*               | scale*m10          */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM5 )  /*               | x2                 */

    PFMUL      ( MM0, MM4 )             /* x1*m5         | x0*m0              */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )             /*               | x2*m10             */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                      */
    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                           */

    DEC_L      ( EBP )                  /* decrement normal counter           */
    JNE        ( LLBL (G3TRNR_rescale) ) /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
422
423
424
425
426
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * For each of in->count normals: multiply by the 3x3 part of mat->inv
 * (elements m0-m2, m4-m6, m8-m10), each pre-multiplied by scale, and
 * store the result in the destination array (16 bytes per entry).
 *
 * Arguments are read through the ARG_* macros (ARG_MAT, ARG_SCALE,
 * ARG_IN, ARG_DEST).
 *
 * Register use:
 *   ESI  in (base for STRIDE)     ECX  mat->inv
 *   EDX  current input normal     EAX  current destination entry
 *   EDI  normals left to process
 *   MM3-MM7  scaled matrix elements, MM0-MM2 scratch
 *
 * EDI and ESI are saved and restored; EAX, ECX, EDX, the flags and the
 * MMX registers are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI ) /*  dest->count = in->count   */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX ) /*  in->start    */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */

    CMP_L      ( CONST(0), EDI )        /* count == 0 ? nothing to do         */
    JE         ( LLBL (G3TR_end) )

    FEMMS

    MOVQ       ( M(0), MM3 )            /* m1            | m0                 */

    MOVQ       ( M(4), MM4 )            /* m5            | m4                 */
    MOVD       ( ARG_SCALE, MM0 )       /*               | scale              */

    MOVD       ( M(2), MM5 )            /*               | m2                 */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */

    PUNPCKLDQ  ( M(6), MM5 )            /* m6            | m2                 */
    PFMUL      ( MM0, MM3 )             /* scale*m1      | scale*m0           */

    MOVQ       ( M(8), MM6 )            /* m9            | m8                 */
    PFMUL      ( MM0, MM4 )             /* scale*m5      | scale*m4           */

    MOVD       ( M(10), MM7 )           /*               | m10                */
    PFMUL      ( MM0, MM5 )             /* scale*m6      | scale*m2           */

    PFMUL      ( MM0, MM6 )             /* scale*m9      | scale*m8           */

    PFMUL      ( MM0, MM7 )             /*               | scale*m10          */

ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */

    MOVQ       ( MM0, MM1 )             /* x1            | x0                 */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                 */

    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0              */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4              */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1        */

    MOVQ       ( REGIND(EDX), MM1 )     /* x1            | x0                 */

    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2              */
    PFADD      ( MM2, MM0 )             /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2  */

    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                      */
    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8              */

    PFMUL      ( MM7, MM2 )             /*               | x2*m10             */
    PFACC      ( MM1, MM1 )             /* *not used*    | x0*m8+x1*m9        */

    PFADD      ( MM2, MM1 )             /* *not used*    | x0*m8+x1*m9+x2*m10 */
    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                           */

    DEC_L      ( EDI )                  /* decrement normal counter           */
    JNE        ( LLBL (G3TR_rescale) )  /* DEC leaves CF alone: test ZF only  */

    FEMMS

LLBL (G3TR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
517
518
519
520
521
522
523
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * For each of in->count normals: multiply the components by the diagonal
 * elements m0, m5, m10 of mat->inv and store the result in the
 * destination array (16 bytes per destination entry).
 *
 * Arguments are read through the ARG_* macros (ARG_MAT, ARG_IN,
 * ARG_DEST).
 *
 * Register use:
 *   ESI  in (base for STRIDE)     ECX  mat->inv
 *   EDX  current input normal     EAX  current destination entry
 *   EDI  normals left to process
 *   MM0  m5 | m0                  MM2  m10 | m10
 *
 * EDI and ESI are saved and restored; EAX, ECX, EDX, the flags and the
 * MMX registers are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI ) /*  dest->count = in->count   */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX ) /*  in->start    */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */

    CMP_L      ( CONST(0), EDI )        /* count == 0 ? nothing to do         */
    JE         ( LLBL (G3TNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )            /*               | m0                 */
    PUNPCKLDQ  ( M(5), MM0 )            /* m5            | m0                 */

    MOVD       ( M(10), MM2 )           /*               | m10                */
    PUNPCKLDQ  ( MM2, MM2 )             /* m10           | m10                */

ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM5 )  /*               | x2                 */

    PFMUL      ( MM0, MM4 )             /* x1*m5         | x0*m0              */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )             /*               | x2*m10             */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    MOVQ       ( MM4, REGOFF(-16, EAX) ) /* write r0, r1                      */
    MOVD       ( MM5, REGOFF(-8, EAX) ) /* write r2                           */

    DEC_L      ( EDI )                  /* decrement normal counter           */
    JNE        ( LLBL (G3TNR_transform) ) /* DEC leaves CF alone: test ZF only */

    FEMMS

LLBL (G3TNR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
582
583
584
585
586
587
588
589
/*
 * _mesa_3dnow_transform_normals
 *
 * For each of in->count normals: multiply by the 3x3 part of mat->inv
 * (elements m0-m2, m4-m6, m8-m10) and store the result in the
 * destination array (16 bytes per destination entry).
 *
 * Arguments are read through the ARG_* macros (ARG_MAT, ARG_IN,
 * ARG_DEST).
 *
 * Register use:
 *   ESI  in (base for STRIDE)     ECX  mat->inv
 *   EDX  current input normal     EAX  current destination entry
 *   EDI  normals left to process
 *   MM3-MM7  matrix elements, MM0-MM2 scratch
 *
 * EDI and ESI are saved and restored; EAX, ECX, EDX, the flags and the
 * MMX registers are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI ) /*  dest->count = in->count   */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX ) /*  in->start    */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX ) /*  mat->inv     */

    CMP_L      ( CONST(0), EDI )        /* count == 0 ? nothing to do         */
    JE         ( LLBL (G3T_end) )

    FEMMS

    MOVQ       ( M(0), MM3 )            /* m1            | m0                 */
    MOVQ       ( M(4), MM4 )            /* m5            | m4                 */

    MOVD       ( M(2), MM5 )            /*               | m2                 */
    PUNPCKLDQ  ( M(6), MM5 )            /* m6            | m2                 */

    MOVQ       ( M(8), MM6 )            /* m9            | m8                 */
    MOVD       ( M(10), MM7 )           /*               | m10                */

ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */

    MOVQ       ( MM0, MM1 )             /* x1            | x0                 */
    PUNPCKLDQ  ( MM2, MM2 )             /* x2            | x2                 */

    PFMUL      ( MM3, MM0 )             /* x1*m1         | x0*m0              */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    PFMUL      ( MM4, MM1 )             /* x1*m5         | x0*m4              */
    PFACC      ( MM1, MM0 )             /* x0*m4+x1*m5   | x0*m0+x1*m1        */

    PFMUL      ( MM5, MM2 )             /* x2*m6         | x2*m2              */
    PFADD      ( MM2, MM0 )             /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2  */

    MOVQ       ( REGIND(EDX), MM1 )     /* x1            | x0                 */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write r0, r1                      */

    PFMUL      ( MM6, MM1 )             /* x1*m9         | x0*m8              */
    MOVD       ( REGOFF(8, EDX), MM2 )  /*               | x2                 */

    PFMUL      ( MM7, MM2 )             /*               | x2*m10             */
    ADD_L      ( STRIDE, EDX )          /* next normal                        */

    PREFETCH   ( REGIND(EDX) )

    PFACC      ( MM1, MM1 )             /* *not used*    | x0*m8+x1*m9        */
    PFADD      ( MM2, MM1 )             /* *not used*    | x0*m8+x1*m9+x2*m10 */

    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write r2                           */
    DEC_L      ( EDI )                  /* decrement normal counter           */

    JNE        ( LLBL (G3T_transform) ) /* DEC leaves CF alone: test ZF only  */

    FEMMS

LLBL (G3T_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
668
669
670
671
672
673
/*
 * _mesa_3dnow_normalize_normals
 *
 * For each of in->count normals: normalize the input normal and store
 * the result in the destination array (16 bytes per destination entry).
 * Normalization multiplies with the factor read from `lengths`, or, when
 * lengths == 0, computes 1/sqrt(x0*x0 + x1*x1 + x2*x2) with PFRSQRT
 * refined by PFRSQIT1/PFRCPIT2.
 *
 * Arguments are read through the ARG_* macros (ARG_IN, ARG_LENGTHS,
 * ARG_DEST).
 *
 * Register use:
 *   ESI  in (base for STRIDE)     EDX  lengths (0 = compute)
 *   ECX  current input normal     EAX  current destination entry
 *   EBP  normals left to process
 *
 * EDI, ESI and EBP are saved and restored; EAX, ECX, EDX, the flags and
 * the MMX registers are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP ) /*  dest->count = in->count   */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX ) /*  in->start    */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )        /* count == 0 ? nothing to do         */
    JE         ( LLBL (G3N_end) )

    FEMMS

    CMP_L      ( CONST(0), EDX )        /* lengths == 0 ?                     */
    JE         ( LLBL (G3N_norm2) )     /* calculate lengths                  */

ALIGNTEXT32
LLBL (G3N_norm1):                       /* use precalculated lengths          */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0               */
    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2               */

    MOVD       ( REGIND(EDX), MM3 )     /*                 | length (x)       */
    PFMUL      ( MM3, MM1 )             /*                 | x2 (normalized)  */

    PUNPCKLDQ  ( MM3, MM3 )             /* length (x)      | length (x)       */
    ADD_L      ( STRIDE, ECX )          /* next normal                        */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )             /* x1 (normalized) | x0 (normalized)  */
    MOVQ       ( MM0, REGIND(EAX) )     /* write new x0, x1                   */

    MOVD       ( MM1, REGOFF(8, EAX) )  /* write new x2                       */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    ADD_L      ( CONST(4), EDX )        /* next length                        */
    DEC_L      ( EBP )                  /* decrement normal counter           */

    JNE        ( LLBL (G3N_norm1) )     /* DEC leaves CF alone: test ZF only  */

    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                       /* need to calculate lengths          */

    PREFETCHW  ( REGIND(EAX) )

    /* Load the current normal BEFORE copying it and BEFORE advancing the
     * input pointer, so that the length is computed from the normal that
     * is actually written and the first normal is not skipped. */
    MOVQ       ( REGIND(ECX), MM0 )     /* x1              | x0               */
    MOVD       ( REGOFF(8, ECX), MM1 )  /*                 | x2               */

    MOVQ       ( MM0, MM3 )             /* x1              | x0               */
    ADD_L      ( STRIDE, ECX )          /* next normal                        */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM3 )             /* x1*x1           | x0*x0            */
    MOVQ       ( MM1, MM4 )             /*                 | x2               */

    ADD_L      ( CONST(16), EAX )       /* next r                             */
    PFMUL      ( MM1, MM4 )             /*                 | x2*x2            */

    PFADD      ( MM4, MM3 )             /* x1*x1           | x0*x0+x2*x2      */
    PFACC      ( MM3, MM3 )             /* both halves: x0*x0+x1*x1+x2*x2     */

    PFRSQRT    ( MM3, MM5 )             /* ~1/sqrt (x0*x0+x1*x1+x2*x2)        */
    MOVQ       ( MM5, MM4 )             /* keep the first approximation       */

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )             /* approximation squared              */

    PFRSQIT1   ( MM3, MM5 )             /* refinement step 1                  */
    PFRCPIT2   ( MM4, MM5 )             /* refinement step 2                  */

    PFMUL      ( MM5, MM0 )             /* x1 (normalized) | x0 (normalized)  */
    MOVQ       ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1                  */

    PFMUL      ( MM5, MM1 )             /*                 | x2 (normalized)  */
    MOVD       ( MM1, REGOFF(-8, EAX) ) /* write new x2                       */

    DEC_L      ( EBP )                  /* decrement normal counter           */
    JNE        ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
779
780
781
782
783
784
/*
 * _mesa_3dnow_rescale_normals
 *
 * For each of in->count normals: multiply all three components by scale
 * and store the result in the destination array (16 bytes per
 * destination entry).
 *
 * Arguments are read through the ARG_* macros (ARG_SCALE, ARG_IN,
 * ARG_DEST).
 *
 * Register use:
 *   ESI  in (base for STRIDE)     ECX  current input normal
 *   EAX  current destination      EDX  normals left to process
 *   MM0  scale | scale
 *
 * EDI and ESI are saved and restored; EAX, ECX, EDX, the flags and the
 * MMX registers are clobbered.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

 #undef  FRAME_OFFSET
 #define FRAME_OFFSET 8
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDX ) /*  dest->count = in->count   */
    MOV_L      ( EDX, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX ) /*  dest->start  */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX ) /*  in->start    */

    CMP_L      ( CONST(0), EDX )        /* count == 0 ? nothing to do         */
    JE         ( LLBL (G3R_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )       /*               | scale              */
    PUNPCKLDQ  ( MM0, MM0 )             /* scale         | scale              */

ALIGNTEXT32
LLBL (G3R_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM1 )     /* x1            | x0                 */
    MOVD       ( REGOFF(8, ECX), MM2 )  /*               | x2                 */

    PFMUL      ( MM0, MM1 )             /* x1*scale      | x0*scale           */
    ADD_L      ( STRIDE, ECX )          /* next normal                        */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM2 )             /*               | x2*scale           */
    ADD_L      ( CONST(16), EAX )       /* next r                             */

    MOVQ       ( MM1, REGOFF(-16, EAX) ) /* write r0, r1                      */
    MOVD       ( MM2, REGOFF(-8, EAX) ) /* write r2                           */

    DEC_L      ( EDX )                  /* decrement normal counter           */
    JNE        ( LLBL (G3R_rescale) )   /* DEC leaves CF alone: test ZF only  */

    FEMMS

LLBL (G3R_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET
837