1
2	/* TODO
3	 *
4	 * 1) It would be nice if load/store double could be used
5	 *    at least for the matrix parts.  I think for the matrices
6	 *    it is safe, but for the vertices it probably is not due to
7	 *    things like glInterleavedArrays etc.
8	 *
9	 *    UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
10	 *
11	 * 2) One extremely slick trick would be if we could enclose
12	 *    groups of xform calls on the same vertices such that
13	 *    we just load the matrix into f16-->f31 before the calls
14	 *    and then we would not have to do them here.  This may be
15	 *    tricky and not much of a gain though.
16	 */
17
18#include "sparc_matrix.h"
19
#if defined(SVR4) || defined(__SVR4) || \
    defined(__svr4__) || defined(__arch64__)
	/*
	 * %g2 and %g3 are used as scratch registers by the routines in
	 * this file.  Solaris requires them to be declared like this for
	 * 64-bit.
	 */
	.register	%g2, #scratch
	.register	%g3, #scratch
#endif

	.text
	.align	64
28
/*
 * __set_v4f_1: shared exit path, reached by a branch from the transform
 * routines.  Writes 1 into the V4F_SIZE field of the vector at %o0, ORs
 * VEC_SIZE_1 into its V4F_FLAGS word and performs the leaf return (retl)
 * on behalf of the routine that branched here.  Scratch: %g1, %g2.
 */
__set_v4f_1:
	mov	1, %g1				! new size value
	ld	[%o0 + V4F_FLAGS], %g2		! current flags word
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_1, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: flags write-back
/*
 * __set_v4f_2: shared exit path, reached by a branch from the transform
 * routines.  Writes 2 into the V4F_SIZE field of the vector at %o0, ORs
 * VEC_SIZE_2 into its V4F_FLAGS word and performs the leaf return (retl)
 * on behalf of the routine that branched here.  Scratch: %g1, %g2.
 */
__set_v4f_2:
	mov	2, %g1				! new size value
	ld	[%o0 + V4F_FLAGS], %g2		! current flags word
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_2, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: flags write-back
/*
 * __set_v4f_3: shared exit path, reached by a branch from the transform
 * routines.  Writes 3 into the V4F_SIZE field of the vector at %o0, ORs
 * VEC_SIZE_3 into its V4F_FLAGS word and performs the leaf return (retl)
 * on behalf of the routine that branched here.  Scratch: %g1, %g2.
 */
__set_v4f_3:
	mov	3, %g1				! new size value
	ld	[%o0 + V4F_FLAGS], %g2		! current flags word
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_3, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: flags write-back
/*
 * __set_v4f_4: shared exit path, reached by a branch from the transform
 * routines.  Writes 4 into the V4F_SIZE field of the vector at %o0, ORs
 * VEC_SIZE_4 into its V4F_FLAGS word and performs the leaf return (retl)
 * on behalf of the routine that branched here.  Scratch: %g1, %g2.
 */
__set_v4f_4:
	mov	4, %g1				! new size value
	ld	[%o0 + V4F_FLAGS], %g2		! current flags word
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_4, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: flags write-back
57
58	/* First the raw versions. */
59
60	.globl	_mesa_sparc_transform_points1_general
61_mesa_sparc_transform_points1_general:
62	ld	[%o2 + V4F_STRIDE], %o5
63	LDPTR	[%o2 + V4F_START], %g1
64	LDPTR	[%o0 + V4F_START], %g2
65	ld	[%o2 + V4F_COUNT], %g3
66
67	LDMATRIX_0_1_2_3_12_13_14_15(%o1)
68
69	cmp	%g3, 1
70	st	%g3, [%o0 + V4F_COUNT]
71	bl	3f
72	 clr	%o1
73
74	be	2f
75	 andn	%g3, 1, %o2
76
771:	ld	[%g1 + 0x00], %f0	! LSU	Group
78	add	%g1, %o5, %g1		! IEU0
79	ld	[%g1 + 0x00], %f8	! LSU	Group
80	add	%o1, 2, %o1		! IEU0
81	add	%g1, %o5, %g1		! IEU1
82	fmuls	%f0, M0, %f1		! FGM	Group	1-cycle stall on %f0
83	fmuls	%f0, M1, %f2		! FGM	Group
84	fmuls	%f0, M2, %f3		! FGM	Group
85	fmuls	%f0, M3, %f4		! FGM	Group
86	fmuls	%f8, M0, %f9		! FGM	Group	f1 available
87	fadds	%f1, M12, %f1		! FGA
88	st	%f1, [%g2 + 0x00]	! LSU
89	fmuls	%f8, M1, %f10		! FGM	Group	f2 available
90	fadds	%f2, M13, %f2		! FGA
91	st	%f2, [%g2 + 0x04]	! LSU
92	fmuls	%f8, M2, %f11		! FGM	Group	f3 available
93	fadds	%f3, M14, %f3		! FGA
94	st	%f3, [%g2 + 0x08]	! LSU
95	fmuls	%f8, M3, %f12		! FGM	Group	f4 available
96	fadds	%f4, M15, %f4		! FGA
97	st	%f4, [%g2 + 0x0c]	! LSU
98	fadds	%f9, M12, %f9		! FGA	Group	f9 available
99	st	%f9, [%g2 + 0x10]	! LSU
100	fadds	%f10, M13, %f10		! FGA	Group	f10 available
101	st	%f10, [%g2 + 0x14]	! LSU
102	fadds	%f11, M14, %f11		! FGA	Group	f11 available
103	st	%f11, [%g2 + 0x18]	! LSU
104	fadds	%f12, M15, %f12		! FGA	Group	f12 available
105	st	%f12, [%g2 + 0x1c]	! LSU
106	cmp	%o1, %o2		! IEU1
107	bne	1b			! CTI
108	 add	%g2, 0x20, %g2		! IEU0	Group
109
110	cmp	%o1, %g3
111	be	3f
112	 nop
113
1142:	ld	[%g1 + 0x00], %f0	! LSU	Group
115	fmuls	%f0, M0, %f1		! FGM	Group	1-cycle stall on %f0
116	fmuls	%f0, M1, %f2		! FGM	Group
117	fmuls	%f0, M2, %f3		! FGM	Group
118	fmuls	%f0, M3, %f4		! FGM	Group
119	fadds	%f1, M12, %f1		! FGA	Group
120	st	%f1, [%g2 + 0x00]	! LSU
121	fadds	%f2, M13, %f2		! FGA	Group
122	st	%f2, [%g2 + 0x04]	! LSU
123	fadds	%f3, M14, %f3		! FGA	Group
124	st	%f3, [%g2 + 0x08]	! LSU
125	fadds	%f4, M15, %f4		! FGA	Group
126	st	%f4, [%g2 + 0x0c]	! LSU
127
1283:
129	ba	__set_v4f_4
130	 nop
131
132	.globl	_mesa_sparc_transform_points1_identity
133_mesa_sparc_transform_points1_identity:
134	cmp	%o0, %o2
135	be	4f
136	 ld	[%o2 + V4F_STRIDE], %o5
137	LDPTR	[%o2 + V4F_START], %g1
138	LDPTR	[%o0 + V4F_START], %g2
139	ld	[%o2 + V4F_COUNT], %g3
140
141	cmp	%g3, 1
142	st	%g3, [%o0 + V4F_COUNT]
143	bl	3f
144	 clr	%o1
145
146	be	2f
147	 andn	%g3, 1, %o2
148
1491:	ld	[%g1 + 0x00], %f0	! LSU	Group
150	add	%g1, %o5, %g1		! IEU0
151	ld	[%g1 + 0x00], %f1	! LSU	Group
152	add	%o1, 2, %o1		! IEU0
153	add	%g1, %o5, %g1		! IEU1
154	st	%f0, [%g2 + 0x00]	! LSU	Group
155	cmp	%o1, %o2		! IEU1
156	st	%f1, [%g2 + 0x10]	! LSU	Group
157	bne	1b			! CTI
158	 add	%g2, 0x20, %g2		! IEU0
159
160	cmp	%o1, %g3
161	be	3f
162	 nop
163
1642:	ld	[%g1 + 0x00], %f0
165	addx	%g0, %g0, %g0
166	st	%f0, [%g2 + 0x00]
167
1683:
169	ba	__set_v4f_1
170	 nop
171
1724:	retl
173	 nop
174
175	.globl	_mesa_sparc_transform_points1_2d
176_mesa_sparc_transform_points1_2d:
177	ld	[%o2 + V4F_STRIDE], %o5
178	LDPTR	[%o2 + V4F_START], %g1
179	LDPTR	[%o0 + V4F_START], %g2
180	ld	[%o2 + V4F_COUNT], %g3
181
182	LDMATRIX_0_1_12_13(%o1)
183
184	cmp	%g3, 1
185	st	%g3, [%o0 + V4F_COUNT]
186	bl	3f
187	 clr	%o1
188
189	be	2f
190	 andn	%g3, 1, %o2
191
1921:	ld	[%g1 + 0x00], %f0	! LSU	Group
193	add	%g1, %o5, %g1		! IEU0
194	ld	[%g1 + 0x00], %f8	! LSU	Group
195	add	%o1, 2, %o1		! IEU0
196	add	%g1, %o5, %g1		! IEU1
197	fmuls	%f0, M0, %f1		! FGM	Group
198	fmuls	%f0, M1, %f2		! FGM	Group
199	fmuls	%f8, M0, %f9		! FGM	Group
200	fmuls	%f8, M1, %f10		! FGM	Group
201	fadds	%f1, M12, %f3		! FGA	Group	f1 available
202	st	%f3, [%g2 + 0x00]	! LSU
203	fadds	%f2, M13, %f4		! FGA	Group	f2 available
204	st	%f4, [%g2 + 0x04]	! LSU
205	fadds	%f9, M12, %f11		! FGA	Group	f9 available
206	st	%f11, [%g2 + 0x10]	! LSU
207	fadds	%f10, M13, %f12		! FGA	Group	f10 available
208	st	%f12, [%g2 + 0x14]	! LSU
209	cmp	%o1, %o2		! IEU1
210	bne	1b			! CTI
211	 add	%g2, 0x20, %g2		! IEU0	Group
212
213	cmp	%o1, %g3
214	be	3f
215	 nop
216
2172:	ld	[%g1 + 0x00], %f0
218	fmuls	%f0, M0, %f1
219	fmuls	%f0, M1, %f2
220	fadds	%f1, M12, %f3
221	st	%f3, [%g2 + 0x00]
222	fadds	%f2, M13, %f4
223	st	%f4, [%g2 + 0x04]
224
2253:
226	ba	__set_v4f_2
227	 nop
228
229	.globl	_mesa_sparc_transform_points1_2d_no_rot
230_mesa_sparc_transform_points1_2d_no_rot:
231	ld	[%o2 + V4F_STRIDE], %o5
232	LDPTR	[%o2 + V4F_START], %g1
233	LDPTR	[%o0 + V4F_START], %g2
234	ld	[%o2 + V4F_COUNT], %g3
235
236	LDMATRIX_0_12_13(%o1)
237
238	cmp	%g3, 1
239	st	%g3, [%o0 + V4F_COUNT]
240	bl	3f
241	 clr	%o1
242
243	be	2f
244	 andn	%g3, 1, %o2
245
2461:	ld	[%g1 + 0x00], %f0	! LSU	Group
247	add	%g1, %o5, %g1		! IEU0
248	ld	[%g1 + 0x00], %f4	! LSU	Group
249	add	%o1, 2, %o1		! IEU0
250	add	%g1, %o5, %g1		! IEU1
251	fmuls	%f0, M0, %f1		! FGM	Group
252	fmuls	%f4, M0, %f5		! FGM	Group
253	fadds	%f1, M12, %f3		! FGA	Group, 2 cycle stall, f1 available
254	st	%f3, [%g2 + 0x00]	! LSU
255	st	M13, [%g2 + 0x04]	! LSU	Group, f5 available
256	fadds	%f5, M12, %f6		! FGA
257	st	%f6, [%g2 + 0x10]	! LSU	Group
258	st	M13, [%g2 + 0x14]	! LSU	Group
259	cmp	%o1, %o2		! IEU1
260	bne	1b			! CTI
261	 add	%g2, 0x20, %g2		! IEU0	Group
262
263	cmp	%o1, %g3
264	be	3f
265	 nop
266
2672:	ld	[%g1 + 0x00], %f0
268	fmuls	%f0, M0, %f1
269	fadds	%f1, M12, %f3
270	st	%f3, [%g2 + 0x00]
271	st	M13, [%g2 + 0x04]
272
2733:
274	ba	__set_v4f_2
275	 nop
276
277	.globl	_mesa_sparc_transform_points1_3d
278_mesa_sparc_transform_points1_3d:
279	ld	[%o2 + V4F_STRIDE], %o5
280	LDPTR	[%o2 + V4F_START], %g1
281	LDPTR	[%o0 + V4F_START], %g2
282	ld	[%o2 + V4F_COUNT], %g3
283
284	LDMATRIX_0_1_2_12_13_14(%o1)
285
286	cmp	%g3, 1
287	st	%g3, [%o0 + V4F_COUNT]
288	bl	3f
289	 clr	%o1
290
291	be	2f
292	 andn	%g3, 1, %o2
293
2941:	ld	[%g1 + 0x00], %f0	! LSU	Group
295	add	%g1, %o5, %g1		! IEU0
296	ld	[%g1 + 0x00], %f4	! LSU	Group
297	add	%o1, 2, %o1		! IEU0
298	add	%g1, %o5, %g1		! IEU1
299	fmuls	%f0, M0, %f1		! FGM	Group
300	fmuls	%f0, M1, %f2		! FGM	Group
301	fmuls	%f0, M2, %f3		! FGM	Group
302	fmuls	%f4, M0, %f5		! FGM	Group
303	fadds	%f1, M12, %f1		! FGA	Group, f1 available
304	st	%f1, [%g2 + 0x00]	! LSU
305	fmuls	%f4, M1, %f6		! FGM
306	fadds	%f2, M13, %f2		! FGA	Group, f2 available
307	st	%f2, [%g2 + 0x04]	! LSU
308	fmuls	%f4, M2, %f7		! FGM
309	fadds	%f3, M14, %f3		! FGA	Group, f3 available
310	st	%f3, [%g2 + 0x08]	! LSU
311	fadds	%f5, M12, %f5		! FGA	Group, f5 available
312	st	%f5, [%g2 + 0x10]	! LSU
313	fadds	%f6, M13, %f6		! FGA	Group, f6 available
314	st	%f6, [%g2 + 0x14]	! LSU
315	fadds	%f7, M14, %f7		! FGA	Group, f7 available
316	st	%f7, [%g2 + 0x18]	! LSU
317	cmp	%o1, %o2		! IEU1
318	bne	1b			! CTI
319	 add	%g2, 0x20, %g2		! IEU0	Group
320
321	cmp	%o1, %g3
322	be	3f
323	 nop
324
3252:	ld	[%g1 + 0x00], %f0
326	fmuls	%f0, M0, %f1
327	fmuls	%f0, M1, %f2
328	fmuls	%f0, M2, %f3
329	fadds	%f1, M12, %f1
330	st	%f1, [%g2 + 0x00]
331	fadds	%f2, M13, %f2
332	st	%f2, [%g2 + 0x04]
333	fadds	%f3, M14, %f3
334	st	%f3, [%g2 + 0x08]
335
3363:
337	ba	__set_v4f_3
338	 nop
339
340	.globl	_mesa_sparc_transform_points1_3d_no_rot
341_mesa_sparc_transform_points1_3d_no_rot:
342	ld	[%o2 + V4F_STRIDE], %o5
343	LDPTR	[%o2 + V4F_START], %g1
344	LDPTR	[%o0 + V4F_START], %g2
345	ld	[%o2 + V4F_COUNT], %g3
346
347	LDMATRIX_0_12_13_14(%o1)
348
349	cmp	%g3, 1
350	st	%g3, [%o0 + V4F_COUNT]
351	bl	3f
352	 clr	%o1
353
354	be	2f
355	 andn	%g3, 1, %o2
356
3571:	ld	[%g1 + 0x00], %f0	! LSU	Group
358	add	%g1, %o5, %g1		! IEU0
359	ld	[%g1 + 0x00], %f2	! LSU	Group
360	add	%o1, 2, %o1		! IEU0
361	add	%g1, %o5, %g1		! IEU1
362	fmuls	%f0, M0, %f1		! FGM	Group
363	fmuls	%f2, M0, %f3		! FGM	Group
364	fadds	%f1, M12, %f1		! FGA	Group, 2 cycle stall, f1 available
365	st	%f1, [%g2 + 0x00]	! LSU
366	fadds	%f3, M12, %f3		! FGA	Group, f3 available
367	st	M13, [%g2 + 0x04]	! LSU
368	st	M14, [%g2 + 0x08]	! LSU	Group
369	st	%f3, [%g2 + 0x10]	! LSU	Group
370	st	M13, [%g2 + 0x14]	! LSU	Group
371	st	M14, [%g2 + 0x18]	! LSU	Group
372	cmp	%o1, %o2		! IEU1
373	bne	1b			! CTI
374	 add	%g2, 0x20, %g2		! IEU0	Group
375
376	cmp	%o1, %g3
377	be	3f
378	 nop
379
3802:	ld	[%g1 + 0x00], %f0
381	fmuls	%f0, M0, %f1
382	fadds	%f1, M12, %f1
383	st	%f1, [%g2 + 0x00]
384	st	M13, [%g2 + 0x04]
385	st	M14, [%g2 + 0x08]
386
3873:
388	ba	__set_v4f_3
389	 nop
390
391	.globl	_mesa_sparc_transform_points1_perspective
392_mesa_sparc_transform_points1_perspective:
393	ld	[%o2 + V4F_STRIDE], %o5
394	LDPTR	[%o2 + V4F_START], %g1
395	LDPTR	[%o0 + V4F_START], %g2
396	ld	[%o2 + V4F_COUNT], %g3
397
398	LDMATRIX_0_14(%o1)
399
400	cmp	%g3, 1
401	st	%g3, [%o0 + V4F_COUNT]
402	bl	3f
403	 clr	%o1
404
405	be	2f
406	 andn	%g3, 1, %o2
407
4081:	ld	[%g1 + 0x00], %f0	! LSU	Group
409	add	%g1, %o5, %g1		! IEU0
410	ld	[%g1 + 0x00], %f2	! LSU	Group
411	add	%o1, 2, %o1		! IEU0
412	add	%g1, %o5, %g1		! IEU1
413	fmuls	%f0, M0, %f1		! FGM	Group
414	st	%f1, [%g2 + 0x00]	! LSU
415	fmuls	%f2, M0, %f3		! FGM	Group
416	st	%g0, [%g2 + 0x04]	! LSU
417	st	M14, [%g2 + 0x08]	! LSU	Group
418	st	%g0, [%g2 + 0x0c]	! LSU	Group
419	st	%f3, [%g2 + 0x10]	! LSU	Group
420	st	%g0, [%g2 + 0x14]	! LSU	Group
421	st	M14, [%g2 + 0x18]	! LSU	Group
422	st	%g0, [%g2 + 0x1c]	! LSU	Group
423	cmp	%o1, %o2		! IEU1
424	bne	1b			! CTI
425	 add	%g2, 0x20, %g2		! IEU0	Group
426
427	cmp	%o1, %g3
428	be	3f
429	 nop
430
4312:	ld	[%g1 + 0x00], %f0
432	fmuls	%f0, M0, %f1
433	st	%f1, [%g2 + 0x00]
434	st	%g0, [%g2 + 0x04]
435	st	M14, [%g2 + 0x08]
436	st	%g0, [%g2 + 0x0c]
437
4383:
439	ba	__set_v4f_4
440	 nop
441
442	.globl	_mesa_sparc_transform_points2_general
443_mesa_sparc_transform_points2_general:
444	ld	[%o2 + V4F_STRIDE], %o5
445	LDPTR	[%o2 + V4F_START], %g1
446	LDPTR	[%o0 + V4F_START], %g2
447	ld	[%o2 + V4F_COUNT], %g3
448
449	LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1)
450
451	cmp	%g3, 0
452	st	%g3, [%o0 + V4F_COUNT]
453	be	2f
454	 clr	%o1
455
4561:	ld	[%g1 + 0x00], %f0	! LSU	Group
457	ld	[%g1 + 0x04], %f1	! LSU	Group
458	add	%o1, 1, %o1		! IEU0
459	add	%g1, %o5, %g1		! IEU1
460	fmuls	%f0, M0, %f2		! FGM	Group
461	fmuls	%f0, M1, %f3		! FGM	Group
462	fmuls	%f0, M2, %f4		! FGM	Group
463	fmuls	%f0, M3, %f5		! FGM	Group
464	fadds	%f2, M12, %f2		! FGA	Group	f2 available
465	fmuls	%f1, M4, %f6		! FGM
466	fadds	%f3, M13, %f3		! FGA	Group	f3 available
467	fmuls	%f1, M5, %f7		! FGM
468	fadds	%f4, M14, %f4		! FGA	Group	f4 available
469	fmuls	%f1, M6, %f8		! FGM
470	fadds	%f5, M15, %f5		! FGA	Group	f5 available
471	fmuls	%f1, M7, %f9		! FGM
472	fadds	%f2, %f6, %f2		! FGA	Group	f6 available
473	st	%f2, [%g2 + 0x00]	! LSU
474	fadds	%f3, %f7, %f3		! FGA	Group	f7 available
475	st	%f3, [%g2 + 0x04]	! LSU
476	fadds	%f4, %f8, %f4		! FGA	Group	f8 available
477	st	%f4, [%g2 + 0x08]	! LSU
478	fadds	%f5, %f9, %f5		! FGA	Group	f9 available
479	st	%f5, [%g2 + 0x0c]	! LSU
480	cmp	%o1, %g3		! IEU1
481	bne	1b			! CTI
482	 add	%g2, 0x10, %g2		! IEU0	Group
4832:
484	ba	__set_v4f_4
485	 nop
486
487	.globl	_mesa_sparc_transform_points2_identity
488_mesa_sparc_transform_points2_identity:
489	cmp	%o2, %o0
490	be	3f
491	 ld	[%o2 + V4F_STRIDE], %o5
492	LDPTR	[%o2 + V4F_START], %g1
493	LDPTR	[%o0 + V4F_START], %g2
494	ld	[%o2 + V4F_COUNT], %g3
495
496	cmp	%g3, 0
497	st	%g3, [%o0 + V4F_COUNT]
498	be	2f
499	 clr	%o1
500
5011:	ld	[%g1 + 0x00], %f0	! LSU	Group
502	add	%o1, 1, %o1		! IEU0
503	ld	[%g1 + 0x04], %f1	! LSU	Group
504	add	%g1, %o5, %g1		! IEU0
505	cmp	%o1, %g3		! IEU1
506	st	%f0, [%g2 + 0x00]	! LSU	Group
507	st	%f1, [%g2 + 0x04]	! LSU	Group
508	bne	1b			! CTI
509	 add	%g2, 0x10, %g2		! IEU0
5102:
511	ba	__set_v4f_2
512	 nop
513
5143:	retl
515	 nop
516
517	.globl	_mesa_sparc_transform_points2_2d
518_mesa_sparc_transform_points2_2d:
519	ld	[%o2 + V4F_STRIDE], %o5
520	LDPTR	[%o2 + V4F_START], %g1
521	LDPTR	[%o0 + V4F_START], %g2
522	ld	[%o2 + V4F_COUNT], %g3
523
524	LDMATRIX_0_1_4_5_12_13(%o1)
525
526	cmp	%g3, 1
527	st	%g3, [%o0 + V4F_COUNT]
528	bl	3f
529	 clr	%o1
530
531	be	2f
532	 andn	%g3, 1, %o2
533
5341:	ld	[%g1 + 0x00], %f0	! LSU	Group
535	ld	[%g1 + 0x04], %f1	! LSU	Group
536	add	%o1, 2, %o1		! IEU0
537	add	%g1, %o5, %g1		! IEU1
538	fmuls	%f0, M0, %f2		! FGM
539	ld	[%g1 + 0x00], %f8	! LSU	Group
540	fmuls	%f0, M1, %f3		! FGM
541	ld	[%g1 + 0x04], %f9	! LSU	Group
542	fmuls	%f1, M4, %f6		! FGM
543	fmuls	%f1, M5, %f7		! FGM	Group
544	add	%g1, %o5, %g1		! IEU0
545	fmuls	%f8, M0, %f10		! FGM	Group	f2 available
546	fadds	%f2, M12, %f2		! FGA
547	fmuls	%f8, M1, %f11		! FGM	Group	f3 available
548	fadds	%f3, M13, %f3		! FGA
549	fmuls	%f9, M4, %f12		! FGM	Group
550	fmuls	%f9, M5, %f13		! FGM	Group
551	fadds	%f10, M12, %f10		! FGA	Group	f2, f10 available
552	fadds	%f2, %f6, %f2		! FGA	Group	f3, f11 available
553	st	%f2, [%g2 + 0x00]	! LSU
554	fadds	%f11, M13, %f11		! FGA	Group	f12 available
555	fadds	%f3, %f7, %f3		! FGA	Group	f13 available
556	st	%f3, [%g2 + 0x04]	! LSU
557	fadds	%f10, %f12, %f10	! FGA	Group	f10 available
558	st	%f10, [%g2 + 0x10]	! LSU
559	fadds	%f11, %f13, %f11	! FGA	Group	f11 available
560	st	%f11, [%g2 + 0x14]	! LSU
561	cmp	%o1, %o2		! IEU1
562	bne	1b			! CTI
563	 add	%g2, 0x20, %g2		! IEU0	Group
564
565	cmp	%o1, %g3
566	be	3f
567	 nop
568
5692:	ld	[%g1 + 0x00], %f0	! LSU	Group
570	ld	[%g1 + 0x04], %f1	! LSU	Group
571	fmuls	%f0, M0, %f2		! FGM	Group
572	fmuls	%f0, M1, %f3		! FGM	Group
573	fmuls	%f1, M4, %f6		! FGM	Group
574	fmuls	%f1, M5, %f7		! FGM	Group
575	fadds	%f2, M12, %f2		! FGA	Group	f2 available
576	fadds	%f3, M13, %f3		! FGA	Group	f3 available
577	fadds	%f2, %f6, %f2		! FGA	Group	2 cycle stall, f2 available
578	st	%f2, [%g2 + 0x00]	! LSU
579	fadds	%f3, %f7, %f3		! FGA	Group	f3 available
580	st	%f3, [%g2 + 0x04]	! LSU
581
5823:
583	ba	__set_v4f_2
584	 nop
585
586	.globl	_mesa_sparc_transform_points2_2d_no_rot
587_mesa_sparc_transform_points2_2d_no_rot:
588	ld	[%o2 + V4F_STRIDE], %o5
589	LDPTR	[%o2 + V4F_START], %g1
590	LDPTR	[%o0 + V4F_START], %g2
591	ld	[%o2 + V4F_COUNT], %g3
592
593	LDMATRIX_0_5_12_13(%o1)
594
595	cmp	%g3, 1
596	st	%g3, [%o0 + V4F_COUNT]
597	bl	3f
598	 clr	%o1
599
600	be	2f
601	 andn	%g3, 1, %o2
602
6031:	ld	[%g1 + 0x00], %f0	! LSU	Group
604	ld	[%g1 + 0x04], %f1	! LSU	Group
605	add	%o1, 2, %o1		! IEU0
606	add	%g1, %o5, %g1		! IEU1
607	ld	[%g1 + 0x00], %f4	! LSU	Group
608	fmuls	%f0, M0, %f2		! FGM
609	ld	[%g1 + 0x04], %f5	! LSU	Group
610	fmuls	%f1, M5, %f3		! FGM
611	fmuls	%f4, M0, %f6		! FGM	Group
612	add	%g1, %o5, %g1		! IEU0
613	fmuls	%f5, M5, %f7		! FGM	Group
614	fadds	%f2, M12, %f2		! FGA	Group	f2 available
615	st	%f2, [%g2 + 0x00]	! LSU
616	fadds	%f3, M13, %f3		! FGA	Group	f3 available
617	st	%f3, [%g2 + 0x04]	! LSU
618	fadds	%f6, M12, %f6		! FGA	Group	f6 available
619	st	%f6, [%g2 + 0x10]	! LSU
620	fadds	%f7, M13, %f7		! FGA	Group	f7 available
621	st	%f7, [%g2 + 0x14]	! LSU
622	cmp	%o1, %o2		! IEU1
623	bne	1b			! CTI
624	 add	%g2, 0x20, %g2		! IEU0	Group
625
626	cmp	%o1, %g3
627	be	3f
628	 nop
629
6302:	ld	[%g1 + 0x00], %f0	! LSU	Group
631	ld	[%g1 + 0x04], %f1	! LSU	Group
632	fmuls	%f0, M0, %f2		! FGM	Group
633	fmuls	%f1, M5, %f3		! FGM	Group
634	fadds	%f2, M12, %f2		! FGA	Group, 2 cycle stall, f2 available
635	st	%f2, [%g2 + 0x00]	! LSU
636	fadds	%f3, M13, %f3		! FGA	Group	f3 available
637	st	%f3, [%g2 + 0x04]	! LSU
638
6393:
640	ba	__set_v4f_2
641	 nop
642
643	/* orig: 12 cycles */
644	.globl	_mesa_sparc_transform_points2_3d
645_mesa_sparc_transform_points2_3d:
646	ld	[%o2 + V4F_STRIDE], %o5
647	ld	[%o2 + V4F_START], %g1
648	ld	[%o0 + V4F_START], %g2
649	ld	[%o2 + V4F_COUNT], %g3
650
651	LDMATRIX_0_1_2_3_4_5_6_12_13_14(%o1)
652
653	cmp	%g3, 1
654	st	%g3, [%o0 + V4F_COUNT]
655	bl	3f
656	 clr	%o1
657
658	be	2f
659	 andn	%g3, 1, %o2
660
6611:	ld	[%g1 + 0x00], %f0	! LSU	Group
662	ld	[%g1 + 0x04], %f1	! LSU	Group
663	add	%o1, 2, %o1		! IEU0
664	add	%g1, %o5, %g1		! IEU1
665	ld	[%g1 + 0x00], %f9	! LSU	Group
666	fmuls	%f0, M0, %f2		! FGM
667	ld	[%g1 + 0x04], %f10	! LSU	Group
668	fmuls	%f0, M1, %f3		! FGM
669	fmuls	%f0, M2, %f4		! FGM	Group
670	add	%g1, %o5, %g1		! IEU0
671	fmuls	%f1, M4, %f6		! FGM	Group
672	fmuls	%f1, M5, %f7		! FGM	Group	f2 available
673	fadds	%f2, M12, %f2		! FGA
674	fmuls	%f1, M6, %f8		! FGM	Group	f3 available
675	fadds	%f3, M13, %f3		! FGA
676	fmuls	%f9, M0, %f11		! FGM	Group	f4 available
677	fadds	%f4, M14, %f4		! FGA
678	fmuls	%f9, M1, %f12		! FGM	Group	f6 available
679	fmuls	%f9, M2, %f13		! FGM	Group	f2, f7 available
680	fadds	%f2, %f6, %f2		! FGA
681	st	%f2, [%g2 + 0x00]	! LSU
682	fmuls	%f10, M4, %f14		! FGM	Group	f3, f8 available
683	fadds	%f3, %f7, %f3		! FGA
684	st	%f3, [%g2 + 0x04]	! LSU
685	fmuls	%f10, M5, %f15		! FGM	Group	f4, f11 available
686	fadds	%f11, M12, %f11		! FGA
687	fmuls	%f10, M6, %f0		! FGM	Group	f12 available
688	fadds	%f12, M13, %f12		! FGA
689	fadds	%f13, M14, %f13		! FGA	Group	f13 available
690	fadds	%f4, %f8, %f4		! FGA	Group	f14 available
691	st	%f4, [%g2 + 0x08]	! LSU
692	fadds	%f11, %f14, %f11	! FGA	Group	f15, f11 available
693	st	%f11, [%g2 + 0x10]	! LSU
694	fadds	%f12, %f15, %f12	! FGA	Group	f0, f12 available
695	st	%f12, [%g2 + 0x14]	! LSU
696	fadds	%f13, %f0, %f13		! FGA	Group	f13 available
697	st	%f13, [%g2 + 0x18]	! LSU
698
699	cmp	%o1, %o2		! IEU1
700	bne	1b			! CTI
701	 add	%g2, 0x20, %g2		! IEU0	Group
702
703	cmp	%o1, %g3
704	be	3f
705	 nop
706
7072:	ld	[%g1 + 0x00], %f0	! LSU	Group
708	ld	[%g1 + 0x04], %f1	! LSU	Group
709	fmuls	%f0, M0, %f2		! FGM	Group
710	fmuls	%f0, M1, %f3		! FGM	Group
711	fmuls	%f0, M2, %f4		! FGM	Group
712	fmuls	%f1, M4, %f6		! FGM	Group
713	fmuls	%f1, M5, %f7		! FGM	Group	f2 available
714	fadds	%f2, M12, %f2		! FGA
715	fmuls	%f1, M6, %f8		! FGM	Group	f3 available
716	fadds	%f3, M13, %f3		! FGA
717	fadds	%f4, M14, %f4		! FGA	Group	f4 available
718	fadds	%f2, %f6, %f2		! FGA	Group	stall, f2, f6, f7 available
719	st	%f2, [%g2 + 0x00]	! LSU
720	fadds	%f3, %f7, %f3		! FGA	Group	f3, f8 available
721	st	%f3, [%g2 + 0x04]	! LSU
722	fadds	%f4, %f8, %f4		! FGA	Group	f4 available
723	st	%f4, [%g2 + 0x08]	! LSU
724
7253:
726	ba	__set_v4f_3
727	 nop
728
729	.globl	_mesa_sparc_transform_points2_3d_no_rot
730_mesa_sparc_transform_points2_3d_no_rot:
731	ld	[%o2 + V4F_STRIDE], %o5
732	LDPTR	[%o2 + V4F_START], %g1
733	LDPTR	[%o0 + V4F_START], %g2
734	ld	[%o2 + V4F_COUNT], %g3
735
736	LDMATRIX_0_5_12_13_14(%o1)
737
738	cmp	%g3, 1
739	st	%g3, [%o0 + V4F_COUNT]
740	bl	3f
741	 clr	%o3
742
743	be	2f
744	 andn	%g3, 1, %o2
745
7461:	ld	[%g1 + 0x00], %f0	! LSU	Group
747	ld	[%g1 + 0x04], %f1	! LSU	Group
748	add	%o3, 2, %o3		! IEU0
749	add	%g1, %o5, %g1		! IEU1
750	ld	[%g1 + 0x00], %f4	! LSU	Group
751	fmuls	%f0, M0, %f2		! FGM
752	ld	[%g1 + 0x04], %f5	! LSU	Group
753	fmuls	%f1, M5, %f3		! FGM
754	fmuls	%f4, M0, %f6		! FGM	Group
755	add	%g1, %o5, %g1		! IEU0
756	fmuls	%f5, M5, %f7		! FGM	Group
757	fadds	%f2, M12, %f2		! FGA	Group	f2 available
758	st	%f2, [%g2 + 0x00]	! LSU
759	fadds	%f3, M13, %f3		! FGA	Group	f3 available
760	st	%f3, [%g2 + 0x04]	! LSU
761	fadds	%f6, M12, %f6		! FGA	Group	f6 available
762	st	M14, [%g2 + 0x08]	! LSU
763	fadds	%f7, M13, %f7		! FGA	Group	f7 available
764	st	%f6, [%g2 + 0x10]	! LSU
765	st	%f7, [%g2 + 0x14]	! LSU	Group
766	st	M14, [%g2 + 0x18]	! LSU	Group
767	cmp	%o3, %o2		! IEU1
768	bne	1b			! CTI
769	 add	%g2, 0x20, %g2		! IEU0	Group
770
771	cmp	%o3, %g3
772	be	3f
773	 nop
774
7752:	ld	[%g1 + 0x00], %f0	! LSU	Group
776	ld	[%g1 + 0x04], %f1	! LSU	Group
777	fmuls	%f0, M0, %f2		! FGM	Group
778	fmuls	%f1, M5, %f3		! FGM	Group
779	fadds	%f2, M12, %f2		! FGA	Group, 2 cycle stall, f2 available
780	st	%f2, [%g2 + 0x00]	! LSU
781	fadds	%f3, M13, %f3		! FGA	Group	f3 available
782	st	%f3, [%g2 + 0x04]	! LSU
783	st	M14, [%g2 + 0x08]	! LSU	Group
784
7853:	ld	[%o1 + (14 * 0x4)], %g3
786	cmp	%g3, 0
787	bne	__set_v4f_3
788	 nop
789	ba	__set_v4f_2
790	 nop
791
792	.globl	_mesa_sparc_transform_points2_perspective
793_mesa_sparc_transform_points2_perspective:
794	ld	[%o2 + V4F_STRIDE], %o5
795	LDPTR	[%o2 + V4F_START], %g1
796	LDPTR	[%o0 + V4F_START], %g2
797	ld	[%o2 + V4F_COUNT], %g3
798
799	LDMATRIX_0_5_14(%o1)
800
801	cmp	%g3, 0
802	st	%g3, [%o0 + V4F_COUNT]
803	be	2f
804	 clr	%o1
805
8061:	ld	[%g1 + 0x00], %f0
807	ld	[%g1 + 0x04], %f1
808	add	%o1, 1, %o1
809	add	%g1, %o5, %g1
810	fmuls	%f0, M0, %f2
811	st	%f2, [%g2 + 0x00]
812	fmuls	%f1, M5, %f3
813	st	%f3, [%g2 + 0x04]
814	st	M14, [%g2 + 0x08]
815	st	%g0, [%g2 + 0x0c]
816	cmp	%o1, %g3
817	bne	1b
818	 add	%g2, 0x10, %g2
8192:
820	ba	__set_v4f_4
821	 nop
822
823	.globl	_mesa_sparc_transform_points3_general
824_mesa_sparc_transform_points3_general:
825	ld	[%o2 + V4F_STRIDE], %o5
826	LDPTR	[%o2 + V4F_START], %g1
827	LDPTR	[%o0 + V4F_START], %g2
828	ld	[%o2 + V4F_COUNT], %g3
829
830	LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
831
832	cmp	%g3, 0
833	st	%g3, [%o0 + V4F_COUNT]
834	be	2f
835	 clr	%o1
836
8371:	ld	[%g1 + 0x00], %f0	! LSU	Group
838	ld	[%g1 + 0x04], %f1	! LSU	Group
839	ld	[%g1 + 0x08], %f2	! LSU	Group
840	add	%o1, 1, %o1		! IEU0
841	add	%g1, %o5, %g1		! IEU1
842	fmuls	%f0, M0, %f3		! FGM
843	fmuls	%f1, M4, %f7		! FGM	Group
844	fmuls	%f0, M1, %f4		! FGM	Group
845	fmuls	%f1, M5, %f8		! FGM	Group
846	fmuls	%f0, M2, %f5		! FGM	Group	f3 available
847	fmuls	%f1, M6, %f9		! FGM	Group	f7 available
848	fadds	%f3, %f7, %f3		! FGA
849	fmuls	%f0, M3, %f6		! FGM	Group	f4 available
850	fmuls	%f1, M7, %f10		! FGM	Group	f8 available
851	fadds	%f4, %f8, %f4		! FGA
852	fmuls	%f2, M8, %f7		! FGM	Group	f5 available
853	fmuls	%f2, M9, %f8		! FGM	Group	f9,f3 available
854	fadds	%f5, %f9, %f5		! FGA
855	fmuls	%f2, M10, %f9		! FGM	Group	f6 available
856	fadds	%f6, %f10, %f6		! FGA	Group	f10,f4 available
857	fmuls	%f2, M11, %f10		! FGM
858	fadds	%f3, M12, %f3		! FGA	Group	f7 available
859	fadds	%f4, M13, %f4		! FGA	Group	f8,f5 available
860	fadds	%f5, M14, %f5		! FGA	Group	f9 available
861	fadds	%f6, M15, %f6		! FGA	Group	f10,f6 available
862	fadds	%f3, %f7, %f3		! FGA	Group	f3 available
863	st	%f3, [%g2 + 0x00]	! LSU
864	fadds	%f4, %f8, %f4		! FGA	Group	f4 available
865	st	%f4, [%g2 + 0x04]	! LSU
866	fadds	%f5, %f9, %f5		! FGA	Group	f5 available
867	st	%f5, [%g2 + 0x08]	! LSU
868	fadds	%f6, %f10, %f6		! FGA	Group	f6 available
869	st	%f6, [%g2 + 0x0c]	! LSU
870	cmp	%o1, %g3		! IEU1
871	bne	1b			! CTI
872	 add	%g2, 0x10, %g2		! IEU0	Group
8732:
874	ba	__set_v4f_4
875	 nop
876
877	.globl	_mesa_sparc_transform_points3_identity
878_mesa_sparc_transform_points3_identity:
879	ld	[%o2 + V4F_STRIDE], %o5
880	LDPTR	[%o2 + V4F_START], %g1
881	LDPTR	[%o0 + V4F_START], %g2
882	ld	[%o2 + V4F_COUNT], %g3
883
884	cmp	%g3, 0
885	st	%g3, [%o0 + V4F_COUNT]
886	be	2f
887	 clr	%o1
888
8891:	ld	[%g1 + 0x00], %f0
890	ld	[%g1 + 0x04], %f1
891	ld	[%g1 + 0x08], %f2
892	add	%o1, 1, %o1
893	add	%g1, %o5, %g1
894	cmp	%o1, %g3
895	st	%f0, [%g2 + 0x00]
896	st	%f1, [%g2 + 0x04]
897	st	%f2, [%g2 + 0x08]
898	bne	1b
899	 add	%g2, 0x10, %g2
9002:
901	ba	__set_v4f_3
902	 nop
903
904	.globl	_mesa_sparc_transform_points3_2d
905_mesa_sparc_transform_points3_2d:
906	ld	[%o2 + V4F_STRIDE], %o5
907	LDPTR	[%o2 + V4F_START], %g1
908	LDPTR	[%o0 + V4F_START], %g2
909	ld	[%o2 + V4F_COUNT], %g3
910
911	LDMATRIX_0_1_4_5_12_13(%o1)
912
913	cmp	%g3, 0
914	st	%g3, [%o0 + V4F_COUNT]
915	be	2f
916	 clr	%o1
917
9181:	ld	[%g1 + 0x00], %f0	! LSU	Group
919	ld	[%g1 + 0x04], %f1	! LSU	Group
920	ld	[%g1 + 0x08], %f2	! LSU	Group
921	add	%o1, 1, %o1		! IEU0
922	add	%g1, %o5, %g1		! IEU1
923	fmuls	%f0, M0, %f3		! FGM
924	fmuls	%f0, M1, %f4		! FGM	Group
925	fmuls	%f1, M4, %f6		! FGM	Group
926	fmuls	%f1, M5, %f7		! FGM	Group
927	fadds	%f3, M12, %f3		! FGA	Group	f3 available
928	fadds	%f4, M13, %f4		! FGA	Group	f4 available
929	fadds	%f3, %f6, %f3		! FGA	Group	f6 available
930	st	%f3, [%g2 + 0x00]	! LSU
931	fadds	%f4, %f7, %f4		! FGA	Group	f7 available
932	st	%f4, [%g2 + 0x04]	! LSU
933	st	%f2, [%g2 + 0x08]	! LSU	Group
934	cmp	%o1, %g3		! IEU1
935	bne	1b			! CTI
936	 add	%g2, 0x10, %g2		! IEU0	Group
9372:
938	ba	__set_v4f_3
939	 nop
940
941	.globl	_mesa_sparc_transform_points3_2d_no_rot
942_mesa_sparc_transform_points3_2d_no_rot:
943	ld	[%o2 + V4F_STRIDE], %o5
944	LDPTR	[%o2 + V4F_START], %g1
945	LDPTR	[%o0 + V4F_START], %g2
946	ld	[%o2 + V4F_COUNT], %g3
947
948	LDMATRIX_0_5_12_13(%o1)
949
950	cmp	%g3, 0
951	st	%g3, [%o0 + V4F_COUNT]
952	be	2f
953	 clr	%o1
954
9551:	ld	[%g1 + 0x00], %f0	! LSU	Group
956	ld	[%g1 + 0x04], %f1	! LSU	Group
957	ld	[%g1 + 0x08], %f2	! LSU	Group
958	add	%o1, 1, %o1		! IEU0
959	add	%g1, %o5, %g1		! IEU1
960	fmuls	%f0, M0, %f3		! FGM
961	fmuls	%f1, M5, %f4		! FGM	Group
962	st	%f2, [%g2 + 0x08]	! LSU
963	fadds	%f3, M12, %f3		! FGA	Group
964	st	%f3, [%g2 + 0x00]	! LSU
965	fadds	%f4, M13, %f4		! FGA	Group
966	st	%f4, [%g2 + 0x04]	! LSU
967	cmp	%o1, %g3		! IEU1
968	bne	1b			! CTI
969	 add	%g2, 0x10, %g2		! IEU0	Group
9702:
971	ba	__set_v4f_3
972	 nop
973
974	.globl	_mesa_sparc_transform_points3_3d
975_mesa_sparc_transform_points3_3d:
976	ld	[%o2 + V4F_STRIDE], %o5
977	LDPTR	[%o2 + V4F_START], %g1
978	LDPTR	[%o0 + V4F_START], %g2
979	ld	[%o2 + V4F_COUNT], %g3
980
981	LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
982
983	cmp	%g3, 0
984	st	%g3, [%o0 + V4F_COUNT]
985	be	2f
986	 clr	%o1
987
9881:	ld	[%g1 + 0x00], %f0	! LSU	Group
989	ld	[%g1 + 0x04], %f1	! LSU	Group
990	ld	[%g1 + 0x08], %f2	! LSU	Group
991	add	%o1, 1, %o1		! IEU0
992	add	%g1, %o5, %g1		! IEU1
993	fmuls	%f0, M0, %f3		! FGM
994	fmuls	%f1, M4, %f6		! FGM	Group
995	fmuls	%f0, M1, %f4		! FGM	Group
996	fmuls	%f1, M5, %f7		! FGM	Group
997	fmuls	%f0, M2, %f5		! FGM	Group	f3 available
998	fmuls	%f1, M6, %f8		! FGM	Group	f6 available
999	fadds	%f3, %f6, %f3		! FGA
1000	fmuls	%f2, M8, %f9		! FGM	Group	f4 available
1001	fmuls	%f2, M9, %f10		! FGM	Group	f7 available
1002	fadds	%f4, %f7, %f4		! FGA
1003	fmuls	%f2, M10, %f11		! FGM	Group	f5 available
1004	fadds	%f5, %f8, %f5		! FGA	Group	f8, f3 available
1005	fadds	%f3, %f9, %f3		! FGA	Group	f9 available
1006	fadds	%f4, %f10, %f4		! FGA	Group	f10, f4 available
1007	fadds	%f5, %f11, %f5		! FGA	Group	stall, f11, f5 available
1008	fadds	%f3, M12, %f3		! FGA	Group	f3 available
1009	st	%f3, [%g2 + 0x00]	! LSU
1010	fadds	%f4, M13, %f4		! FGA	Group	f4 available
1011	st	%f4, [%g2 + 0x04]	! LSU
1012	fadds	%f5, M14, %f5		! FGA	Group	f5 available
1013	st	%f5, [%g2 + 0x08]	! LSU
1014	cmp	%o1, %g3		! IEU1
1015	bne	1b			! CTI
1016	 add	%g2, 0x10, %g2		! IEU0	Group
10172:
1018	ba	__set_v4f_3
1019	 nop
1020
1021	.globl	_mesa_sparc_transform_points3_3d_no_rot
1022_mesa_sparc_transform_points3_3d_no_rot:
1023	ld	[%o2 + V4F_STRIDE], %o5
1024	LDPTR	[%o2 + V4F_START], %g1
1025	LDPTR	[%o0 + V4F_START], %g2
1026	ld	[%o2 + V4F_COUNT], %g3
1027
1028	LDMATRIX_0_5_10_12_13_14(%o1)
1029
1030	cmp	%g3, 0
1031	st	%g3, [%o0 + V4F_COUNT]
1032	be	2f
1033	 clr	%o1
1034
10351:	ld	[%g1 + 0x00], %f0	! LSU	Group
1036	ld	[%g1 + 0x04], %f1	! LSU	Group
1037	ld	[%g1 + 0x08], %f2	! LSU	Group
1038	add	%o1, 1, %o1		! IEU0
1039	add	%g1, %o5, %g1		! IEU1
1040	cmp	%o1, %g3		! IEU1	Group
1041	fmuls	%f0, M0, %f3		! FGM
1042	fmuls	%f1, M5, %f4		! FGM	Group
1043	fmuls	%f2, M10, %f5		! FGM	Group
1044	fadds	%f3, M12, %f3		! FGA	Group, stall, f3 available
1045	st	%f3, [%g2 + 0x00]	! LSU
1046	fadds	%f4, M13, %f4		! FGA	Group, f4 available
1047	st	%f4, [%g2 + 0x04]	! LSU
1048	fadds	%f5, M14, %f5		! FGA	Group, f5 available
1049	st	%f5, [%g2 + 0x08]	! LEU
1050	bne	1b			! CTI
1051	 add	%g2, 0x10, %g2		! IEU0	Group
10522:
1053	ba	__set_v4f_3
1054	 nop
1055
1056	.globl	_mesa_sparc_transform_points3_perspective
1057_mesa_sparc_transform_points3_perspective:
1058	ld	[%o2 + V4F_STRIDE], %o5
1059	LDPTR	[%o2 + V4F_START], %g1
1060	LDPTR	[%o0 + V4F_START], %g2
1061	ld	[%o2 + V4F_COUNT], %g3
1062
1063	LDMATRIX_0_5_8_9_10_14(%o1)
1064
1065	cmp	%g3, 0
1066	st	%g3, [%o0 + V4F_COUNT]
1067	be	2f
1068	 clr	%o1
1069
10701:	ld	[%g1 + 0x00], %f0	! LSU	Group
1071	ld	[%g1 + 0x04], %f1	! LSU	Group
1072	ld	[%g1 + 0x08], %f2	! LSU	Group
1073	add	%o1, 1, %o1		! IEU0
1074	add	%g1, %o5, %g1		! IEU1
1075	fmuls	%f0, M0, %f3		! FGM
1076	fmuls	%f2, M8, %f6		! FGM	Group
1077	fmuls	%f1, M5, %f4		! FGM	Group
1078	fmuls	%f2, M9, %f7		! FGM	Group
1079	fmuls	%f2, M10, %f5		! FGM	Group	f3 available
1080	fadds	%f3, %f6, %f3		! FGA	Group	f6 available
1081	st	%f3, [%g2 + 0x00]	! LSU
1082	fadds	%f4, %f7, %f4		! FGA	Group	stall, f4, f7 available
1083	st	%f4, [%g2 + 0x04]	! LSU
1084	fadds	%f5, M14, %f5		! FGA	Group
1085	st	%f5, [%g2 + 0x08]	! LSU
1086	fnegs	%f2, %f6		! FGA	Group
1087	st	%f6, [%g2 + 0x0c]	! LSU
1088	cmp	%o1, %g3		! IEU1
1089	bne	1b			! CTI
1090	 add	%g2, 0x10, %g2		! IEU0	Group
10912:
1092	ba	__set_v4f_4
1093	 nop
1094
1095	.globl	_mesa_sparc_transform_points4_general
/*
 * _mesa_sparc_transform_points4_general
 *
 * Transforms 4-float vertices using all sixteen matrix terms M0..M15.
 *
 * In:   %o0 = destination vector descriptor (V4F_START is read, V4F_COUNT
 *             is written here; size and flags are set by __set_v4f_4)
 *       %o1 = operand handed to the LDMATRIX macro (presumably the matrix
 *             pointer; the macro comes from sparc_matrix.h -- TODO confirm)
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT are read)
 * Regs: %g1 = current source vertex, stepped by the stride held in %o5
 *       %g2 = current destination vertex, stepped by 0x10 per vertex
 *       %g3 = vertex count, %o1 = loop index (reused after the matrix load)
 *       %f0-%f3 = input x, y, z, w; %f4-%f15 = products and partial sums
 * Out:  per vertex
 *         out[0] = x*M0 + y*M4 + z*M8  + w*M12
 *         out[1] = x*M1 + y*M5 + z*M9  + w*M13
 *         out[2] = x*M2 + y*M6 + z*M10 + w*M14
 *         out[3] = x*M3 + y*M7 + z*M11 + w*M15
 *
 * M0..M15 are symbols defined outside this view (presumably the FP
 * registers filled by LDMATRIX).  The trailing "!" notes on the loop
 * instructions look like the intended execution unit and issue group;
 * NOTE(review): confirm them against the target CPU manual before
 * reordering anything.
 */
1096_mesa_sparc_transform_points4_general:
1097	ld	[%o2 + V4F_STRIDE], %o5
1098	LDPTR	[%o2 + V4F_START], %g1
1099	LDPTR	[%o0 + V4F_START], %g2
1100	ld	[%o2 + V4F_COUNT], %g3
1101
1102	LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)
1103
	/*
	 * Copy the count into the destination descriptor and skip the loop
	 * when it is zero.  The clr sits in the branch delay slot, so the
	 * loop index is zeroed whether or not the branch is taken.
	 */
1104	cmp	%g3, 0
1105	st	%g3, [%o0 + V4F_COUNT]
1106	be	2f
1107	 clr	%o1
1108
	/* Load x, y, z, w, then bump the index and the source pointer. */
11091:	ld	[%g1 + 0x00], %f0	! LSU	Group
1110	ld	[%g1 + 0x04], %f1	! LSU	Group
1111	ld	[%g1 + 0x08], %f2	! LSU	Group
1112	ld	[%g1 + 0x0c], %f3	! LSU	Group
1113	add	%o1, 1, %o1		! IEU0
1114	add	%g1, %o5, %g1		! IEU1
	/* x and y products, with each x+y sum folded into %f4-%f7. */
1115	fmuls	%f0, M0, %f4		! FGM	Group
1116	fmuls	%f1, M4, %f8		! FGM	Group
1117	fmuls	%f0, M1, %f5		! FGM	Group
1118	fmuls	%f1, M5, %f9		! FGM	Group
1119	fmuls	%f0, M2, %f6		! FGM	Group	f4 available
1120	fmuls	%f1, M6, %f10		! FGM	Group	f8 available
1121	fadds	%f4, %f8, %f4		! FGA
1122	fmuls	%f0, M3, %f7		! FGM	Group	f5 available
1123	fmuls	%f1, M7, %f11		! FGM	Group	f9 available
1124	fadds	%f5, %f9, %f5		! FGA
	/* z products go to %f12-%f15. */
1125	fmuls	%f2, M8, %f12		! FGM	Group	f6 available
1126	fmuls	%f2, M9, %f13		! FGM	Group	f10, f4 available
1127	fadds	%f6, %f10, %f6		! FGA
1128	fmuls	%f2, M10, %f14		! FGM	Group	f7 available
1129	fmuls	%f2, M11, %f15		! FGM	Group	f11, f5 available
1130	fadds	%f7, %f11, %f7		! FGA
	/*
	 * w products reuse %f8-%f11; each of those registers has already
	 * been consumed by the matching x+y add above.
	 */
1131	fmuls	%f3, M12, %f8		! FGM	Group	f12 available
1132	fadds	%f4, %f12, %f4		! FGA
1133	fmuls	%f3, M13, %f9		! FGM	Group	f13, f6 available
1134	fadds	%f5, %f13, %f5		! FGA
1135	fmuls	%f3, M14, %f10		! FGM	Group	f14 available
1136	fadds	%f6, %f14, %f6		! FGA
1137	fmuls	%f3, M15, %f11		! FGM	Group	f15, f7 available
1138	fadds	%f7, %f15, %f7		! FGA
	/* Add the w terms and store each finished component. */
1139	fadds	%f4, %f8, %f4		! FGA	Group	f8, f4 available
1140	st	%f4, [%g2 + 0x00]	! LSU
1141	fadds	%f5, %f9, %f5		! FGA	Group	f9, f5 available
1142	st	%f5, [%g2 + 0x04]	! LSU
1143	fadds	%f6, %f10, %f6		! FGA	Group	f10, f6 available
1144	st	%f6, [%g2 + 0x08]	! LSU
1145	fadds	%f7, %f11, %f7		! FGA	Group	f11, f7 available
1146	st	%f7, [%g2 + 0x0c]	! LSU
	/*
	 * Loop until the index reaches the count.  The delay-slot add
	 * advances the destination pointer on every pass.
	 */
1147	cmp	%o1, %g3		! IEU1
1148	bne	1b			! CTI
1149	 add	%g2, 0x10, %g2		! IEU0	Group
11502:
	/*
	 * Tail-branch: __set_v4f_4 stores size 4 and ORs VEC_SIZE_4 into the
	 * flags of the descriptor in %o0, then returns to our caller (retl).
	 */
1151	ba	__set_v4f_4
1152	 nop
1153
/*
 * _mesa_sparc_transform_points4_identity
 *
 * Copies 4-float vertices from the source vector to the destination
 * vector unchanged; no matrix terms are loaded or used.
 *
 * In:   %o0 = destination vector descriptor (V4F_START is read, V4F_COUNT
 *             is written here; size and flags are set by __set_v4f_4)
 *       %o1 = incoming value is never read; the register only serves as
 *             the loop index
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT are read)
 * Regs: %g1 = current source vertex, stepped by the stride held in %o5
 *       %g2 = current destination vertex, stepped by 0x10 per vertex
 *       %g3 = vertex count
 *       %f0-%f3 = the four words being copied
 */
1154	.globl	_mesa_sparc_transform_points4_identity
1155_mesa_sparc_transform_points4_identity:
1156	ld	[%o2 + V4F_STRIDE], %o5
1157	LDPTR	[%o2 + V4F_START], %g1
1158	LDPTR	[%o0 + V4F_START], %g2
1159	ld	[%o2 + V4F_COUNT], %g3
1160
	/*
	 * Copy the count into the destination descriptor and skip the loop
	 * when it is zero.  The clr sits in the branch delay slot, so the
	 * loop index is zeroed whether or not the branch is taken.
	 */
1161	cmp	%g3, 0
1162	st	%g3, [%o0 + V4F_COUNT]
1163	be	2f
1164	 clr	%o1
1165
	/*
	 * Load the four source words, interleaved with the index and source
	 * pointer updates.  The last load is issued before %g1 is advanced.
	 */
11661:	ld	[%g1 + 0x00], %f0
1167	ld	[%g1 + 0x04], %f1
1168	ld	[%g1 + 0x08], %f2
1169	add	%o1, 1, %o1
1170	ld	[%g1 + 0x0c], %f3
1171	add	%g1, %o5, %g1
	/* Store them packed; the loop compare is slotted between stores. */
1172	st	%f0, [%g2 + 0x00]
1173	st	%f1, [%g2 + 0x04]
1174	st	%f2, [%g2 + 0x08]
1175	cmp	%o1, %g3
1176	st	%f3, [%g2 + 0x0c]
	/* The delay-slot add advances the destination on every pass. */
1177	bne	1b
1178	 add	%g2, 0x10, %g2
11792:
	/*
	 * Tail-branch: __set_v4f_4 stores size 4 and ORs VEC_SIZE_4 into the
	 * flags of the descriptor in %o0, then returns to our caller (retl).
	 */
1180	ba	__set_v4f_4
1181	 nop
1182
/*
 * _mesa_sparc_transform_points4_2d
 *
 * Transforms the x and y components of 4-float vertices with the matrix
 * terms M0, M1, M4, M5, M12, M13 and passes z and w through unchanged.
 *
 * In:   %o0 = destination vector descriptor (V4F_START is read, V4F_COUNT
 *             is written here; size and flags are set by __set_v4f_4)
 *       %o1 = operand handed to the LDMATRIX macro (presumably the matrix
 *             pointer; the macro comes from sparc_matrix.h -- TODO confirm)
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT are read)
 * Regs: %g1 = current source vertex, stepped by the stride held in %o5
 *       %g2 = current destination vertex, stepped by 0x10 per vertex
 *       %g3 = vertex count, %o1 = loop index (reused after the matrix load)
 *       %f0-%f3 = input x, y, z, w
 * Out:  per vertex
 *         out[0] = x*M0 + y*M4 + w*M12
 *         out[1] = x*M1 + y*M5 + w*M13
 *         out[2] = z
 *         out[3] = w
 *
 * The M<n> symbols are defined outside this view (presumably the FP
 * registers filled by LDMATRIX).  The trailing "!" notes look like the
 * intended execution unit and issue group; NOTE(review): confirm them
 * against the target CPU manual before reordering anything.
 */
1183	.globl	_mesa_sparc_transform_points4_2d
1184_mesa_sparc_transform_points4_2d:
1185	ld	[%o2 + V4F_STRIDE], %o5
1186	LDPTR	[%o2 + V4F_START], %g1
1187	LDPTR	[%o0 + V4F_START], %g2
1188	ld	[%o2 + V4F_COUNT], %g3
1189
1190	LDMATRIX_0_1_4_5_12_13(%o1)
1191
	/*
	 * Copy the count into the destination descriptor and skip the loop
	 * when it is zero.  The clr sits in the branch delay slot, so the
	 * loop index is zeroed whether or not the branch is taken.
	 */
1192	cmp	%g3, 0
1193	st	%g3, [%o0 + V4F_COUNT]
1194	be	2f
1195	 clr	%o1
1196
	/* Load x, y, z, w, then bump the index and the source pointer. */
11971:	ld	[%g1 + 0x00], %f0	! LSU	Group
1198	ld	[%g1 + 0x04], %f1	! LSU	Group
1199	ld	[%g1 + 0x08], %f2	! LSU	Group
1200	ld	[%g1 + 0x0c], %f3	! LSU	Group
1201	add	%o1, 1, %o1		! IEU0
1202	add	%g1, %o5, %g1		! IEU1
	/* x and y products in %f4/%f5 and %f8/%f9, w products in %f12/%f13. */
1203	fmuls	%f0, M0, %f4		! FGM
1204	fmuls	%f1, M4, %f8		! FGM	Group
1205	fmuls	%f0, M1, %f5		! FGM	Group
1206	fmuls	%f1, M5, %f9		! FGM	Group	f4 available
1207	fmuls	%f3, M12, %f12		! FGM	Group
1208	fmuls	%f3, M13, %f13		! FGM	Group	f8 available
	/* Sum the three terms of each output and store them. */
1209	fadds	%f4, %f8, %f4		! FGA
1210	fadds	%f5, %f9, %f5		! FGA	Group	stall, f5, f9 available
1211	fadds	%f4, %f12, %f4		! FGA	Group	2 cycle stall, f4, f12, f13 avail
1212	st	%f4, [%g2 + 0x00]	! LSU
1213	fadds	%f5, %f13, %f5		! FGA	Group	f5 available
1214	st	%f5, [%g2 + 0x04]	! LSU
	/* z and w are written back exactly as loaded. */
1215	st	%f2, [%g2 + 0x08]	! LSU	Group
1216	st	%f3, [%g2 + 0x0c]	! LSU	Group
	/*
	 * Loop until the index reaches the count.  The delay-slot add
	 * advances the destination pointer on every pass.
	 */
1217	cmp	%o1, %g3		! IEU1
1218	bne	1b			! CTI
1219	 add	%g2, 0x10, %g2		! IEU0	Group
12202:
	/*
	 * Tail-branch: __set_v4f_4 stores size 4 and ORs VEC_SIZE_4 into the
	 * flags of the descriptor in %o0, then returns to our caller (retl).
	 */
1221	ba	__set_v4f_4
1222	 nop
1223
/*
 * _mesa_sparc_transform_points4_2d_no_rot
 *
 * Scales and offsets the x and y components of 4-float vertices with the
 * matrix terms M0, M5, M12, M13 and passes z and w through unchanged.
 *
 * In:   %o0 = destination vector descriptor (V4F_START is read, V4F_COUNT
 *             is written here; size and flags are set by __set_v4f_4)
 *       %o1 = operand handed to the LDMATRIX macro (presumably the matrix
 *             pointer; the macro comes from sparc_matrix.h -- TODO confirm)
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT are read)
 * Regs: %g1 = current source vertex, stepped by the stride held in %o5
 *       %g2 = current destination vertex, stepped by 0x10 per vertex
 *       %g3 = vertex count, %o1 = loop index (reused after the matrix load)
 *       %f0-%f3 = input x, y, z, w
 * Out:  per vertex
 *         out[0] = x*M0 + w*M12
 *         out[1] = y*M5 + w*M13
 *         out[2] = z
 *         out[3] = w
 *
 * The M<n> symbols are defined outside this view (presumably the FP
 * registers filled by LDMATRIX).
 *
 * NOTE(review): the loop only references M0, M5, M12 and M13, yet the
 * macro name below also lists elements 1 and 4.  Check sparc_matrix.h to
 * see whether a narrower load macro is available.
 */
1224	.globl	_mesa_sparc_transform_points4_2d_no_rot
1225_mesa_sparc_transform_points4_2d_no_rot:
1226	ld	[%o2 + V4F_STRIDE], %o5
1227	LDPTR	[%o2 + V4F_START], %g1
1228	LDPTR	[%o0 + V4F_START], %g2
1229	ld	[%o2 + V4F_COUNT], %g3
1230
1231	LDMATRIX_0_1_4_5_12_13(%o1)
1232
	/*
	 * Copy the count into the destination descriptor and skip the loop
	 * when it is zero.  The clr sits in the branch delay slot, so the
	 * loop index is zeroed whether or not the branch is taken.
	 */
1233	cmp	%g3, 0
1234	st	%g3, [%o0 + V4F_COUNT]
1235	be	2f
1236	 clr	%o1
1237
	/* Load x, y, z, w, then bump the index and the source pointer. */
12381:	ld	[%g1 + 0x00], %f0
1239	ld	[%g1 + 0x04], %f1
1240	ld	[%g1 + 0x08], %f2
1241	ld	[%g1 + 0x0c], %f3
1242	add	%o1, 1, %o1
1243	add	%g1, %o5, %g1
	/* Scale terms land in %f4/%f5, the w offset terms in %f8/%f9. */
1244	fmuls	%f0, M0, %f4
1245	fmuls	%f3, M12, %f8
1246	fmuls	%f1, M5, %f5
1247	fmuls	%f3, M13, %f9
1248	fadds	%f4, %f8, %f4
1249	st	%f4, [%g2 + 0x00]
1250	fadds	%f5, %f9, %f5
1251	st	%f5, [%g2 + 0x04]
	/* z and w are written back exactly as loaded. */
1252	st	%f2, [%g2 + 0x08]
1253	st	%f3, [%g2 + 0x0c]
	/*
	 * Loop until the index reaches the count.  The delay-slot add
	 * advances the destination pointer on every pass.
	 */
1254	cmp	%o1, %g3
1255	bne	1b
1256	 add	%g2, 0x10, %g2
12572:
	/*
	 * Tail-branch: __set_v4f_4 stores size 4 and ORs VEC_SIZE_4 into the
	 * flags of the descriptor in %o0, then returns to our caller (retl).
	 */
1258	ba	__set_v4f_4
1259	 nop
1260
/*
 * _mesa_sparc_transform_points4_3d
 *
 * Transforms the x, y and z components of 4-float vertices with twelve
 * matrix terms (M0-M2, M4-M6, M8-M10, M12-M14) and passes w through
 * unchanged.
 *
 * In:   %o0 = destination vector descriptor (V4F_START is read, V4F_COUNT
 *             is written here; size and flags are set by __set_v4f_4)
 *       %o1 = operand handed to the LDMATRIX macro (presumably the matrix
 *             pointer; the macro comes from sparc_matrix.h -- TODO confirm)
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT are read)
 * Regs: %g1 = current source vertex, stepped by the stride held in %o5
 *       %g2 = current destination vertex, stepped by 0x10 per vertex
 *       %g3 = vertex count, %o1 = loop index (reused after the matrix load)
 *       %f0-%f3 = input x, y, z, w; %f4-%f15 = products and partial sums
 * Out:  per vertex
 *         out[0] = x*M0 + y*M4 + z*M8  + w*M12
 *         out[1] = x*M1 + y*M5 + z*M9  + w*M13
 *         out[2] = x*M2 + y*M6 + z*M10 + w*M14
 *         out[3] = w
 *
 * The M<n> symbols are defined outside this view (presumably the FP
 * registers filled by LDMATRIX).  The trailing "!" notes look like the
 * intended execution unit and issue group; NOTE(review): confirm them
 * against the target CPU manual before reordering anything.
 */
1261	.globl	_mesa_sparc_transform_points4_3d
1262_mesa_sparc_transform_points4_3d:
1263	ld	[%o2 + V4F_STRIDE], %o5
1264	LDPTR	[%o2 + V4F_START], %g1
1265	LDPTR	[%o0 + V4F_START], %g2
1266	ld	[%o2 + V4F_COUNT], %g3
1267
1268	LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)
1269
	/*
	 * Copy the count into the destination descriptor and skip the loop
	 * when it is zero.  The clr sits in the branch delay slot, so the
	 * loop index is zeroed whether or not the branch is taken.
	 */
1270	cmp	%g3, 0
1271	st	%g3, [%o0 + V4F_COUNT]
1272	be	2f
1273	 clr	%o1
1274
	/* Load x, y, z, w, then bump the index and the source pointer. */
12751:	ld	[%g1 + 0x00], %f0	! LSU	Group
1276	ld	[%g1 + 0x04], %f1	! LSU	Group
1277	ld	[%g1 + 0x08], %f2	! LSU	Group
1278	ld	[%g1 + 0x0c], %f3	! LSU	Group
1279	add	%o1, 1, %o1		! IEU0
1280	add	%g1, %o5, %g1		! IEU1
	/*
	 * Products: x terms in %f4-%f6, y terms in %f7-%f9, z terms in
	 * %f10-%f12, w terms in %f13-%f15.  The adds accumulate into
	 * %f4-%f6 as the products are produced.
	 */
1281	fmuls	%f0, M0, %f4		! FGM
1282	fmuls	%f1, M4, %f7		! FGM	Group
1283	fmuls	%f0, M1, %f5		! FGM	Group
1284	fmuls	%f1, M5, %f8		! FGM	Group
1285	fmuls	%f0, M2, %f6		! FGM	Group	f4 available
1286	fmuls	%f1, M6, %f9		! FGM	Group	f7 available
1287	fadds	%f4, %f7, %f4		! FGA
1288	fmuls	%f2, M8, %f10		! FGM	Group	f5 available
1289	fmuls	%f2, M9, %f11		! FGM	Group	f8 available
1290	fadds	%f5, %f8, %f5		! FGA
1291	fmuls	%f2, M10, %f12		! FGM	Group	f6 available
1292	fmuls	%f3, M12, %f13		! FGM	Group	f9, f4 available
1293	fadds	%f6, %f9, %f6		! FGA
1294	fmuls	%f3, M13, %f14		! FGM	Group	f10 available
1295	fadds	%f4, %f10, %f4		! FGA
1296	fmuls	%f3, M14, %f15		! FGM	Group	f11, f5 available
1297	fadds	%f5, %f11, %f5		! FGA
1298	fadds	%f6, %f12, %f6		! FGA	Group	stall, f12, f13, f6 available
	/* Add the w terms and store each finished component. */
1299	fadds	%f4, %f13, %f4		! FGA	Group	f14, f4 available
1300	st	%f4, [%g2 + 0x00]	! LSU
1301	fadds	%f5, %f14, %f5		! FGA	Group	f15, f5 available
1302	st	%f5, [%g2 + 0x04]	! LSU
1303	fadds	%f6, %f15, %f6		! FGA	Group	f6 available
1304	st	%f6, [%g2 + 0x08]	! LSU
	/* w is written back exactly as loaded. */
1305	st	%f3, [%g2 + 0x0c]	! LSU	Group
	/*
	 * Loop until the index reaches the count.  The delay-slot add
	 * advances the destination pointer on every pass.
	 */
1306	cmp	%o1, %g3		! IEU1
1307	bne	1b			! CTI
1308	 add	%g2, 0x10, %g2		! IEU0	Group
13092:
	/*
	 * Tail-branch: __set_v4f_4 stores size 4 and ORs VEC_SIZE_4 into the
	 * flags of the descriptor in %o0, then returns to our caller (retl).
	 */
1310	ba	__set_v4f_4
1311	 nop
1312
/*
 * _mesa_sparc_transform_points4_3d_no_rot
 *
 * Scales and offsets the x, y and z components of 4-float vertices with
 * the matrix terms M0, M5, M10, M12, M13, M14 and passes w through
 * unchanged.
 *
 * In:   %o0 = destination vector descriptor (V4F_START is read, V4F_COUNT
 *             is written here; size and flags are set by __set_v4f_4)
 *       %o1 = operand handed to the LDMATRIX macro (presumably the matrix
 *             pointer; the macro comes from sparc_matrix.h -- TODO confirm)
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT are read)
 * Regs: %g1 = current source vertex, stepped by the stride held in %o5
 *       %g2 = current destination vertex, stepped by 0x10 per vertex
 *       %g3 = vertex count, %o1 = loop index (reused after the matrix load)
 *       %f0-%f3 = input x, y, z, w
 * Out:  per vertex
 *         out[0] = x*M0  + w*M12
 *         out[1] = y*M5  + w*M13
 *         out[2] = z*M10 + w*M14
 *         out[3] = w
 *
 * The M<n> symbols are defined outside this view (presumably the FP
 * registers filled by LDMATRIX).  The trailing "!" notes look like the
 * intended execution unit and issue group; NOTE(review): confirm them
 * against the target CPU manual before reordering anything.
 */
1313	.globl	_mesa_sparc_transform_points4_3d_no_rot
1314_mesa_sparc_transform_points4_3d_no_rot:
1315	ld	[%o2 + V4F_STRIDE], %o5
1316	LDPTR	[%o2 + V4F_START], %g1
1317	LDPTR	[%o0 + V4F_START], %g2
1318	ld	[%o2 + V4F_COUNT], %g3
1319
1320	LDMATRIX_0_5_10_12_13_14(%o1)
1321
	/*
	 * Copy the count into the destination descriptor and skip the loop
	 * when it is zero.  The clr sits in the branch delay slot, so the
	 * loop index is zeroed whether or not the branch is taken.
	 */
1322	cmp	%g3, 0
1323	st	%g3, [%o0 + V4F_COUNT]
1324	be	2f
1325	 clr	%o1
1326
	/* Load x, y, z, w, then bump the index and the source pointer. */
13271:	ld	[%g1 + 0x00], %f0	! LSU	Group
1328	ld	[%g1 + 0x04], %f1	! LSU	Group
1329	ld	[%g1 + 0x08], %f2	! LSU	Group
1330	ld	[%g1 + 0x0c], %f3	! LSU	Group
1331	add	%o1, 1, %o1		! IEU0
1332	add	%g1, %o5, %g1		! IEU1
	/* Scale terms land in %f4-%f6, the w offset terms in %f7-%f9. */
1333	fmuls	%f0, M0, %f4		! FGM
1334	fmuls	%f3, M12, %f7		! FGM	Group
1335	fmuls	%f1, M5, %f5		! FGM	Group
1336	fmuls	%f3, M13, %f8		! FGM	Group
1337	fmuls	%f2, M10, %f6		! FGM	Group	f4 available
1338	fmuls	%f3, M14, %f9		! FGM	Group	f7 available
	/* Sum each scale/offset pair and store it. */
1339	fadds	%f4, %f7, %f4		! FGA
1340	st	%f4, [%g2 + 0x00]	! LSU
1341	fadds	%f5, %f8, %f5		! FGA	Group	stall, f5, f8 available
1342	st	%f5, [%g2 + 0x04]	! LSU
1343	fadds	%f6, %f9, %f6		! FGA	Group	stall, f6, f9 available
1344	st	%f6, [%g2 + 0x08]	! LSU
	/* w is written back exactly as loaded. */
1345	st	%f3, [%g2 + 0x0c]	! LSU	Group
	/*
	 * Loop until the index reaches the count.  The delay-slot add
	 * advances the destination pointer on every pass.
	 */
1346	cmp	%o1, %g3		! IEU1
1347	bne	1b			! CTI
1348	 add	%g2, 0x10, %g2		! IEU0	Group
13492:
	/*
	 * Tail-branch: __set_v4f_4 stores size 4 and ORs VEC_SIZE_4 into the
	 * flags of the descriptor in %o0, then returns to our caller (retl).
	 */
1350	ba	__set_v4f_4
1351	 nop
1352
/*
 * _mesa_sparc_transform_points4_perspective
 *
 * Transforms 4-float vertices with the matrix terms M0, M5, M8, M9, M10,
 * M14 and writes the negated input z as the fourth output component.
 *
 * In:   %o0 = destination vector descriptor (V4F_START is read, V4F_COUNT
 *             is written here; size and flags are set by __set_v4f_4)
 *       %o1 = operand handed to the LDMATRIX macro (presumably the matrix
 *             pointer; the macro comes from sparc_matrix.h -- TODO confirm)
 *       %o2 = source vector descriptor (V4F_STRIDE, V4F_START and
 *             V4F_COUNT are read)
 * Regs: %g1 = current source vertex, stepped by the stride held in %o5
 *       %g2 = current destination vertex, stepped by 0x10 per vertex
 *       %g3 = vertex count, %o1 = loop index (reused after the matrix load)
 *       %f0-%f3 = input x, y, z, w
 * Out:  per vertex
 *         out[0] = x*M0  + z*M8
 *         out[1] = y*M5  + z*M9
 *         out[2] = z*M10 + w*M14
 *         out[3] = -z
 *
 * The M<n> symbols are defined outside this view (presumably the FP
 * registers filled by LDMATRIX).  The trailing "!" notes look like the
 * intended execution unit and issue group; NOTE(review): confirm them
 * against the target CPU manual before reordering anything.
 */
1353	.globl	_mesa_sparc_transform_points4_perspective
1354_mesa_sparc_transform_points4_perspective:
1355	ld	[%o2 + V4F_STRIDE], %o5
1356	LDPTR	[%o2 + V4F_START], %g1
1357	LDPTR	[%o0 + V4F_START], %g2
1358	ld	[%o2 + V4F_COUNT], %g3
1359
1360	LDMATRIX_0_5_8_9_10_14(%o1)
1361
	/*
	 * Copy the count into the destination descriptor and skip the loop
	 * when it is zero.  The clr sits in the branch delay slot, so the
	 * loop index is zeroed whether or not the branch is taken.
	 */
1362	cmp	%g3, 0
1363	st	%g3, [%o0 + V4F_COUNT]
1364	be	2f
1365	 clr	%o1
1366
	/* Load x, y, z, w, then bump the index and the source pointer. */
13671:	ld	[%g1 + 0x00], %f0	! LSU	Group
1368	ld	[%g1 + 0x04], %f1	! LSU	Group
1369	ld	[%g1 + 0x08], %f2	! LSU	Group
1370	ld	[%g1 + 0x0c], %f3	! LSU	Group
1371	add	%o1, 1, %o1		! IEU0
1372	add	%g1, %o5, %g1		! IEU1
	/* First term of each output in %f4-%f6, second term in %f7-%f9. */
1373	fmuls	%f0, M0, %f4		! FGM
1374	fmuls	%f2, M8, %f7		! FGM	Group
1375	fmuls	%f1, M5, %f5		! FGM	Group
1376	fmuls	%f2, M9, %f8		! FGM	Group
1377	fmuls	%f2, M10, %f6		! FGM	Group	f4 available
1378	fmuls	%f3, M14, %f9		! FGM	Group	f7 available
	/* Sum each pair and store it. */
1379	fadds	%f4, %f7, %f4		! FGA
1380	st	%f4, [%g2 + 0x00]	! LSU
1381	fadds	%f5, %f8, %f5		! FGA	Group	stall, f5, f8 available
1382	st	%f5, [%g2 + 0x04]	! LSU
1383	fadds	%f6, %f9, %f6		! FGA	Group	stall, f6, f9 available
1384	st	%f6, [%g2 + 0x08]	! LSU
	/*
	 * Fourth component is the negated input z.  %f7 is free to reuse
	 * because its product was already consumed by the first add above.
	 */
1385	fnegs	%f2, %f7		! FGA	Group
1386	st	%f7, [%g2 + 0x0c]	! LSU
	/*
	 * Loop until the index reaches the count.  The delay-slot add
	 * advances the destination pointer on every pass.
	 */
1387	cmp	%o1, %g3		! IEU1
1388	bne	1b			! CTI
1389	 add	%g2, 0x10, %g2		! IEU0	Group
13902:
	/*
	 * Tail-branch: __set_v4f_4 stores size 4 and ORs VEC_SIZE_4 into the
	 * flags of the descriptor in %o0, then returns to our caller (retl).
	 */
1391	ba	__set_v4f_4
1392	 nop
1393