sparc_clip.S revision 8676890018a94b475dd0bb7389f3b77f37e36fdb
1/*
2 * Clip testing in SPARC assembly
3 */
4
/*
 * Byte offsets of the vector-descriptor fields used in this file, plus the
 * load instruction wide enough for a pointer-sized field.
 * NOTE(review): presumably these mirror the C layout of GLvector4f for
 * 32-bit and 64-bit pointers; confirm against its declaration.
 */
#define V4F_DATA	0x00		/* identical in both layouts */

#if __arch64__
#define LDPTR		ldx		/* 64-bit pointer load */
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#else
#define LDPTR		ld		/* 32-bit pointer load */
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#endif

/* Size flags: VEC_SIZE_n has the low n bits set. */
#define VEC_SIZE_1	0x1
#define VEC_SIZE_2	0x3
#define VEC_SIZE_3	0x7
#define VEC_SIZE_4	0xf

	/* Tell the assembler that %g2 and %g3 are used as scratch here. */
	.register	%g2, #scratch
	.register	%g3, #scratch
30
31	.text
32	.align		64
33
one_dot_zero:
	.word		0x3f800000	/* IEEE-754 single 1.0f */

	/*
	 * Clip-mask lookup table, placed 4 bytes after one_dot_zero.
	 *
	 * The idea is borrowed from the x86 Mesa asm: it works here too
	 * because Sparc has add-with-carry instructions.  The cliptest
	 * loops shift one carry bit at a time into a 7-bit index:
	 *
	 *   bit 6: sign(w)    bit 5: sign(z)    bit 4: |z| > |w|
	 *   bit 3: sign(y)    bit 2: |y| > |w|
	 *   bit 1: sign(x)    bit 0: |x| > |w|
	 *
	 * Each 8-byte row below covers one value of index bits 6..3; the
	 * byte found at the index is the clip mask for that vertex.
	 */
clip_table:
	/* sign(w) = 0 */
	.byte	0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
	.byte	0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
	.byte	0x20, 0x21, 0x20, 0x22, 0x24, 0x25, 0x24, 0x26
	.byte	0x20, 0x21, 0x20, 0x22, 0x28, 0x29, 0x28, 0x2a
	.byte	0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
	.byte	0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
	.byte	0x10, 0x11, 0x10, 0x12, 0x14, 0x15, 0x14, 0x16
	.byte	0x10, 0x11, 0x10, 0x12, 0x18, 0x19, 0x18, 0x1a
	/* sign(w) = 1 */
	.byte	0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
	.byte	0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
	.byte	0x2f, 0x2d, 0x2f, 0x2e, 0x27, 0x25, 0x27, 0x26
	.byte	0x2f, 0x2d, 0x2f, 0x2e, 0x2b, 0x29, 0x2b, 0x2a
	.byte	0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
	.byte	0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
	.byte	0x1f, 0x1d, 0x1f, 0x1e, 0x17, 0x15, 0x17, 0x16
	.byte	0x1f, 0x1d, 0x1f, 0x1e, 0x1b, 0x19, 0x1b, 0x1a
59
/* C-level arguments of the cliptest entry points that follow, in order:
 *   GLvector4f *clip_vec, GLvector4f *proj_vec,
 *   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
 *   GLboolean viewport_z_enable
 */
63
64	.align		64
65__pc_tramp:
66	retl
67	 nop
68
69	.globl		_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	/*
	 * Clip-test a run of 4-component points and write their
	 * perspective-divided form to proj_vec.
	 *
	 * In:	%i0 = clip_vec		%i1 = proj_vec
	 *	%i2 = clipMask		%i3 = orMask	%i4 = andMask
	 *	%i5 = viewport_z_enable (overwritten below without being read)
	 * Out:	%o0 = proj_vec
	 *
	 * For each point: clipMask[i] = clip_table[index].  A clipped point
	 * (non-zero mask) is stored as (0, 0, 0, 1.0); an unclipped one as
	 * (x/w, y/w, z/w, 1/w).  On exit *orMask |= all masks, and *andMask
	 * is 0 if any point had a zero mask, else the AND of all masks with
	 * the incoming *andMask.
	 *
	 * A count of zero is handled explicitly: the loop is bottom-tested,
	 * so without the guard it would process element 0 and then keep
	 * going until the 32-bit counter wrapped.
	 *
	 * NOTE(review): the frame is 64 bytes in both 32- and 64-bit
	 * builds; confirm this is enough for the 64-bit register-window
	 * save area.
	 * NOTE(review): proj_vec size is stored as 3 while the flags get
	 * VEC_SIZE_4; confirm this is intended.
	 */
	save		%sp, -64, %sp
	call		__pc_tramp			! %o7 = address of this call
	 sub		%o7, (. - one_dot_zero - 4), %g1	! %g1 = address of one_dot_zero
	ld		[%g1 + 0x0], %f4		! %f4 = 1.0f
	add		%g1, 0x4, %g1			! %g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1		/* %l1 = source stride */
	ld		[%i0 + V4F_COUNT], %l3		/* %l3 = count */
	LDPTR		[%i0 + V4F_START], %i0		/* %i0 = first source point */
	LDPTR		[%i1 + V4F_START], %i5		/* %i5 = first output point */
	ldub		[%i3], %g2			! %g2 = incoming *orMask
	ldub		[%i4], %g3			! %g3 = incoming *andMask
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2			! %g2 = (and << 8) | or

	ld		[%i1 + V4F_FLAGS], %g3
	or		%g3, VEC_SIZE_4, %g3
	st		%g3, [%i1 + V4F_FLAGS]		/* proj_vec flags |= VEC_SIZE_4 */
	mov		3, %g3
	st		%g3, [%i1 + V4F_SIZE]		/* proj_vec size = 3 */
	st		%l3, [%i1 + V4F_COUNT]		/* proj_vec count = count */
	clr		%l2				! c = 0
	clr		%l0				! i = 0

	cmp		%l3, 0				! nothing to do for an empty vector
	be		4f
	 nop

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 * i5:	vProj[4][i]
	 */

	/* Build the 7-bit table index, one carry bit at a time. */
1:	ld		[%i0 + 0x0c], %f3	! w as a float
	ld		[%i0 + 0x0c], %g5	! w as raw bits
	ld		[%i0 + 0x08], %g4	! z as raw bits
	fdivs		%f4, %f3, %f8		! %f8 = 1.0 / w
	addcc		%g5, %g5, %g5		! carry = sign(w), %g5 = magnitude bits << 1
	addx		%g0, 0x0, %g3		! index = sign(w)
	addcc		%g4, %g4, %g4		! carry = sign(z)
	addx		%g3, %g3, %g3		! shift sign(z) into index
	subcc		%g5, %g4, %g0		! carry = |w| < |z|
	ld		[%i0 + 0x04], %g4	! y as raw bits
	addx		%g3, %g3, %g3		! shift it into index
	addcc		%g4, %g4, %g4		! carry = sign(y)
	addx		%g3, %g3, %g3
	subcc		%g5, %g4, %g0		! carry = |w| < |y|
	ld		[%i0 + 0x00], %g4	! x as raw bits
	addx		%g3, %g3, %g3
	addcc		%g4, %g4, %g4		! carry = sign(x)
	addx		%g3, %g3, %g3
	subcc		%g5, %g4, %g0		! carry = |w| < |x|
	addx		%g3, %g3, %g3
	ldub		[%g1 + %g3], %g3	! mask = clip_table[index]
	cmp		%g3, 0
	be		2f			! mask == 0: point is not clipped
	 stb		%g3, [%i2]		! delay slot, always runs: clipMask[i] = mask

	/* Clipped point: update the masks and store (0, 0, 0, 1.0). */
	sll		%g3, 8, %g4
	add		%l2, 1, %l2		! c++
	st		%g0, [%i5 + 0x00]
	or		%g4, 0xff, %g4		! %g4 = (mask << 8) | 0xff
	or		%g2, %g3, %g2		! tmpOrMask |= mask
	st		%g0, [%i5 + 0x04]
	and		%g2, %g4, %g2		! tmpAndMask &= mask, low byte untouched
	st		%g0, [%i5 + 0x08]
	b		3f
	 st		%f4, [%i5 + 0x0c]	! delay slot: output w = 1.0

	/* Unclipped point: scale x, y, z by 1/w and store 1/w as w. */
2:	ld		[%i0 + 0x00], %f0
	ld		[%i0 + 0x04], %f1
	ld		[%i0 + 0x08], %f2
	fmuls		%f0, %f8, %f0
	st		%f0, [%i5 + 0x00]
	fmuls		%f1, %f8, %f1
	st		%f1, [%i5 + 0x04]
	fmuls		%f2, %f8, %f2
	st		%f2, [%i5 + 0x08]
	st		%f8, [%i5 + 0x0c]

3:	add		%i5, 0x10, %i5		! next output point (16 bytes each)
	add		%l0, 1, %l0		! i++
	add		%i2, 1, %i2		! next clipMask byte
	cmp		%l0, %l3
	bne		1b			! loop until i == count
	 add		%i0, %l1, %i0		! delay slot: advance source by stride bytes

	/* Write back the accumulated masks. */
4:	stb		%g2, [%i3]		! *orMask = tmpOrMask
	srl		%g2, 8, %g3		! %g3 = tmpAndMask
	cmp		%l2, %l3
	bl,a		1f			! c < count: some point was unclipped
	 clr		%g3			! annulled unless the branch is taken
1:	stb		%g3, [%i4]		! *andMask
	ret
	 restore	%i1, 0x0, %o0		! return proj_vec
164
165	.globl		_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	/*
	 * Clip-test a run of 4-component points without writing any
	 * projected output; only the clip masks are produced.
	 *
	 * In:	%i0 = clip_vec		%i1 = proj_vec (only passed through)
	 *	%i2 = clipMask		%i3 = orMask	%i4 = andMask
	 *	%i5 = viewport_z_enable (not read)
	 * Out:	%o0 = the incoming %i1
	 *
	 * For each point: clipMask[i] = clip_table[index].  On exit
	 * *orMask |= all masks, and *andMask is 0 if any point had a zero
	 * mask, else the AND of all masks with the incoming *andMask.
	 *
	 * A count of zero is handled explicitly: the loop is bottom-tested,
	 * so without the guard it would process element 0 and then keep
	 * going until the 32-bit counter wrapped.
	 *
	 * NOTE(review): the frame is 64 bytes in both 32- and 64-bit
	 * builds; confirm this is enough for the 64-bit register-window
	 * save area.
	 */
	save		%sp, -64, %sp

	call		__pc_tramp			! %o7 = address of this call
	 sub		%o7, (. - one_dot_zero - 4), %g1	! %g1 = address of one_dot_zero
	add		%g1, 0x4, %g1			! %g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1		/* %l1 = source stride */
	ld		[%i0 + V4F_COUNT], %l3		/* %l3 = count */
	LDPTR		[%i0 + V4F_START], %i0		/* %i0 = first source point */
	ldub		[%i3], %g2			! %g2 = incoming *orMask
	ldub		[%i4], %g3			! %g3 = incoming *andMask
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2			! %g2 = (and << 8) | or

	clr		%l2				! c = 0
	clr		%l0				! i = 0

	cmp		%l3, 0				! nothing to do for an empty vector
	be		4f
	 nop

	/* l0:	i
	 * l3:	count
	 * l1:	stride
	 * l2:	c
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 */

	/* Build the 7-bit table index, one carry bit at a time. */
1:	ld		[%i0 + 0x0c], %g5	! w as raw bits
	ld		[%i0 + 0x08], %g4	! z as raw bits
	addcc		%g5, %g5, %g5		! carry = sign(w), %g5 = magnitude bits << 1
	addx		%g0, 0x0, %g3		! index = sign(w)
	addcc		%g4, %g4, %g4		! carry = sign(z)
	addx		%g3, %g3, %g3		! shift sign(z) into index
	subcc		%g5, %g4, %g0		! carry = |w| < |z|
	ld		[%i0 + 0x04], %g4	! y as raw bits
	addx		%g3, %g3, %g3		! shift it into index
	addcc		%g4, %g4, %g4		! carry = sign(y)
	addx		%g3, %g3, %g3
	subcc		%g5, %g4, %g0		! carry = |w| < |y|
	ld		[%i0 + 0x00], %g4	! x as raw bits
	addx		%g3, %g3, %g3
	addcc		%g4, %g4, %g4		! carry = sign(x)
	addx		%g3, %g3, %g3
	subcc		%g5, %g4, %g0		! carry = |w| < |x|
	addx		%g3, %g3, %g3
	ldub		[%g1 + %g3], %g3	! mask = clip_table[index]
	cmp		%g3, 0
	be		2f			! mask == 0: point is not clipped
	 stb		%g3, [%i2]		! delay slot, always runs: clipMask[i] = mask

	/* Clipped point: fold the mask into the running OR / AND. */
	sll		%g3, 8, %g4
	add		%l2, 1, %l2		! c++
	or		%g4, 0xff, %g4		! %g4 = (mask << 8) | 0xff
	or		%g2, %g3, %g2		! tmpOrMask |= mask
	and		%g2, %g4, %g2		! tmpAndMask &= mask, low byte untouched

2:	add		%l0, 1, %l0		! i++
	add		%i2, 1, %i2		! next clipMask byte
	cmp		%l0, %l3
	bne		1b			! loop until i == count
	 add		%i0, %l1, %i0		! delay slot: advance source by stride bytes

	/* Write back the accumulated masks. */
4:	stb		%g2, [%i3]		! *orMask = tmpOrMask
	srl		%g2, 8, %g3		! %g3 = tmpAndMask
	cmp		%l2, %l3
	bl,a		1f			! c < count: some point was unclipped
	 clr		%g3			! annulled unless the branch is taken
1:	stb		%g3, [%i4]		! *andMask
	ret
	 restore	%i1, 0x0, %o0		! return the incoming %i1
234