1/*
2Copyright (C) 1996-1997 Id Software, Inc.
3
4This program is free software; you can redistribute it and/or
5modify it under the terms of the GNU General Public License
6as published by the Free Software Foundation; either version 2
7of the License, or (at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12
13See the GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program; if not, write to the Free Software
17Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
18
19*/
20//
21// d_polysa.s
22// x86 assembly-language polygon model drawing code
23//
24
25#include "asm_i386.h"
26#include "quakeasm.h"
27#include "asm_draw.h"
28#include "d_ifacea.h"
29
30#if	id386
31
32// !!! if this is changed, it must be changed in d_polyse.c too !!!
33#define DPS_MAXSPANS			MAXHEIGHT+1
34									// 1 extra for spanpackage that marks end
35
36//#define	SPAN_SIZE	(((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
37#define SPAN_SIZE (1024+1+1+1)*32
38
39
40	.data
41
42	.align	4
43p10_minus_p20:	.single		0
44p01_minus_p21:	.single		0
45temp0:			.single		0
46temp1:			.single		0
47Ltemp:			.single		0
48
49aff8entryvec_table:	.long	LDraw8, LDraw7, LDraw6, LDraw5
50				.long	LDraw4, LDraw3, LDraw2, LDraw1
51
52lzistepx:		.long	0
53
54
55	.text
56
57#ifndef NeXT
58	.extern C(D_PolysetSetEdgeTable)
59	.extern C(D_RasterizeAliasPolySmooth)
60#endif
61
62//----------------------------------------------------------------------
63// affine triangle gradient calculation code
64//----------------------------------------------------------------------
65
66#define skinwidth	4+0
67
68.globl C(D_PolysetCalcGradients)
69C(D_PolysetCalcGradients):
70
71//	p00_minus_p20 = r_p0[0] - r_p2[0];
72//	p01_minus_p21 = r_p0[1] - r_p2[1];
73//	p10_minus_p20 = r_p1[0] - r_p2[0];
74//	p11_minus_p21 = r_p1[1] - r_p2[1];
75//
76//	xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
77//			     p00_minus_p20 * p11_minus_p21);
78//
79//	ystepdenominv = -xstepdenominv;
80
81	fildl	C(r_p0)+0		// r_p0[0]
82	fildl	C(r_p2)+0		// r_p2[0] | r_p0[0]
83	fildl	C(r_p0)+4		// r_p0[1] | r_p2[0] | r_p0[0]
84	fildl	C(r_p2)+4		// r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
85	fildl	C(r_p1)+0		// r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
86	fildl	C(r_p1)+4		// r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
87							//  r_p2[0] | r_p0[0]
88	fxch	%st(3)			// r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
89							//  r_p2[0] | r_p0[0]
90	fsub	%st(2),%st(0)	// p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
91							//  r_p2[0] | r_p0[0]
92	fxch	%st(1)			// r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
93							//  r_p2[0] | r_p0[0]
94	fsub	%st(4),%st(0)	// p10_minus_p20 | p01_minus_p21 | r_p2[1] |
95							//  r_p1[1] | r_p2[0] | r_p0[0]
96	fxch	%st(5)			// r_p0[0] | p01_minus_p21 | r_p2[1] |
97							//  r_p1[1] | r_p2[0] | p10_minus_p20
98	fsubp	%st(0),%st(4)	// p01_minus_p21 | r_p2[1] | r_p1[1] |
99							//  p00_minus_p20 | p10_minus_p20
100	fxch	%st(2)			// r_p1[1] | r_p2[1] | p01_minus_p21 |
101							//  p00_minus_p20 | p10_minus_p20
102	fsubp	%st(0),%st(1)	// p11_minus_p21 | p01_minus_p21 |
103							//  p00_minus_p20 | p10_minus_p20
104	fxch	%st(1)			// p01_minus_p21 | p11_minus_p21 |
105							//  p00_minus_p20 | p10_minus_p20
106	flds	C(d_xdenom)		// d_xdenom | p01_minus_p21 | p11_minus_p21 |
107							//  p00_minus_p20 | p10_minus_p20
108	fxch	%st(4)			// p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
109							//  p00_minus_p20 | d_xdenom
110	fstps	p10_minus_p20	// p01_minus_p21 | p11_minus_p21 |
111							//  p00_minus_p20 | d_xdenom
112	fstps	p01_minus_p21	// p11_minus_p21 | p00_minus_p20 | xstepdenominv
113	fxch	%st(2)			// xstepdenominv | p00_minus_p20 | p11_minus_p21
114
115//// ceil () for light so positive steps are exaggerated, negative steps
116//// diminished,  pushing us away from underflow toward overflow. Underflow is
117//// very visible, overflow is very unlikely, because of ambient lighting
118//	t0 = r_p0[4] - r_p2[4];
119//	t1 = r_p1[4] - r_p2[4];
120
121	fildl	C(r_p2)+16		// r_p2[4] | xstepdenominv | p00_minus_p20 |
122							//  p11_minus_p21
123	fildl	C(r_p0)+16		// r_p0[4] | r_p2[4] | xstepdenominv |
124							//  p00_minus_p20 | p11_minus_p21
125	fildl	C(r_p1)+16		// r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
126							//  p00_minus_p20 | p11_minus_p21
127	fxch	%st(2)			// r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
128							//  p00_minus_p20 | p11_minus_p21
129	fld		%st(0)			// r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
130							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
131	fsubrp	%st(0),%st(2)	// r_p2[4] | t0 | r_p1[4] | xstepdenominv |
132							//  p00_minus_p20 | p11_minus_p21
133	fsubrp	%st(0),%st(2)	// t0 | t1 | xstepdenominv | p00_minus_p20 |
134							//  p11_minus_p21
135
136//	r_lstepx = (int)
137//			ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
138//	r_lstepy = (int)
139//			ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
140
141	fld		%st(0)			// t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
142							//  p11_minus_p21
143	fmul	%st(5),%st(0)	// t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
144							//  p00_minus_p20 | p11_minus_p21
145	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
146							//  p00_minus_p20 | p11_minus_p21
147	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
148							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
149	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
150							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
151	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
152							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
153	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
154							//  t0*p11_minus_p21 | xstepdenominv |
155							//  p00_minus_p20 | p11_minus_p21
156	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
157							//  t0*p11_minus_p21 | xstepdenominv |
158							//  p00_minus_p20 | p11_minus_p21
159	fmul	%st(5),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
160							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
161							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
162	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
163							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
164							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
165	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
166							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
167							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
168	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
169							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
170							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
171	fld		%st(2)			// xstepdenominv |
172							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
173							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
174							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
175	fmuls	float_minus_1	// ystepdenominv |
176							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
177							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
178							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
179	fxch	%st(2)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
180							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
181							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
182							//  p11_minus_p21
183	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
184							//   xstepdenominv |
185							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
186							//   | ystepdenominv | xstepdenominv |
187							//   p00_minus_p20 | p11_minus_p21
188	fxch	%st(1)			// t1*p00_minus_p20 - t0*p10_minus_p20 |
189							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
190							//   xstepdenominv | ystepdenominv |
191							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
192	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
193							//  ystepdenominv |
194							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
195							//  xstepdenominv | ystepdenominv |
196							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
197	fldcw	ceil_cw
198	fistpl	C(r_lstepy)		// r_lstepx | ystepdenominv | xstepdenominv |
199							//  p00_minus_p20 | p11_minus_p21
200	fistpl	C(r_lstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
201							//  p11_minus_p21
202	fldcw	single_cw
203
204//	t0 = r_p0[2] - r_p2[2];
205//	t1 = r_p1[2] - r_p2[2];
206
207	fildl	C(r_p2)+8		// r_p2[2] | ystepdenominv | xstepdenominv |
208							//  p00_minus_p20 | p11_minus_p21
209	fildl	C(r_p0)+8		// r_p0[2] | r_p2[2] | ystepdenominv |
210							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
211	fildl	C(r_p1)+8		// r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
212							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
213	fxch	%st(2)			// r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
214							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
215	fld		%st(0)			// r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
216							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
217							//  p11_minus_p21
218	fsubrp	%st(0),%st(2)	// r_p2[2] | t0 | r_p1[2] | ystepdenominv |
219							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
220	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
221							//  p00_minus_p20 | p11_minus_p21
222
223//	r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
224//			xstepdenominv);
225//	r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
226//			ystepdenominv);
227
228	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv
229	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
230							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
231	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
232							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
233	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
234							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
235							//  p11_minus_p21
236	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
237							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
238							//  p11_minus_p21
239	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
240							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
241							//  p11_minus_p21
242	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
243							//  t0*p11_minus_p21 | ystepdenominv |
244							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
245	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
246							//  t0*p11_minus_p21 | ystepdenominv |
247							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
248	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
249							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
250							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
251							//  p11_minus_p21
252	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
253							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
254							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
255							//  p11_minus_p21
256	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
257							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
258							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
259							//  p11_minus_p21
260	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
261							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
262							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
263							//  p11_minus_p21
264	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
265							//   ystepdenominv |
266							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
267							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
268							//  p11_minus_p21
269	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
270							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
271							//   ystepdenominv | ystepdenominv |
272							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
273	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
274							//  xstepdenominv |
275							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
276							//  ystepdenominv | ystepdenominv |
277							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
278	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
279							//  ystepdenominv |
280							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
281							//  xstepdenominv | ystepdenominv |
282							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
283	fistpl	C(r_sstepy)		// r_sstepx | ystepdenominv | xstepdenominv |
284							//  p00_minus_p20 | p11_minus_p21
285	fistpl	C(r_sstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
286							//  p11_minus_p21
287
288//	t0 = r_p0[3] - r_p2[3];
289//	t1 = r_p1[3] - r_p2[3];
290
291	fildl	C(r_p2)+12		// r_p2[3] | ystepdenominv | xstepdenominv |
292							//  p00_minus_p20 | p11_minus_p21
293	fildl	C(r_p0)+12		// r_p0[3] | r_p2[3] | ystepdenominv |
294							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
295	fildl	C(r_p1)+12		// r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
296							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
297	fxch	%st(2)			// r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
298							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
299	fld		%st(0)			// r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
300							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
301							//  p11_minus_p21
302	fsubrp	%st(0),%st(2)	// r_p2[3] | t0 | r_p1[3] | ystepdenominv |
303							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
304	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
305							//  p00_minus_p20 | p11_minus_p21
306
307//	r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
308//			xstepdenominv);
309//	r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
310//			ystepdenominv);
311
312	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
313							//  p00_minus_p20 | p11_minus_p21
314	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
315							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
316	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
317							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
318	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
319							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
320							//  p11_minus_p21
321	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
322							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
323							//  p11_minus_p21
324	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
325							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
326							//  p11_minus_p21
327	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
328							//  t0*p11_minus_p21 | ystepdenominv |
329							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
330	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
331							//  t0*p11_minus_p21 | ystepdenominv |
332							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
333	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
334							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
335							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
336							//  p11_minus_p21
337	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
338							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
339							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
340							//  p11_minus_p21
341	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
342							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
343							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
344							//  p11_minus_p21
345	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
346							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
347							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
348							//  p11_minus_p21
349	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
350							//   ystepdenominv |
351							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
352							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
353							//  p11_minus_p21
354	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
355							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
356							//  ystepdenominv | ystepdenominv |
357							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
358	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
359							//  xstepdenominv |
360							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
361							//  ystepdenominv | ystepdenominv |
362							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
363	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
364							//  ystepdenominv |
365							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
366							//  xstepdenominv | ystepdenominv |
367							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
368	fistpl	C(r_tstepy)		// r_tstepx | ystepdenominv | xstepdenominv |
369							//  p00_minus_p20 | p11_minus_p21
370	fistpl	C(r_tstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
371							//  p11_minus_p21
372
373//	t0 = r_p0[5] - r_p2[5];
374//	t1 = r_p1[5] - r_p2[5];
375
376	fildl	C(r_p2)+20		// r_p2[5] | ystepdenominv | xstepdenominv |
377							//  p00_minus_p20 | p11_minus_p21
378	fildl	C(r_p0)+20		// r_p0[5] | r_p2[5] | ystepdenominv |
379							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
380	fildl	C(r_p1)+20		// r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
381							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
382	fxch	%st(2)			// r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
383							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
384	fld		%st(0)			// r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
385							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
386							//  p11_minus_p21
387	fsubrp	%st(0),%st(2)	// r_p2[5] | t0 | r_p1[5] | ystepdenominv |
388							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
389	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
390							//  p00_minus_p20 | p11_minus_p21
391
392//	r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
393//			xstepdenominv);
394//	r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
395//			ystepdenominv);
396
397	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
398							//  p00_minus_p20 | p11_minus_p21
399	fmulp	%st(0),%st(6)	// t0 | t1 | ystepdenominv | xstepdenominv |
400							//  p00_minus_p20 | t0*p11_minus_p21
401	fxch	%st(1)			// t1 | t0 | ystepdenominv | xstepdenominv |
402							//  p00_minus_p20 | t0*p11_minus_p21
403	fld		%st(0)			// t1 | t1 | t0 | ystepdenominv | xstepdenominv |
404							//  p00_minus_p20 | t0*p11_minus_p21
405	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
406							//  xstepdenominv | p00_minus_p20 |
407							//  t0*p11_minus_p21
408	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
409							//  xstepdenominv | p00_minus_p20 |
410							//  t0*p11_minus_p21
411	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
412							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
413							//  t0*p11_minus_p21
414	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
415							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
416							//  t0*p11_minus_p21
417	fmulp	%st(0),%st(5)	// t0*p10_minus_p20 | t1*p01_minus_p21 |
418							//  ystepdenominv | xstepdenominv |
419							//  t1*p00_minus_p20 | t0*p11_minus_p21
420	fxch	%st(5)			// t0*p11_minus_p21 | t1*p01_minus_p21 |
421							//  ystepdenominv | xstepdenominv |
422							//  t1*p00_minus_p20 | t0*p10_minus_p20
423	fsubrp	%st(0),%st(1)	// t1*p01_minus_p21 - t0*p11_minus_p21 |
424							//  ystepdenominv | xstepdenominv |
425							//  t1*p00_minus_p20 | t0*p10_minus_p20
426	fxch	%st(3)			// t1*p00_minus_p20 | ystepdenominv |
427							//  xstepdenominv |
428							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
429							//  t0*p10_minus_p20
430	fsubp	%st(0),%st(4)	// ystepdenominv | xstepdenominv |
431							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
432							//  t1*p00_minus_p20 - t0*p10_minus_p20
433	fxch	%st(1)			// xstepdenominv | ystepdenominv |
434							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
435							//  t1*p00_minus_p20 - t0*p10_minus_p20
436	fmulp	%st(0),%st(2)	// ystepdenominv |
437							//  (t1*p01_minus_p21 - t0*p11_minus_p21) *
438							//  xstepdenominv |
439							//  t1*p00_minus_p20 - t0*p10_minus_p20
440	fmulp	%st(0),%st(2)	// (t1*p01_minus_p21 - t0*p11_minus_p21) *
441							//  xstepdenominv |
442							//  (t1*p00_minus_p20 - t0*p10_minus_p20) *
443							//  ystepdenominv
444	fistpl	C(r_zistepx)	// (t1*p00_minus_p20 - t0*p10_minus_p20) *
445							//  ystepdenominv
446	fistpl	C(r_zistepy)
447
448//	a_sstepxfrac = r_sstepx << 16;
449//	a_tstepxfrac = r_tstepx << 16;
450//
451//	a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
452//			(r_sstepx >> 16);
453
454	movl	C(r_sstepx),%eax
455	movl	C(r_tstepx),%edx
456	shll	$16,%eax
457	shll	$16,%edx
458	movl	%eax,C(a_sstepxfrac)
459	movl	%edx,C(a_tstepxfrac)
460
461	movl	C(r_sstepx),%ecx
462	movl	C(r_tstepx),%eax
463	sarl	$16,%ecx
464	sarl	$16,%eax
465	imull	skinwidth(%esp)
466	addl	%ecx,%eax
467	movl	%eax,C(a_ststepxwhole)
468
469	ret
470
471
472//----------------------------------------------------------------------
473// recursive subdivision affine triangle drawing code
474//
475// not C-callable because of stdcall return
476//----------------------------------------------------------------------
477
478#define lp1	4+16
479#define lp2	8+16
480#define lp3	12+16
481
482.globl C(D_PolysetRecursiveTriangle)
483C(D_PolysetRecursiveTriangle):
484	pushl	%ebp				// preserve caller stack frame pointer
485	pushl	%esi				// preserve register variables
486	pushl	%edi
487	pushl	%ebx
488
489//	int		*temp;
490//	int		d;
491//	int		new[6];
492//	int		i;
493//	int		z;
494//	short	*zbuf;
495	movl	lp2(%esp),%esi
496	movl	lp1(%esp),%ebx
497	movl	lp3(%esp),%edi
498
499//	d = lp2[0] - lp1[0];
500//	if (d < -1 || d > 1)
501//		goto split;
502	movl	0(%esi),%eax
503
504	movl	0(%ebx),%edx
505	movl	4(%esi),%ebp
506
507	subl	%edx,%eax
508	movl	4(%ebx),%ecx
509
510	subl	%ecx,%ebp
511	incl	%eax
512
513	cmpl	$2,%eax
514	ja		LSplit
515
516//	d = lp2[1] - lp1[1];
517//	if (d < -1 || d > 1)
518//		goto split;
519	movl	0(%edi),%eax
520	incl	%ebp
521
522	cmpl	$2,%ebp
523	ja		LSplit
524
525//	d = lp3[0] - lp2[0];
526//	if (d < -1 || d > 1)
527//		goto split2;
528	movl	0(%esi),%edx
529	movl	4(%edi),%ebp
530
531	subl	%edx,%eax
532	movl	4(%esi),%ecx
533
534	subl	%ecx,%ebp
535	incl	%eax
536
537	cmpl	$2,%eax
538	ja		LSplit2
539
540//	d = lp3[1] - lp2[1];
541//	if (d < -1 || d > 1)
542//		goto split2;
543	movl	0(%ebx),%eax
544	incl	%ebp
545
546	cmpl	$2,%ebp
547	ja		LSplit2
548
549//	d = lp1[0] - lp3[0];
550//	if (d < -1 || d > 1)
551//		goto split3;
552	movl	0(%edi),%edx
553	movl	4(%ebx),%ebp
554
555	subl	%edx,%eax
556	movl	4(%edi),%ecx
557
558	subl	%ecx,%ebp
559	incl	%eax
560
561	incl	%ebp
562	movl	%ebx,%edx
563
564	cmpl	$2,%eax
565	ja		LSplit3
566
567//	d = lp1[1] - lp3[1];
568//	if (d < -1 || d > 1)
569//	{
570//split3:
571//		temp = lp1;
572//		lp3 = lp2;
573//		lp1 = lp3;
574//		lp2 = temp;
575//		goto split;
576//	}
577//
578//	return;			// entire tri is filled
579//
580	cmpl	$2,%ebp
581	jna		LDone
582
583LSplit3:
584	movl	%edi,%ebx
585	movl	%esi,%edi
586	movl	%edx,%esi
587	jmp		LSplit
588
589//split2:
590LSplit2:
591
592//	temp = lp1;
593//	lp1 = lp2;
594//	lp2 = lp3;
595//	lp3 = temp;
596	movl	%ebx,%eax
597	movl	%esi,%ebx
598	movl	%edi,%esi
599	movl	%eax,%edi
600
601//split:
602LSplit:
603
604	subl	$24,%esp		// allocate space for a new vertex
605
606//// split this edge
607//	new[0] = (lp1[0] + lp2[0]) >> 1;
608//	new[1] = (lp1[1] + lp2[1]) >> 1;
609//	new[2] = (lp1[2] + lp2[2]) >> 1;
610//	new[3] = (lp1[3] + lp2[3]) >> 1;
611//	new[5] = (lp1[5] + lp2[5]) >> 1;
612	movl	8(%ebx),%eax
613
614	movl	8(%esi),%edx
615	movl	12(%ebx),%ecx
616
617	addl	%edx,%eax
618	movl	12(%esi),%edx
619
620	sarl	$1,%eax
621	addl	%edx,%ecx
622
623	movl	%eax,8(%esp)
624	movl	20(%ebx),%eax
625
626	sarl	$1,%ecx
627	movl	20(%esi),%edx
628
629	movl	%ecx,12(%esp)
630	addl	%edx,%eax
631
632	movl	0(%ebx),%ecx
633	movl	0(%esi),%edx
634
635	sarl	$1,%eax
636	addl	%ecx,%edx
637
638	movl	%eax,20(%esp)
639	movl	4(%ebx),%eax
640
641	sarl	$1,%edx
642	movl	4(%esi),%ebp
643
644	movl	%edx,0(%esp)
645	addl	%eax,%ebp
646
647	sarl	$1,%ebp
648	movl	%ebp,4(%esp)
649
650//// draw the point if splitting a leading edge
651//	if (lp2[1] > lp1[1])
652//		goto nodraw;
653	cmpl	%eax,4(%esi)
654	jg		LNoDraw
655
656//	if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
657//		goto nodraw;
658	movl	0(%esi),%edx
659	jnz		LDraw
660
661	cmpl	%ecx,%edx
662	jl		LNoDraw
663
664LDraw:
665
666// z = new[5] >> 16;
667	movl	20(%esp),%edx
668	movl	4(%esp),%ecx
669
670	sarl	$16,%edx
671	movl	0(%esp),%ebp
672
673//	zbuf = zspantable[new[1]] + new[0];
674	movl	C(zspantable)(,%ecx,4),%eax
675
676//	if (z >= *zbuf)
677//	{
678	cmpw	(%eax,%ebp,2),%dx
679	jnge	LNoDraw
680
681//		int		pix;
682//
683//		*zbuf = z;
684	movw	%dx,(%eax,%ebp,2)
685
686//		pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
687	movl	12(%esp),%eax
688
689	sarl	$16,%eax
690	movl	8(%esp),%edx
691
692	sarl	$16,%edx
693	subl	%ecx,%ecx
694
695	movl	C(skintable)(,%eax,4),%eax
696	movl	4(%esp),%ebp
697
698	movb	(%eax,%edx,),%cl
699	movl	C(d_pcolormap),%edx
700
701	movb	(%edx,%ecx,),%dl
702	movl	0(%esp),%ecx
703
704//		d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
705	movl	C(d_scantable)(,%ebp,4),%eax
706	addl	%eax,%ecx
707	movl	C(d_viewbuffer),%eax
708	movb	%dl,(%eax,%ecx,1)
709
710//	}
711//
712//nodraw:
713LNoDraw:
714
715//// recursively continue
716//	D_PolysetRecursiveTriangle (lp3, lp1, new);
717	pushl	%esp
718	pushl	%ebx
719	pushl	%edi
720	call	C(D_PolysetRecursiveTriangle)
721
722//	D_PolysetRecursiveTriangle (lp3, new, lp2);
723	movl	%esp,%ebx
724	pushl	%esi
725	pushl	%ebx
726	pushl	%edi
727	call	C(D_PolysetRecursiveTriangle)
728	addl	$24,%esp
729
730LDone:
731	popl	%ebx				// restore register variables
732	popl	%edi
733	popl	%esi
734	popl	%ebp				// restore caller stack frame pointer
735	ret		$12
736
737
738//----------------------------------------------------------------------
739// 8-bpp horizontal span drawing code for affine polygons, with smooth
740// shading and no transparency
741//----------------------------------------------------------------------
742
743#define pspans	4+8
744
745.globl C(D_PolysetAff8Start)
746C(D_PolysetAff8Start):
747
748.globl C(D_PolysetDrawSpans8)
749C(D_PolysetDrawSpans8):
750	pushl	%esi				// preserve register variables
751	pushl	%ebx
752
753	movl	pspans(%esp),%esi	// point to the first span descriptor
754	movl	C(r_zistepx),%ecx
755
756	pushl	%ebp				// preserve caller's stack frame
757	pushl	%edi
758
759	rorl	$16,%ecx			// put high 16 bits of 1/z step in low word
760	movl	spanpackage_t_count(%esi),%edx
761
762	movl	%ecx,lzistepx
763
764LSpanLoop:
765
766//		lcount = d_aspancount - pspanpackage->count;
767//
768//		errorterm += erroradjustup;
769//		if (errorterm >= 0)
770//		{
771//			d_aspancount += d_countextrastep;
772//			errorterm -= erroradjustdown;
773//		}
774//		else
775//		{
776//			d_aspancount += ubasestep;
777//		}
778	movl	C(d_aspancount),%eax
779	subl	%edx,%eax
780
781	movl	C(erroradjustup),%edx
782	movl	C(errorterm),%ebx
783	addl	%edx,%ebx
784	js		LNoTurnover
785
786	movl	C(erroradjustdown),%edx
787	movl	C(d_countextrastep),%edi
788	subl	%edx,%ebx
789	movl	C(d_aspancount),%ebp
790	movl	%ebx,C(errorterm)
791	addl	%edi,%ebp
792	movl	%ebp,C(d_aspancount)
793	jmp		LRightEdgeStepped
794
795LNoTurnover:
796	movl	C(d_aspancount),%edi
797	movl	C(ubasestep),%edx
798	movl	%ebx,C(errorterm)
799	addl	%edx,%edi
800	movl	%edi,C(d_aspancount)
801
802LRightEdgeStepped:
803	cmpl	$1,%eax
804
805	jl		LNextSpan
806	jz		LExactlyOneLong
807
808//
809// set up advancetable
810//
811	movl	C(a_ststepxwhole),%ecx
812	movl	C(r_affinetridesc)+atd_skinwidth,%edx
813
814	movl	%ecx,advancetable+4	// advance base in t
815	addl	%edx,%ecx
816
817	movl	%ecx,advancetable	// advance extra in t
818	movl	C(a_tstepxfrac),%ecx
819
820	movw	C(r_lstepx),%cx
821	movl	%eax,%edx			// count
822
823	movl	%ecx,tstep
824	addl	$7,%edx
825
826	shrl	$3,%edx				// count of full and partial loops
827	movl	spanpackage_t_sfrac(%esi),%ebx
828
829	movw	%dx,%bx
830	movl	spanpackage_t_pz(%esi),%ecx
831
832	negl	%eax
833
834	movl	spanpackage_t_pdest(%esi),%edi
835	andl	$7,%eax		// 0->0, 1->7, 2->6, ... , 7->1
836
837	subl	%eax,%edi	// compensate for hardwired offsets
838	subl	%eax,%ecx
839
840	subl	%eax,%ecx
841	movl	spanpackage_t_tfrac(%esi),%edx
842
843	movw	spanpackage_t_light(%esi),%dx
844	movl	spanpackage_t_zi(%esi),%ebp
845
846	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
847	pushl	%esi
848
849	movl	spanpackage_t_ptex(%esi),%esi
850	jmp		aff8entryvec_table(,%eax,4)
851
852// %bx = count of full and partial loops
853// %ebx high word = sfrac
854// %ecx = pz
855// %dx = light
856// %edx high word = tfrac
857// %esi = ptex
858// %edi = pdest
859// %ebp = 1/z
860// tstep low word = C(r_lstepx)
861// tstep high word = C(a_tstepxfrac)
862// C(a_sstepxfrac) low word = 0
863// C(a_sstepxfrac) high word = C(a_sstepxfrac)
864
865LDrawLoop:
866
867// FIXME: do we need to clamp light? We may need at least a buffer bit to
868// keep it from poking into tfrac and causing problems
869
870LDraw8:
871	cmpw	(%ecx),%bp
872	jl		Lp1
873	xorl	%eax,%eax
874	movb	%dh,%ah
875	movb	(%esi),%al
876	movw	%bp,(%ecx)
877	movb	0x12345678(%eax),%al
878LPatch8:
879	movb	%al,(%edi)
880Lp1:
881	addl	tstep,%edx
882	sbbl	%eax,%eax
883	addl	lzistepx,%ebp
884	adcl	$0,%ebp
885	addl	C(a_sstepxfrac),%ebx
886	adcl	advancetable+4(,%eax,4),%esi
887
888LDraw7:
889	cmpw	2(%ecx),%bp
890	jl		Lp2
891	xorl	%eax,%eax
892	movb	%dh,%ah
893	movb	(%esi),%al
894	movw	%bp,2(%ecx)
895	movb	0x12345678(%eax),%al
896LPatch7:
897	movb	%al,1(%edi)
898Lp2:
899	addl	tstep,%edx
900	sbbl	%eax,%eax
901	addl	lzistepx,%ebp
902	adcl	$0,%ebp
903	addl	C(a_sstepxfrac),%ebx
904	adcl	advancetable+4(,%eax,4),%esi
905
906LDraw6:
907	cmpw	4(%ecx),%bp
908	jl		Lp3
909	xorl	%eax,%eax
910	movb	%dh,%ah
911	movb	(%esi),%al
912	movw	%bp,4(%ecx)
913	movb	0x12345678(%eax),%al
914LPatch6:
915	movb	%al,2(%edi)
916Lp3:
917	addl	tstep,%edx
918	sbbl	%eax,%eax
919	addl	lzistepx,%ebp
920	adcl	$0,%ebp
921	addl	C(a_sstepxfrac),%ebx
922	adcl	advancetable+4(,%eax,4),%esi
923
924LDraw5:
925	cmpw	6(%ecx),%bp
926	jl		Lp4
927	xorl	%eax,%eax
928	movb	%dh,%ah
929	movb	(%esi),%al
930	movw	%bp,6(%ecx)
931	movb	0x12345678(%eax),%al
932LPatch5:
933	movb	%al,3(%edi)
934Lp4:
935	addl	tstep,%edx
936	sbbl	%eax,%eax
937	addl	lzistepx,%ebp
938	adcl	$0,%ebp
939	addl	C(a_sstepxfrac),%ebx
940	adcl	advancetable+4(,%eax,4),%esi
941
942LDraw4:
943	cmpw	8(%ecx),%bp
944	jl		Lp5
945	xorl	%eax,%eax
946	movb	%dh,%ah
947	movb	(%esi),%al
948	movw	%bp,8(%ecx)
949	movb	0x12345678(%eax),%al
950LPatch4:
951	movb	%al,4(%edi)
952Lp5:
953	addl	tstep,%edx
954	sbbl	%eax,%eax
955	addl	lzistepx,%ebp
956	adcl	$0,%ebp
957	addl	C(a_sstepxfrac),%ebx
958	adcl	advancetable+4(,%eax,4),%esi
959
960LDraw3:
961	cmpw	10(%ecx),%bp
962	jl		Lp6
963	xorl	%eax,%eax
964	movb	%dh,%ah
965	movb	(%esi),%al
966	movw	%bp,10(%ecx)
967	movb	0x12345678(%eax),%al
968LPatch3:
969	movb	%al,5(%edi)
970Lp6:
971	addl	tstep,%edx
972	sbbl	%eax,%eax
973	addl	lzistepx,%ebp
974	adcl	$0,%ebp
975	addl	C(a_sstepxfrac),%ebx
976	adcl	advancetable+4(,%eax,4),%esi
977
978LDraw2:
979	cmpw	12(%ecx),%bp
980	jl		Lp7
981	xorl	%eax,%eax
982	movb	%dh,%ah
983	movb	(%esi),%al
984	movw	%bp,12(%ecx)
985	movb	0x12345678(%eax),%al
986LPatch2:
987	movb	%al,6(%edi)
988Lp7:
989	addl	tstep,%edx
990	sbbl	%eax,%eax
991	addl	lzistepx,%ebp
992	adcl	$0,%ebp
993	addl	C(a_sstepxfrac),%ebx
994	adcl	advancetable+4(,%eax,4),%esi
995
996LDraw1:
997	cmpw	14(%ecx),%bp
998	jl		Lp8
999	xorl	%eax,%eax
1000	movb	%dh,%ah
1001	movb	(%esi),%al
1002	movw	%bp,14(%ecx)
1003	movb	0x12345678(%eax),%al
1004LPatch1:
1005	movb	%al,7(%edi)
1006Lp8:
1007	addl	tstep,%edx
1008	sbbl	%eax,%eax
1009	addl	lzistepx,%ebp
1010	adcl	$0,%ebp
1011	addl	C(a_sstepxfrac),%ebx
1012	adcl	advancetable+4(,%eax,4),%esi
1013
1014	addl	$8,%edi
1015	addl	$16,%ecx
1016
1017	decw	%bx
1018	jnz		LDrawLoop
1019
1020	popl	%esi				// restore spans pointer
1021LNextSpan:
1022	addl	$(spanpackage_t_size),%esi	// point to next span
1023LNextSpanESISet:
1024	movl	spanpackage_t_count(%esi),%edx
1025	cmpl	$-999999,%edx		// any more spans?
1026	jnz		LSpanLoop			// yes
1027
1028	popl	%edi
1029	popl	%ebp				// restore the caller's stack frame
1030	popl	%ebx				// restore register variables
1031	popl	%esi
1032	ret
1033
1034
1035// draw a one-long span
1036
1037LExactlyOneLong:
1038
1039	movl	spanpackage_t_pz(%esi),%ecx
1040	movl	spanpackage_t_zi(%esi),%ebp
1041
1042	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
1043	movl	spanpackage_t_ptex(%esi),%ebx
1044
1045	cmpw	(%ecx),%bp
1046	jl		LNextSpan
1047	xorl	%eax,%eax
1048	movl	spanpackage_t_pdest(%esi),%edi
1049	movb	spanpackage_t_light+1(%esi),%ah
1050	addl	$(spanpackage_t_size),%esi	// point to next span
1051	movb	(%ebx),%al
1052	movw	%bp,(%ecx)
1053	movb	0x12345678(%eax),%al
1054LPatch9:
1055	movb	%al,(%edi)
1056
1057	jmp		LNextSpanESISet
1058
1059.globl C(D_PolysetAff8End)
1060C(D_PolysetAff8End):
1061
1062
1063#define pcolormap		4
1064
1065.globl C(D_Aff8Patch)
1066C(D_Aff8Patch):
1067	movl	pcolormap(%esp),%eax
1068	movl	%eax,LPatch1-4
1069	movl	%eax,LPatch2-4
1070	movl	%eax,LPatch3-4
1071	movl	%eax,LPatch4-4
1072	movl	%eax,LPatch5-4
1073	movl	%eax,LPatch6-4
1074	movl	%eax,LPatch7-4
1075	movl	%eax,LPatch8-4
1076	movl	%eax,LPatch9-4
1077
1078	ret
1079
1080
1081//----------------------------------------------------------------------
1082// Alias model polygon dispatching code, combined with subdivided affine
1083// triangle drawing code
1084//----------------------------------------------------------------------
1085
1086.globl C(D_PolysetDraw)
1087C(D_PolysetDraw):
1088
1089//	spanpackage_t	spans[DPS_MAXSPANS + 1 +
1090//			((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
1091//						// one extra because of cache line pretouching
1092//
1093//	a_spans = (spanpackage_t *)
1094//			(((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
1095	subl	$(SPAN_SIZE),%esp
1096	movl	%esp,%eax
1097	addl	$(CACHE_SIZE - 1),%eax
1098	andl	$(~(CACHE_SIZE - 1)),%eax
1099	movl	%eax,C(a_spans)
1100
1101//	if (r_affinetridesc.drawtype)
1102//		D_DrawSubdiv ();
1103//	else
1104//		D_DrawNonSubdiv ();
1105	movl	C(r_affinetridesc)+atd_drawtype,%eax
1106	testl	%eax,%eax
1107	jz		C(D_DrawNonSubdiv)
1108
1109	pushl	%ebp				// preserve caller stack frame pointer
1110
1111//	lnumtriangles = r_affinetridesc.numtriangles;
1112	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
1113
1114	pushl	%esi				// preserve register variables
1115	shll	$4,%ebp
1116
1117	pushl	%ebx
1118//	ptri = r_affinetridesc.ptriangles;
1119	movl	C(r_affinetridesc)+atd_ptriangles,%ebx
1120
1121	pushl	%edi
1122
1123//	mtriangle_t		*ptri;
1124//	finalvert_t		*pfv, *index0, *index1, *index2;
1125//	int				i;
1126//	int				lnumtriangles;
1127//	int				s0, s1, s2;
1128
1129//	pfv = r_affinetridesc.pfinalverts;
1130	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
1131
1132//	for (i=0 ; i<lnumtriangles ; i++)
1133//	{
1134
1135Llooptop:
1136
1137//		index0 = pfv + ptri[i].vertindex[0];
1138//		index1 = pfv + ptri[i].vertindex[1];
1139//		index2 = pfv + ptri[i].vertindex[2];
1140	movl	mtri_vertindex-16+0(%ebx,%ebp,),%ecx
1141	movl	mtri_vertindex-16+4(%ebx,%ebp,),%esi
1142
1143	shll	$(fv_shift),%ecx
1144	movl	mtri_vertindex-16+8(%ebx,%ebp,),%edx
1145
1146	shll	$(fv_shift),%esi
1147	addl	%edi,%ecx
1148
1149	shll	$(fv_shift),%edx
1150	addl	%edi,%esi
1151
1152	addl	%edi,%edx
1153
1154//		if (((index0->v[1]-index1->v[1]) *
1155//				(index0->v[0]-index2->v[0]) -
1156//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
1157//		{
1158//			continue;
1159//		}
1160//
1161//		d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
1162	fildl	fv_v+4(%ecx)	// i0v1
1163	fildl	fv_v+4(%esi)	// i1v1 | i0v1
1164	fildl	fv_v+0(%ecx)	// i0v0 | i1v1 | i0v1
1165	fildl	fv_v+0(%edx)	// i2v0 | i0v0 | i1v1 | i0v1
1166	fxch	%st(2)			// i1v1 | i0v0 | i2v0 | i0v1
1167	fsubr	%st(3),%st(0)	// i0v1-i1v1 | i0v0 | i2v0 | i0v1
1168	fildl	fv_v+0(%esi)	// i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
1169	fxch	%st(2)			// i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
1170	fsub	%st(0),%st(3)	// i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
1171	fildl	fv_v+4(%edx)	// i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
1172	fxch	%st(1)			// i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
1173	fsubp	%st(0),%st(3)	// i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
1174	fxch	%st(1)			// i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
1175	fmulp	%st(0),%st(3)	// i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
1176	fsubrp	%st(0),%st(3)	// i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
1177	movl	fv_v+16(%ecx),%eax
1178	andl	$0xFF00,%eax
1179	fmulp	%st(0),%st(2)	// i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
1180	addl	C(acolormap),%eax
1181	fsubp	%st(0),%st(1)	// (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
1182	movl	%eax,C(d_pcolormap)
1183	fstps	Ltemp
1184	movl	Ltemp,%eax
1185	subl	$0x80000001,%eax
1186	jc		Lskip
1187
1188//		if (ptri[i].facesfront)
1189//		{
1190//			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
1191	movl	mtri_facesfront-16(%ebx,%ebp,),%eax
1192	testl	%eax,%eax
1193	jz		Lfacesback
1194
1195	pushl	%edx
1196	pushl	%esi
1197	pushl	%ecx
1198	call	C(D_PolysetRecursiveTriangle)
1199
1200	subl	$16,%ebp
1201	jnz		Llooptop
1202	jmp		Ldone2
1203
1204//		}
1205//		else
1206//		{
1207Lfacesback:
1208
1209//			s0 = index0->v[2];
1210//			s1 = index1->v[2];
1211//			s2 = index2->v[2];
1212	movl	fv_v+8(%ecx),%eax
1213	pushl	%eax
1214	movl	fv_v+8(%esi),%eax
1215	pushl	%eax
1216	movl	fv_v+8(%edx),%eax
1217	pushl	%eax
1218	pushl	%ecx
1219	pushl	%edx
1220
1221//			if (index0->flags & ALIAS_ONSEAM)
1222//				index0->v[2] += r_affinetridesc.seamfixupX16;
1223	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax
1224	testl	$(ALIAS_ONSEAM),fv_flags(%ecx)
1225	jz		Lp11
1226	addl	%eax,fv_v+8(%ecx)
1227Lp11:
1228
1229//			if (index1->flags & ALIAS_ONSEAM)
1230//				index1->v[2] += r_affinetridesc.seamfixupX16;
1231	testl	$(ALIAS_ONSEAM),fv_flags(%esi)
1232	jz		Lp12
1233	addl	%eax,fv_v+8(%esi)
1234Lp12:
1235
1236//			if (index2->flags & ALIAS_ONSEAM)
1237//				index2->v[2] += r_affinetridesc.seamfixupX16;
1238	testl	$(ALIAS_ONSEAM),fv_flags(%edx)
1239	jz		Lp13
1240	addl	%eax,fv_v+8(%edx)
1241Lp13:
1242
1243//			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
1244	pushl	%edx
1245	pushl	%esi
1246	pushl	%ecx
1247	call	C(D_PolysetRecursiveTriangle)
1248
1249//			index0->v[2] = s0;
1250//			index1->v[2] = s1;
1251//			index2->v[2] = s2;
1252	popl	%edx
1253	popl	%ecx
1254	popl	%eax
1255	movl	%eax,fv_v+8(%edx)
1256	popl	%eax
1257	movl	%eax,fv_v+8(%esi)
1258	popl	%eax
1259	movl	%eax,fv_v+8(%ecx)
1260
1261//		}
1262//	}
1263Lskip:
1264	subl	$16,%ebp
1265	jnz		Llooptop
1266
1267Ldone2:
1268	popl	%edi				// restore the caller's stack frame
1269	popl	%ebx
1270	popl	%esi				// restore register variables
1271	popl	%ebp
1272
1273	addl	$(SPAN_SIZE),%esp
1274
1275	ret
1276
1277
1278//----------------------------------------------------------------------
1279// Alias model triangle left-edge scanning code
1280//----------------------------------------------------------------------
1281
1282#define height	4+16
1283
1284.globl C(D_PolysetScanLeftEdge)
1285C(D_PolysetScanLeftEdge):
1286	pushl	%ebp				// preserve caller stack frame pointer
1287	pushl	%esi				// preserve register variables
1288	pushl	%edi
1289	pushl	%ebx
1290
1291	movl	height(%esp),%eax
1292	movl	C(d_sfrac),%ecx
1293	andl	$0xFFFF,%eax
1294	movl	C(d_ptex),%ebx
1295	orl		%eax,%ecx
1296	movl	C(d_pedgespanpackage),%esi
1297	movl	C(d_tfrac),%edx
1298	movl	C(d_light),%edi
1299	movl	C(d_zi),%ebp
1300
1301// %eax: scratch
1302// %ebx: d_ptex
1303// %ecx: d_sfrac in high word, count in low word
1304// %edx: d_tfrac
1305// %esi: d_pedgespanpackage, errorterm, scratch alternately
1306// %edi: d_light
1307// %ebp: d_zi
1308
1309//	do
1310//	{
1311
1312LScanLoop:
1313
1314//		d_pedgespanpackage->ptex = ptex;
1315//		d_pedgespanpackage->pdest = d_pdest;
1316//		d_pedgespanpackage->pz = d_pz;
1317//		d_pedgespanpackage->count = d_aspancount;
1318//		d_pedgespanpackage->light = d_light;
1319//		d_pedgespanpackage->zi = d_zi;
1320//		d_pedgespanpackage->sfrac = d_sfrac << 16;
1321//		d_pedgespanpackage->tfrac = d_tfrac << 16;
1322	movl	%ebx,spanpackage_t_ptex(%esi)
1323	movl	C(d_pdest),%eax
1324	movl	%eax,spanpackage_t_pdest(%esi)
1325	movl	C(d_pz),%eax
1326	movl	%eax,spanpackage_t_pz(%esi)
1327	movl	C(d_aspancount),%eax
1328	movl	%eax,spanpackage_t_count(%esi)
1329	movl	%edi,spanpackage_t_light(%esi)
1330	movl	%ebp,spanpackage_t_zi(%esi)
1331	movl	%ecx,spanpackage_t_sfrac(%esi)
1332	movl	%edx,spanpackage_t_tfrac(%esi)
1333
1334// pretouch the next cache line
1335	movb	spanpackage_t_size(%esi),%al
1336
1337//		d_pedgespanpackage++;
1338	addl	$(spanpackage_t_size),%esi
1339	movl	C(erroradjustup),%eax
1340	movl	%esi,C(d_pedgespanpackage)
1341
1342//		errorterm += erroradjustup;
1343	movl	C(errorterm),%esi
1344	addl	%eax,%esi
1345	movl	C(d_pdest),%eax
1346
1347//		if (errorterm >= 0)
1348//		{
1349	js		LNoLeftEdgeTurnover
1350
1351//			errorterm -= erroradjustdown;
1352//			d_pdest += d_pdestextrastep;
1353	subl	C(erroradjustdown),%esi
1354	addl	C(d_pdestextrastep),%eax
1355	movl	%esi,C(errorterm)
1356	movl	%eax,C(d_pdest)
1357
1358//			d_pz += d_pzextrastep;
1359//			d_aspancount += d_countextrastep;
1360//			d_ptex += d_ptexextrastep;
1361//			d_sfrac += d_sfracextrastep;
1362//			d_ptex += d_sfrac >> 16;
1363//			d_sfrac &= 0xFFFF;
1364//			d_tfrac += d_tfracextrastep;
1365	movl	C(d_pz),%eax
1366	movl	C(d_aspancount),%esi
1367	addl	C(d_pzextrastep),%eax
1368	addl	C(d_sfracextrastep),%ecx
1369	adcl	C(d_ptexextrastep),%ebx
1370	addl	C(d_countextrastep),%esi
1371	movl	%eax,C(d_pz)
1372	movl	C(d_tfracextrastep),%eax
1373	movl	%esi,C(d_aspancount)
1374	addl	%eax,%edx
1375
1376//			if (d_tfrac & 0x10000)
1377//			{
1378	jnc		LSkip1
1379
1380//				d_ptex += r_affinetridesc.skinwidth;
1381//				d_tfrac &= 0xFFFF;
1382	addl	C(r_affinetridesc)+atd_skinwidth,%ebx
1383
1384//			}
1385
1386LSkip1:
1387
1388//			d_light += d_lightextrastep;
1389//			d_zi += d_ziextrastep;
1390	addl	C(d_lightextrastep),%edi
1391	addl	C(d_ziextrastep),%ebp
1392
1393//		}
1394	movl	C(d_pedgespanpackage),%esi
1395	decl	%ecx
1396	testl	$0xFFFF,%ecx
1397	jnz		LScanLoop
1398
1399	popl	%ebx
1400	popl	%edi
1401	popl	%esi
1402	popl	%ebp
1403	ret
1404
1405//		else
1406//		{
1407
1408LNoLeftEdgeTurnover:
1409	movl	%esi,C(errorterm)
1410
1411//			d_pdest += d_pdestbasestep;
1412	addl	C(d_pdestbasestep),%eax
1413	movl	%eax,C(d_pdest)
1414
1415//			d_pz += d_pzbasestep;
1416//			d_aspancount += ubasestep;
1417//			d_ptex += d_ptexbasestep;
1418//			d_sfrac += d_sfracbasestep;
1419//			d_ptex += d_sfrac >> 16;
1420//			d_sfrac &= 0xFFFF;
1421	movl	C(d_pz),%eax
1422	movl	C(d_aspancount),%esi
1423	addl	C(d_pzbasestep),%eax
1424	addl	C(d_sfracbasestep),%ecx
1425	adcl	C(d_ptexbasestep),%ebx
1426	addl	C(ubasestep),%esi
1427	movl	%eax,C(d_pz)
1428	movl	%esi,C(d_aspancount)
1429
1430//			d_tfrac += d_tfracbasestep;
1431	movl	C(d_tfracbasestep),%esi
1432	addl	%esi,%edx
1433
1434//			if (d_tfrac & 0x10000)
1435//			{
1436	jnc		LSkip2
1437
1438//				d_ptex += r_affinetridesc.skinwidth;
1439//				d_tfrac &= 0xFFFF;
1440	addl	C(r_affinetridesc)+atd_skinwidth,%ebx
1441
1442//			}
1443
1444LSkip2:
1445
1446//			d_light += d_lightbasestep;
1447//			d_zi += d_zibasestep;
1448	addl	C(d_lightbasestep),%edi
1449	addl	C(d_zibasestep),%ebp
1450
1451//		}
1452//	} while (--height);
1453	movl	C(d_pedgespanpackage),%esi
1454	decl	%ecx
1455	testl	$0xFFFF,%ecx
1456	jnz		LScanLoop
1457
1458	popl	%ebx
1459	popl	%edi
1460	popl	%esi
1461	popl	%ebp
1462	ret
1463
1464
1465//----------------------------------------------------------------------
1466// Alias model vertex drawing code
1467//----------------------------------------------------------------------
1468
1469#define fv			4+8
1470#define	numverts	8+8
1471
1472.globl C(D_PolysetDrawFinalVerts)
1473C(D_PolysetDrawFinalVerts):
1474	pushl	%ebp				// preserve caller stack frame pointer
1475	pushl	%ebx
1476
1477//	int		i, z;
1478//	short	*zbuf;
1479
1480	movl	numverts(%esp),%ecx
1481	movl	fv(%esp),%ebx
1482
1483	pushl	%esi				// preserve register variables
1484	pushl	%edi
1485
1486LFVLoop:
1487
1488//	for (i=0 ; i<numverts ; i++, fv++)
1489//	{
1490//	// valid triangle coordinates for filling can include the bottom and
1491//	// right clip edges, due to the fill rule; these shouldn't be drawn
1492//		if ((fv->v[0] < r_refdef.vrectright) &&
1493//			(fv->v[1] < r_refdef.vrectbottom))
1494//		{
1495	movl	fv_v+0(%ebx),%eax
1496	movl	C(r_refdef)+rd_vrectright,%edx
1497	cmpl	%edx,%eax
1498	jge		LNextVert
1499	movl	fv_v+4(%ebx),%esi
1500	movl	C(r_refdef)+rd_vrectbottom,%edx
1501	cmpl	%edx,%esi
1502	jge		LNextVert
1503
1504//			zbuf = zspantable[fv->v[1]] + fv->v[0];
1505	movl	C(zspantable)(,%esi,4),%edi
1506
1507//			z = fv->v[5]>>16;
1508	movl	fv_v+20(%ebx),%edx
1509	shrl	$16,%edx
1510
1511//			if (z >= *zbuf)
1512//			{
1513//				int		pix;
1514	cmpw	(%edi,%eax,2),%dx
1515	jl		LNextVert
1516
1517//				*zbuf = z;
1518	movw	%dx,(%edi,%eax,2)
1519
1520//				pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
1521	movl	fv_v+12(%ebx),%edi
1522	shrl	$16,%edi
1523	movl	C(skintable)(,%edi,4),%edi
1524	movl	fv_v+8(%ebx),%edx
1525	shrl	$16,%edx
1526	movb	(%edi,%edx),%dl
1527
1528//				pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
1529	movl	fv_v+16(%ebx),%edi
1530	andl	$0xFF00,%edi
1531	andl	$0x00FF,%edx
1532	addl	%edx,%edi
1533	movl	C(acolormap),%edx
1534	movb	(%edx,%edi,1),%dl
1535
1536//				d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
1537	movl	C(d_scantable)(,%esi,4),%edi
1538	movl	C(d_viewbuffer),%esi
1539	addl	%eax,%edi
1540	movb	%dl,(%esi,%edi)
1541
1542//			}
1543//		}
1544//	}
1545LNextVert:
1546	addl	$(fv_size),%ebx
1547	decl	%ecx
1548	jnz		LFVLoop
1549
1550	popl	%edi
1551	popl	%esi
1552	popl	%ebx
1553	popl	%ebp
1554	ret
1555
1556
1557//----------------------------------------------------------------------
1558// Alias model non-subdivided polygon dispatching code
1559//
1560// not C-callable because of stack buffer cleanup
1561//----------------------------------------------------------------------
1562
1563.globl C(D_DrawNonSubdiv)
1564C(D_DrawNonSubdiv):
1565	pushl	%ebp				// preserve caller stack frame pointer
1566	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
1567	pushl	%ebx
1568	shll	$(mtri_shift),%ebp
1569	pushl	%esi				// preserve register variables
1570	movl	C(r_affinetridesc)+atd_ptriangles,%esi
1571	pushl	%edi
1572
1573//	mtriangle_t		*ptri;
1574//	finalvert_t		*pfv, *index0, *index1, *index2;
1575//	int				i;
1576//	int				lnumtriangles;
1577
1578//	pfv = r_affinetridesc.pfinalverts;
1579//	ptri = r_affinetridesc.ptriangles;
1580//	lnumtriangles = r_affinetridesc.numtriangles;
1581
1582LNDLoop:
1583
1584//	for (i=0 ; i<lnumtriangles ; i++, ptri++)
1585//	{
1586//		index0 = pfv + ptri->vertindex[0];
1587//		index1 = pfv + ptri->vertindex[1];
1588//		index2 = pfv + ptri->vertindex[2];
1589	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
1590	movl	mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
1591	shll	$(fv_shift),%ecx
1592	movl	mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
1593	shll	$(fv_shift),%edx
1594	movl	mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
1595	shll	$(fv_shift),%ebx
1596	addl	%edi,%ecx
1597	addl	%edi,%edx
1598	addl	%edi,%ebx
1599
1600//		d_xdenom = (index0->v[1]-index1->v[1]) *
1601//				(index0->v[0]-index2->v[0]) -
1602//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
1603	movl	fv_v+4(%ecx),%eax
1604	movl	fv_v+0(%ecx),%esi
1605	subl	fv_v+4(%edx),%eax
1606	subl	fv_v+0(%ebx),%esi
1607	imull	%esi,%eax
1608	movl	fv_v+0(%ecx),%esi
1609	movl	fv_v+4(%ecx),%edi
1610	subl	fv_v+0(%edx),%esi
1611	subl	fv_v+4(%ebx),%edi
1612	imull	%esi,%edi
1613	subl	%edi,%eax
1614
1615//		if (d_xdenom >= 0)
1616//		{
1617//			continue;
1618	jns		LNextTri
1619
1620//		}
1621
1622	movl	%eax,C(d_xdenom)
1623	fildl	C(d_xdenom)
1624
1625//		r_p0[0] = index0->v[0];		// u
1626//		r_p0[1] = index0->v[1];		// v
1627//		r_p0[2] = index0->v[2];		// s
1628//		r_p0[3] = index0->v[3];		// t
1629//		r_p0[4] = index0->v[4];		// light
1630//		r_p0[5] = index0->v[5];		// iz
1631	movl	fv_v+0(%ecx),%eax
1632	movl	fv_v+4(%ecx),%esi
1633	movl	%eax,C(r_p0)+0
1634	movl	%esi,C(r_p0)+4
1635	movl	fv_v+8(%ecx),%eax
1636	movl	fv_v+12(%ecx),%esi
1637	movl	%eax,C(r_p0)+8
1638	movl	%esi,C(r_p0)+12
1639	movl	fv_v+16(%ecx),%eax
1640	movl	fv_v+20(%ecx),%esi
1641	movl	%eax,C(r_p0)+16
1642	movl	%esi,C(r_p0)+20
1643
1644	fdivrs	float_1
1645
1646//		r_p1[0] = index1->v[0];
1647//		r_p1[1] = index1->v[1];
1648//		r_p1[2] = index1->v[2];
1649//		r_p1[3] = index1->v[3];
1650//		r_p1[4] = index1->v[4];
1651//		r_p1[5] = index1->v[5];
1652	movl	fv_v+0(%edx),%eax
1653	movl	fv_v+4(%edx),%esi
1654	movl	%eax,C(r_p1)+0
1655	movl	%esi,C(r_p1)+4
1656	movl	fv_v+8(%edx),%eax
1657	movl	fv_v+12(%edx),%esi
1658	movl	%eax,C(r_p1)+8
1659	movl	%esi,C(r_p1)+12
1660	movl	fv_v+16(%edx),%eax
1661	movl	fv_v+20(%edx),%esi
1662	movl	%eax,C(r_p1)+16
1663	movl	%esi,C(r_p1)+20
1664
1665//		r_p2[0] = index2->v[0];
1666//		r_p2[1] = index2->v[1];
1667//		r_p2[2] = index2->v[2];
1668//		r_p2[3] = index2->v[3];
1669//		r_p2[4] = index2->v[4];
1670//		r_p2[5] = index2->v[5];
1671	movl	fv_v+0(%ebx),%eax
1672	movl	fv_v+4(%ebx),%esi
1673	movl	%eax,C(r_p2)+0
1674	movl	%esi,C(r_p2)+4
1675	movl	fv_v+8(%ebx),%eax
1676	movl	fv_v+12(%ebx),%esi
1677	movl	%eax,C(r_p2)+8
1678	movl	%esi,C(r_p2)+12
1679	movl	fv_v+16(%ebx),%eax
1680	movl	fv_v+20(%ebx),%esi
1681	movl	%eax,C(r_p2)+16
1682	movl	C(r_affinetridesc)+atd_ptriangles,%edi
1683	movl	%esi,C(r_p2)+20
1684	movl	mtri_facesfront-mtri_size(%edi,%ebp,1),%eax
1685
1686//		if (!ptri->facesfront)
1687//		{
1688	testl	%eax,%eax
1689	jnz		LFacesFront
1690
1691//			if (index0->flags & ALIAS_ONSEAM)
1692//				r_p0[2] += r_affinetridesc.seamfixupX16;
1693	movl	fv_flags(%ecx),%eax
1694	movl	fv_flags(%edx),%esi
1695	movl	fv_flags(%ebx),%edi
1696	testl	$(ALIAS_ONSEAM),%eax
1697	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax
1698	jz		LOnseamDone0
1699	addl	%eax,C(r_p0)+8
1700LOnseamDone0:
1701
1702//			if (index1->flags & ALIAS_ONSEAM)
1703// 				r_p1[2] += r_affinetridesc.seamfixupX16;
1704	testl	$(ALIAS_ONSEAM),%esi
1705	jz		LOnseamDone1
1706	addl	%eax,C(r_p1)+8
1707LOnseamDone1:
1708
1709//			if (index2->flags & ALIAS_ONSEAM)
1710//				r_p2[2] += r_affinetridesc.seamfixupX16;
1711	testl	$(ALIAS_ONSEAM),%edi
1712	jz		LOnseamDone2
1713	addl	%eax,C(r_p2)+8
1714LOnseamDone2:
1715
1716//		}
1717
1718LFacesFront:
1719
1720	fstps	C(d_xdenom)
1721
1722//		D_PolysetSetEdgeTable ();
1723//		D_RasterizeAliasPolySmooth ();
1724		call	C(D_PolysetSetEdgeTable)
1725		call	C(D_RasterizeAliasPolySmooth)
1726
1727LNextTri:
1728		movl	C(r_affinetridesc)+atd_ptriangles,%esi
1729		subl	$16,%ebp
1730		jnz		LNDLoop
1731//	}
1732
1733	popl	%edi
1734	popl	%esi
1735	popl	%ebx
1736	popl	%ebp
1737
1738	addl	$(SPAN_SIZE),%esp
1739
1740	ret
1741
1742
1743#endif	// id386
1744
1745