1/*
2Copyright (C) 1996-1997 Id Software, Inc.
3
4This program is free software; you can redistribute it and/or
5modify it under the terms of the GNU General Public License
6as published by the Free Software Foundation; either version 2
7of the License, or (at your option) any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12
13See the GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program; if not, write to the Free Software
17Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
18
19*/
20//
21// d_spr8.s
22// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
23//
24
25#include "asm_i386.h"
26#include "quakeasm.h"
27#include "asm_draw.h"
28
29#if id386
30
31//----------------------------------------------------------------------
32// 8-bpp horizontal span drawing code for polygons, with transparency.
33//----------------------------------------------------------------------
34
35	.text
36
37// out-of-line, rarely-needed clamping code
38
39LClampHigh0:
40	movl	C(bbextents),%esi
41	jmp		LClampReentry0
42LClampHighOrLow0:
43	jg		LClampHigh0
44	xorl	%esi,%esi
45	jmp		LClampReentry0
46
47LClampHigh1:
48	movl	C(bbextentt),%edx
49	jmp		LClampReentry1
50LClampHighOrLow1:
51	jg		LClampHigh1
52	xorl	%edx,%edx
53	jmp		LClampReentry1
54
55LClampLow2:
56	movl	$2048,%ebp
57	jmp		LClampReentry2
58LClampHigh2:
59	movl	C(bbextents),%ebp
60	jmp		LClampReentry2
61
62LClampLow3:
63	movl	$2048,%ecx
64	jmp		LClampReentry3
65LClampHigh3:
66	movl	C(bbextentt),%ecx
67	jmp		LClampReentry3
68
69LClampLow4:
70	movl	$2048,%eax
71	jmp		LClampReentry4
72LClampHigh4:
73	movl	C(bbextents),%eax
74	jmp		LClampReentry4
75
76LClampLow5:
77	movl	$2048,%ebx
78	jmp		LClampReentry5
79LClampHigh5:
80	movl	C(bbextentt),%ebx
81	jmp		LClampReentry5
82
83
84#define pspans	4+16
85
86	.align 4
87.globl C(D_SpriteDrawSpans)
88C(D_SpriteDrawSpans):
89	pushl	%ebp				// preserve caller's stack frame
90	pushl	%edi
91	pushl	%esi				// preserve register variables
92	pushl	%ebx
93
94//
95// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
96// and span list pointers, and 1/z step in 0.32 fixed-point
97//
98// FIXME: any overlap from rearranging?
99	flds	C(d_sdivzstepu)
100	fmuls	fp_8
101	movl	C(cacheblock),%edx
102	flds	C(d_tdivzstepu)
103	fmuls	fp_8
104	movl	pspans(%esp),%ebx	// point to the first span descriptor
105	flds	C(d_zistepu)
106	fmuls	fp_8
107	movl	%edx,pbase			// pbase = cacheblock
108	flds	C(d_zistepu)
109	fmuls	fp_64kx64k
110	fxch	%st(3)
111	fstps	sdivz8stepu
112	fstps	zi8stepu
113	fstps	tdivz8stepu
114	fistpl	izistep
115	movl	izistep,%eax
116	rorl	$16,%eax		// put upper 16 bits in low word
117	movl	sspan_t_count(%ebx),%ecx
118	movl	%eax,izistep
119
120	cmpl	$0,%ecx
121	jle		LNextSpan
122
123LSpanLoop:
124
125//
126// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
127// initial s and t values
128//
129// FIXME: pipeline FILD?
130	fildl	sspan_t_v(%ebx)
131	fildl	sspan_t_u(%ebx)
132
133	fld		%st(1)			// dv | du | dv
134	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
135	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
136	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
137	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
138	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
139							//  dv*d_sdivzstepv | du | dv
140	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
141							//  dv*d_sdivzstepv | du | dv
142	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
143							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
144	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
145							//  du*d_tdivzstepu | du | dv
146	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
147							//  du*d_tdivzstepu | du | dv
148	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
149							//  du*d_sdivzstepu + dv*d_sdivzstepv |
150							//  du*d_tdivzstepu | du | dv
151	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
152							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
153	fadds	C(d_sdivzorigin) // sdivz = d_sdivzorigin + dv*d_sdivzstepv +
154							//  du*d_sdivzstepu; stays in %st(2) at end
155	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
156							//  s/z
157	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
158							//  du*d_tdivzstepu | du | s/z
159	fxch	%st(1)			// dv*d_tdivzstepv |  dv*d_zistepv |
160							//  du*d_tdivzstepu | du | s/z
161	faddp	%st(0),%st(2)	// dv*d_zistepv |
162							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
163	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
164							//  dv*d_zistepv | s/z
165	fmuls	C(d_zistepu)		// du*d_zistepu |
166							//  dv*d_tdivzstepv + du*d_tdivzstepu |
167							//  dv*d_zistepv | s/z
168	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
169							//  du*d_zistepu | dv*d_zistepv | s/z
170	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
171							//  du*d_tdivzstepu; stays in %st(1) at end
172	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
173	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z
174
175	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
176	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
177	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
178							//  du*d_zistepu; stays in %st(0) at end
179							// 1/z | fp_64k | t/z | s/z
180
181	fld		%st(0)			// FIXME: get rid of stall on FMUL?
182	fmuls	fp_64kx64k
183	fxch	%st(1)
184
185//
186// calculate and clamp s & t
187//
188	fdivr	%st(0),%st(2)	// 1/z | z*64k | t/z | s/z
189	fxch	%st(1)
190
191	fistpl	izi				// 0.32 fixed-point 1/z
192	movl	izi,%ebp
193
194//
195// set pz to point to the first z-buffer pixel in the span
196//
197	rorl	$16,%ebp		// put upper 16 bits in low word
198	movl	sspan_t_v(%ebx),%eax
199	movl	%ebp,izi
200	movl	sspan_t_u(%ebx),%ebp
201	imull	C(d_zrowbytes)
202	shll	$1,%ebp					// a word per pixel
203	addl	C(d_pzbuffer),%eax
204	addl	%ebp,%eax
205	movl	%eax,pz
206
207//
208// point %edi to the first pixel in the span
209//
210	movl	C(d_viewbuffer),%ebp
211	movl	sspan_t_v(%ebx),%eax
212	pushl	%ebx		// preserve spans pointer
213	movl	C(tadjust),%edx
214	movl	C(sadjust),%esi
215	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
216	addl	%ebp,%edi
217	movl	sspan_t_u(%ebx),%ebp
218	addl	%ebp,%edi				// pdest = &pdestspan[scans->u];
219
220//
221// now start the FDIV for the end of the span
222//
223	cmpl	$8,%ecx
224	ja		LSetupNotLast1
225
226	decl	%ecx
227	jz		LCleanup1		// if only one pixel, no need to start an FDIV
228	movl	%ecx,spancountminus1
229
230// finish up the s and t calcs
231	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
232
233	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
234	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
235	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
236	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
237	fxch	%st(1)			// s | t | 1/z | t/z | s/z
238	fistpl	s				// 1/z | t | t/z | s/z
239	fistpl	t				// 1/z | t/z | s/z
240
241	fildl	spancountminus1
242
243	flds	C(d_tdivzstepu)	// _d_tdivzstepu | spancountminus1
244	flds	C(d_zistepu)	// _d_zistepu | _d_tdivzstepu | spancountminus1
245	fmul	%st(2),%st(0)	// _d_zistepu*scm1 | _d_tdivzstepu | scm1
246	fxch	%st(1)			// _d_tdivzstepu | _d_zistepu*scm1 | scm1
247	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
248	fxch	%st(2)			// scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
249	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
250							//  _d_tdivzstepu*scm1
251	fxch	%st(1)			// _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
252							//  _d_tdivzstepu*scm1
253	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
254	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
255	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
256	faddp	%st(0),%st(3)
257
258	flds	fp_64k
259	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
260							//  overlap
261	jmp		LFDIVInFlight1
262
263LCleanup1:
264// finish up the s and t calcs
265	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
266
267	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
268	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
269	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
270	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
271	fxch	%st(1)			// s | t | 1/z | t/z | s/z
272	fistpl	s				// 1/z | t | t/z | s/z
273	fistpl	t				// 1/z | t/z | s/z
274	jmp		LFDIVInFlight1
275
276	.align	4
277LSetupNotLast1:
278// finish up the s and t calcs
279	fxch	%st(1)			// z*64k | 1/z | t/z | s/z
280
281	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
282	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
283	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
284	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
285	fxch	%st(1)			// s | t | 1/z | t/z | s/z
286	fistpl	s				// 1/z | t | t/z | s/z
287	fistpl	t				// 1/z | t/z | s/z
288
289	fadds	zi8stepu
290	fxch	%st(2)
291	fadds	sdivz8stepu
292	fxch	%st(2)
293	flds	tdivz8stepu
294	faddp	%st(0),%st(2)
295	flds	fp_64k
296	fdiv	%st(1),%st(0)	// z = 1/1/z
297							// this is what we've gone to all this trouble to
298							//  overlap
299LFDIVInFlight1:
300
301	addl	s,%esi
302	addl	t,%edx
303	movl	C(bbextents),%ebx
304	movl	C(bbextentt),%ebp
305	cmpl	%ebx,%esi
306	ja		LClampHighOrLow0
307LClampReentry0:
308	movl	%esi,s
309	movl	pbase,%ebx
310	shll	$16,%esi
311	cmpl	%ebp,%edx
312	movl	%esi,sfracf
313	ja		LClampHighOrLow1
314LClampReentry1:
315	movl	%edx,t
316	movl	s,%esi					// sfrac = scans->sfrac;
317	shll	$16,%edx
318	movl	t,%eax					// tfrac = scans->tfrac;
319	sarl	$16,%esi
320	movl	%edx,tfracf
321
322//
323// calculate the texture starting address
324//
325	sarl	$16,%eax
326	addl	%ebx,%esi
327	imull	C(cachewidth),%eax		// (tfrac >> 16) * cachewidth
328	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
329									//           ((tfrac >> 16) * cachewidth);
330
331//
332// determine whether last span or not
333//
334	cmpl	$8,%ecx
335	jna		LLastSegment
336
337//
338// not the last segment; do full 8-wide segment
339//
340LNotLastSegment:
341
342//
343// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
344// get there
345//
346
347// pick up after the FDIV that was left in flight previously
348
349	fld		%st(0)			// duplicate it
350	fmul	%st(4),%st(0)	// s = s/z * z
351	fxch	%st(1)
352	fmul	%st(3),%st(0)	// t = t/z * z
353	fxch	%st(1)
354	fistpl	snext
355	fistpl	tnext
356	movl	snext,%eax
357	movl	tnext,%edx
358
359	subl	$8,%ecx		// count off this segments' pixels
360	movl	C(sadjust),%ebp
361	pushl	%ecx		// remember count of remaining pixels
362	movl	C(tadjust),%ecx
363
364	addl	%eax,%ebp
365	addl	%edx,%ecx
366
367	movl	C(bbextents),%eax
368	movl	C(bbextentt),%edx
369
370	cmpl	$2048,%ebp
371	jl		LClampLow2
372	cmpl	%eax,%ebp
373	ja		LClampHigh2
374LClampReentry2:
375
376	cmpl	$2048,%ecx
377	jl		LClampLow3
378	cmpl	%edx,%ecx
379	ja		LClampHigh3
380LClampReentry3:
381
382	movl	%ebp,snext
383	movl	%ecx,tnext
384
385	subl	s,%ebp
386	subl	t,%ecx
387
388//
389// set up advancetable
390//
391	movl	%ecx,%eax
392	movl	%ebp,%edx
393	sarl	$19,%edx			// sstep >>= 16;
394	movl	C(cachewidth),%ebx
395	sarl	$19,%eax			// tstep >>= 16;
396	jz		LIsZero
397	imull	%ebx,%eax			// (tstep >> 16) * cachewidth;
398LIsZero:
399	addl	%edx,%eax			// add in sstep
400								// (tstep >> 16) * cachewidth + (sstep >> 16);
401	movl	tfracf,%edx
402	movl	%eax,advancetable+4	// advance base in t
403	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
404								//  (sstep >> 16);
405	shll	$13,%ebp			// left-justify sstep fractional part
406	movl	%ebp,sstep
407	movl	sfracf,%ebx
408	shll	$13,%ecx			// left-justify tstep fractional part
409	movl	%eax,advancetable	// advance extra in t
410	movl	%ecx,tstep
411
412	movl	pz,%ecx
413	movl	izi,%ebp
414
415	cmpw	(%ecx),%bp
416	jl		Lp1
417	movb	(%esi),%al			// get first source texel
418	cmpb	$(TRANSPARENT_COLOR),%al
419	jz		Lp1
420	movw	%bp,(%ecx)
421	movb	%al,(%edi)			// store first dest pixel
422Lp1:
423	addl	izistep,%ebp
424	adcl	$0,%ebp
425	addl	tstep,%edx			// advance tfrac fractional part by tstep frac
426
427	sbbl	%eax,%eax			// turn tstep carry into -1 (0 if none)
428	addl	sstep,%ebx			// advance sfrac fractional part by sstep frac
429	adcl	advancetable+4(,%eax,4),%esi	// point to next source texel
430
431	cmpw	2(%ecx),%bp
432	jl		Lp2
433	movb	(%esi),%al
434	cmpb	$(TRANSPARENT_COLOR),%al
435	jz		Lp2
436	movw	%bp,2(%ecx)
437	movb	%al,1(%edi)
438Lp2:
439	addl	izistep,%ebp
440	adcl	$0,%ebp
441	addl	tstep,%edx
442	sbbl	%eax,%eax
443	addl	sstep,%ebx
444	adcl	advancetable+4(,%eax,4),%esi
445
446	cmpw	4(%ecx),%bp
447	jl		Lp3
448	movb	(%esi),%al
449	cmpb	$(TRANSPARENT_COLOR),%al
450	jz		Lp3
451	movw	%bp,4(%ecx)
452	movb	%al,2(%edi)
453Lp3:
454	addl	izistep,%ebp
455	adcl	$0,%ebp
456	addl	tstep,%edx
457	sbbl	%eax,%eax
458	addl	sstep,%ebx
459	adcl	advancetable+4(,%eax,4),%esi
460
461	cmpw	6(%ecx),%bp
462	jl		Lp4
463	movb	(%esi),%al
464	cmpb	$(TRANSPARENT_COLOR),%al
465	jz		Lp4
466	movw	%bp,6(%ecx)
467	movb	%al,3(%edi)
468Lp4:
469	addl	izistep,%ebp
470	adcl	$0,%ebp
471	addl	tstep,%edx
472	sbbl	%eax,%eax
473	addl	sstep,%ebx
474	adcl	advancetable+4(,%eax,4),%esi
475
476	cmpw	8(%ecx),%bp
477	jl		Lp5
478	movb	(%esi),%al
479	cmpb	$(TRANSPARENT_COLOR),%al
480	jz		Lp5
481	movw	%bp,8(%ecx)
482	movb	%al,4(%edi)
483Lp5:
484	addl	izistep,%ebp
485	adcl	$0,%ebp
486	addl	tstep,%edx
487	sbbl	%eax,%eax
488	addl	sstep,%ebx
489	adcl	advancetable+4(,%eax,4),%esi
490
491//
492// start FDIV for end of next segment in flight, so it can overlap
493//
494	popl	%eax
495	cmpl	$8,%eax			// more than one segment after this?
496	ja		LSetupNotLast2	// yes
497
498	decl	%eax
499	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
500	movl	%eax,spancountminus1
501	fildl	spancountminus1
502
503	flds	C(d_zistepu)		// _d_zistepu | spancountminus1
504	fmul	%st(1),%st(0)	// _d_zistepu*scm1 | scm1
505	flds	C(d_tdivzstepu)	// _d_tdivzstepu | _d_zistepu*scm1 | scm1
506	fmul	%st(2),%st(0)	// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
507	fxch	%st(1)			// _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
508	faddp	%st(0),%st(3)	// _d_tdivzstepu*scm1 | scm1
509	fxch	%st(1)			// scm1 | _d_tdivzstepu*scm1
510	fmuls	C(d_sdivzstepu)	// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
511	fxch	%st(1)			// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
512	faddp	%st(0),%st(3)	// _d_sdivzstepu*scm1
513	flds	fp_64k			// 64k | _d_sdivzstepu*scm1
514	fxch	%st(1)			// _d_sdivzstepu*scm1 | 64k
515	faddp	%st(0),%st(4)	// 64k
516
517	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
518							//  overlap
519	jmp		LFDIVInFlight2
520
521	.align	4
522LSetupNotLast2:
523	fadds	zi8stepu
524	fxch	%st(2)
525	fadds	sdivz8stepu
526	fxch	%st(2)
527	flds	tdivz8stepu
528	faddp	%st(0),%st(2)
529	flds	fp_64k
530	fdiv	%st(1),%st(0)	// z = 1/1/z
531							// this is what we've gone to all this trouble to
532							//  overlap
533LFDIVInFlight2:
534	pushl	%eax
535
536	cmpw	10(%ecx),%bp
537	jl		Lp6
538	movb	(%esi),%al
539	cmpb	$(TRANSPARENT_COLOR),%al
540	jz		Lp6
541	movw	%bp,10(%ecx)
542	movb	%al,5(%edi)
543Lp6:
544	addl	izistep,%ebp
545	adcl	$0,%ebp
546	addl	tstep,%edx
547	sbbl	%eax,%eax
548	addl	sstep,%ebx
549	adcl	advancetable+4(,%eax,4),%esi
550
551	cmpw	12(%ecx),%bp
552	jl		Lp7
553	movb	(%esi),%al
554	cmpb	$(TRANSPARENT_COLOR),%al
555	jz		Lp7
556	movw	%bp,12(%ecx)
557	movb	%al,6(%edi)
558Lp7:
559	addl	izistep,%ebp
560	adcl	$0,%ebp
561	addl	tstep,%edx
562	sbbl	%eax,%eax
563	addl	sstep,%ebx
564	adcl	advancetable+4(,%eax,4),%esi
565
566	cmpw	14(%ecx),%bp
567	jl		Lp8
568	movb	(%esi),%al
569	cmpb	$(TRANSPARENT_COLOR),%al
570	jz		Lp8
571	movw	%bp,14(%ecx)
572	movb	%al,7(%edi)
573Lp8:
574	addl	izistep,%ebp
575	adcl	$0,%ebp
576	addl	tstep,%edx
577	sbbl	%eax,%eax
578	addl	sstep,%ebx
579	adcl	advancetable+4(,%eax,4),%esi
580
581	addl	$8,%edi
582	addl	$16,%ecx
583	movl	%edx,tfracf
584	movl	snext,%edx
585	movl	%ebx,sfracf
586	movl	tnext,%ebx
587	movl	%edx,s
588	movl	%ebx,t
589
590	movl	%ecx,pz
591	movl	%ebp,izi
592
593	popl	%ecx				// retrieve count
594
595//
596// determine whether last span or not
597//
598	cmpl	$8,%ecx				// are there multiple segments remaining?
599	ja		LNotLastSegment		// yes
600
601//
602// last segment of scan
603//
604LLastSegment:
605
606//
607// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
608// get there. The number of pixels left is variable, and we want to land on the
609// last pixel, not step one past it, so we can't run into arithmetic problems
610//
611	testl	%ecx,%ecx
612	jz		LNoSteps		// just draw the last pixel and we're done
613
614// pick up after the FDIV that was left in flight previously
615
616
617	fld		%st(0)			// duplicate it
618	fmul	%st(4),%st(0)	// s = s/z * z
619	fxch	%st(1)
620	fmul	%st(3),%st(0)	// t = t/z * z
621	fxch	%st(1)
622	fistpl	snext
623	fistpl	tnext
624
625	movl	C(tadjust),%ebx
626	movl	C(sadjust),%eax
627
628	addl	snext,%eax
629	addl	tnext,%ebx
630
631	movl	C(bbextents),%ebp
632	movl	C(bbextentt),%edx
633
634	cmpl	$2048,%eax
635	jl		LClampLow4
636	cmpl	%ebp,%eax
637	ja		LClampHigh4
638LClampReentry4:
639	movl	%eax,snext
640
641	cmpl	$2048,%ebx
642	jl		LClampLow5
643	cmpl	%edx,%ebx
644	ja		LClampHigh5
645LClampReentry5:
646
647	cmpl	$1,%ecx			// don't bother
648	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
649							//  of the segment length
650	subl	s,%eax
651	subl	t,%ebx
652
653	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
654	addl	%ebx,%ebx		//  reciprocal yields 16.48
655	imull	reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
656	movl	%edx,%ebp
657
658	movl	%ebx,%eax
659	imull	reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
660
661LSetEntryvec:
662//
663// set up advancetable
664//
665	movl	spr8entryvec_table(,%ecx,4),%ebx
666	movl	%edx,%eax
667	pushl	%ebx				// entry point into code for RET later
668	movl	%ebp,%ecx
669	sarl	$16,%ecx			// sstep >>= 16;
670	movl	C(cachewidth),%ebx
671	sarl	$16,%edx			// tstep >>= 16;
672	jz		LIsZeroLast
673	imull	%ebx,%edx			// (tstep >> 16) * cachewidth;
674LIsZeroLast:
675	addl	%ecx,%edx			// add in sstep
676								// (tstep >> 16) * cachewidth + (sstep >> 16);
677	movl	tfracf,%ecx
678	movl	%edx,advancetable+4	// advance base in t
679	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
680								//  (sstep >> 16);
681	shll	$16,%ebp			// left-justify sstep fractional part
682	movl	sfracf,%ebx
683	shll	$16,%eax			// left-justify tstep fractional part
684	movl	%edx,advancetable	// advance extra in t
685
686	movl	%eax,tstep
687	movl	%ebp,sstep
688	movl	%ecx,%edx
689
690	movl	pz,%ecx
691	movl	izi,%ebp
692
693	ret							// jump to the number-of-pixels handler
694
695//----------------------------------------
696
697LNoSteps:
698	movl	pz,%ecx
699	subl	$7,%edi			// adjust for hardwired offset
700	subl	$14,%ecx
701	jmp		LEndSpan
702
703
704LOnlyOneStep:
705	subl	s,%eax
706	subl	t,%ebx
707	movl	%eax,%ebp
708	movl	%ebx,%edx
709	jmp		LSetEntryvec
710
711//----------------------------------------
712
713.globl	Spr8Entry2_8
714Spr8Entry2_8:
715	subl	$6,%edi		// adjust for hardwired offsets
716	subl	$12,%ecx
717	movb	(%esi),%al
718	jmp		LLEntry2_8
719
720//----------------------------------------
721
722.globl	Spr8Entry3_8
723Spr8Entry3_8:
724	subl	$5,%edi		// adjust for hardwired offsets
725	subl	$10,%ecx
726	jmp		LLEntry3_8
727
728//----------------------------------------
729
730.globl	Spr8Entry4_8
731Spr8Entry4_8:
732	subl	$4,%edi		// adjust for hardwired offsets
733	subl	$8,%ecx
734	jmp		LLEntry4_8
735
736//----------------------------------------
737
738.globl	Spr8Entry5_8
739Spr8Entry5_8:
740	subl	$3,%edi		// adjust for hardwired offsets
741	subl	$6,%ecx
742	jmp		LLEntry5_8
743
744//----------------------------------------
745
746.globl	Spr8Entry6_8
747Spr8Entry6_8:
748	subl	$2,%edi		// adjust for hardwired offsets
749	subl	$4,%ecx
750	jmp		LLEntry6_8
751
752//----------------------------------------
753
754.globl	Spr8Entry7_8
755Spr8Entry7_8:
756	decl	%edi		// adjust for hardwired offsets
757	subl	$2,%ecx
758	jmp		LLEntry7_8
759
760//----------------------------------------
761
762.globl	Spr8Entry8_8
763Spr8Entry8_8:
764	cmpw	(%ecx),%bp
765	jl		Lp9
766	movb	(%esi),%al
767	cmpb	$(TRANSPARENT_COLOR),%al
768	jz		Lp9
769	movw	%bp,(%ecx)
770	movb	%al,(%edi)
771Lp9:
772	addl	izistep,%ebp
773	adcl	$0,%ebp
774	addl	tstep,%edx
775	sbbl	%eax,%eax
776	addl	sstep,%ebx
777	adcl	advancetable+4(,%eax,4),%esi
778LLEntry7_8:
779	cmpw	2(%ecx),%bp
780	jl		Lp10
781	movb	(%esi),%al
782	cmpb	$(TRANSPARENT_COLOR),%al
783	jz		Lp10
784	movw	%bp,2(%ecx)
785	movb	%al,1(%edi)
786Lp10:
787	addl	izistep,%ebp
788	adcl	$0,%ebp
789	addl	tstep,%edx
790	sbbl	%eax,%eax
791	addl	sstep,%ebx
792	adcl	advancetable+4(,%eax,4),%esi
793LLEntry6_8:
794	cmpw	4(%ecx),%bp
795	jl		Lp11
796	movb	(%esi),%al
797	cmpb	$(TRANSPARENT_COLOR),%al
798	jz		Lp11
799	movw	%bp,4(%ecx)
800	movb	%al,2(%edi)
801Lp11:
802	addl	izistep,%ebp
803	adcl	$0,%ebp
804	addl	tstep,%edx
805	sbbl	%eax,%eax
806	addl	sstep,%ebx
807	adcl	advancetable+4(,%eax,4),%esi
808LLEntry5_8:
809	cmpw	6(%ecx),%bp
810	jl		Lp12
811	movb	(%esi),%al
812	cmpb	$(TRANSPARENT_COLOR),%al
813	jz		Lp12
814	movw	%bp,6(%ecx)
815	movb	%al,3(%edi)
816Lp12:
817	addl	izistep,%ebp
818	adcl	$0,%ebp
819	addl	tstep,%edx
820	sbbl	%eax,%eax
821	addl	sstep,%ebx
822	adcl	advancetable+4(,%eax,4),%esi
823LLEntry4_8:
824	cmpw	8(%ecx),%bp
825	jl		Lp13
826	movb	(%esi),%al
827	cmpb	$(TRANSPARENT_COLOR),%al
828	jz		Lp13
829	movw	%bp,8(%ecx)
830	movb	%al,4(%edi)
831Lp13:
832	addl	izistep,%ebp
833	adcl	$0,%ebp
834	addl	tstep,%edx
835	sbbl	%eax,%eax
836	addl	sstep,%ebx
837	adcl	advancetable+4(,%eax,4),%esi
838LLEntry3_8:
839	cmpw	10(%ecx),%bp
840	jl		Lp14
841	movb	(%esi),%al
842	cmpb	$(TRANSPARENT_COLOR),%al
843	jz		Lp14
844	movw	%bp,10(%ecx)
845	movb	%al,5(%edi)
846Lp14:
847	addl	izistep,%ebp
848	adcl	$0,%ebp
849	addl	tstep,%edx
850	sbbl	%eax,%eax
851	addl	sstep,%ebx
852	adcl	advancetable+4(,%eax,4),%esi
853LLEntry2_8:
854	cmpw	12(%ecx),%bp
855	jl		Lp15
856	movb	(%esi),%al
857	cmpb	$(TRANSPARENT_COLOR),%al
858	jz		Lp15
859	movw	%bp,12(%ecx)
860	movb	%al,6(%edi)
861Lp15:
862	addl	izistep,%ebp
863	adcl	$0,%ebp
864	addl	tstep,%edx
865	sbbl	%eax,%eax
866	addl	sstep,%ebx
867	adcl	advancetable+4(,%eax,4),%esi
868
869LEndSpan:
870	cmpw	14(%ecx),%bp
871	jl		Lp16
872	movb	(%esi),%al		// load first texel in segment
873	cmpb	$(TRANSPARENT_COLOR),%al
874	jz		Lp16
875	movw	%bp,14(%ecx)
876	movb	%al,7(%edi)
877Lp16:
878
879//
880// clear s/z, t/z, 1/z from FP stack
881//
882	fstp %st(0)
883	fstp %st(0)
884	fstp %st(0)
885
886	popl	%ebx				// restore spans pointer
887LNextSpan:
888	addl	$(sspan_t_size),%ebx // point to next span
889	movl	sspan_t_count(%ebx),%ecx
890	cmpl	$0,%ecx				// any more spans?
891	jg		LSpanLoop			// yes
892	jz		LNextSpan			// yes, but this one's empty
893
894	popl	%ebx				// restore register variables
895	popl	%esi
896	popl	%edi
897	popl	%ebp				// restore the caller's stack frame
898	ret
899
900#endif	// id386
901