/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

*/
//
// d_spr8.s
// x86 assembly-language horizontal 8-bpp transparent span-drawing code.
//
// Syntax: AT&T (GNU as), 32-bit x86 + x87, run through the C preprocessor.
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"

#if id386

//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for polygons, with transparency.
//----------------------------------------------------------------------

	.text

//
// out-of-line, rarely-needed clamping code
//
// Each stub is entered from a conditional jump in the main routine and
// jumps straight back to the matching LClampReentryN label.
//
// Stubs 0/1 clamp the span's starting s (%esi) / t (%edx). They are reached
// through an unsigned "ja" against the extent, which fires both for values
// above the extent and for negative values; the signed "jg" on the same
// flags then selects between the extent (too high) and 0 (negative).
//
// Stubs 2-5 clamp the end-of-segment s/t; there the low bound is 2048
// rather than 0, and the high bound is bbextents / bbextentt.
//

LClampHigh0:
	movl	C(bbextents),%esi
	jmp		LClampReentry0
LClampHighOrLow0:
	jg		LClampHigh0			// flags still from cmpl %ebx,%esi
	xorl	%esi,%esi
	jmp		LClampReentry0

LClampHigh1:
	movl	C(bbextentt),%edx
	jmp		LClampReentry1
LClampHighOrLow1:
	jg		LClampHigh1			// flags still from cmpl %ebp,%edx
	xorl	%edx,%edx
	jmp		LClampReentry1

LClampLow2:
	movl	$2048,%ebp
	jmp		LClampReentry2
LClampHigh2:
	movl	C(bbextents),%ebp
	jmp		LClampReentry2

LClampLow3:
	movl	$2048,%ecx
	jmp		LClampReentry3
LClampHigh3:
	movl	C(bbextentt),%ecx
	jmp		LClampReentry3

LClampLow4:
	movl	$2048,%eax
	jmp		LClampReentry4
LClampHigh4:
	movl	C(bbextents),%eax
	jmp		LClampReentry4

LClampLow5:
	movl	$2048,%ebx
	jmp		LClampReentry5
LClampHigh5:
	movl	C(bbextentt),%ebx
	jmp		LClampReentry5


//----------------------------------------------------------------------
// D_SpriteDrawSpans
//
// Draws a list of horizontal spans, perspective-correct in 8-pixel (or
// shorter, for the tail) segments, z-testing each pixel against the 16-bit
// z-buffer and skipping texels equal to TRANSPARENT_COLOR.
//
// ABI:    32-bit, single argument on the stack (read at pspans(%esp) after
//         the four register pushes below).
// In:     pointer to the first span descriptor; fields are accessed through
//         the sspan_t_u / sspan_t_v / sspan_t_count offsets and the list is
//         walked in sspan_t_size strides.
//         NOTE(review): presumably "void D_SpriteDrawSpans (sspan_t *pspan)"
//         on the C side - confirm against the prototype.
// Out:    nothing in registers; writes pixels through d_viewbuffer /
//         d_scantable and depths through d_pzbuffer.
// Saves:  %ebp, %edi, %esi, %ebx (pushed on entry, popped on exit).
// Clobb:  %eax, %ecx, %edx, flags. The x87 stack is left balanced: every
//         span pops the three values it keeps live.
//
// Span list walk: a span with count > 0 is drawn, count == 0 is skipped,
// and a negative count ends the loop at LNextSpan.
//
// Register roles inside the per-pixel code:
//   %esi  source texel pointer
//   %edi  destination pixel pointer (pixels addressed as 0..7(%edi))
//   %ecx  z-buffer pointer (words addressed as 0..14(%ecx))
//   %ebp  1/z, rotated so its upper 16 bits sit in %bp for the z compare
//   %edx  t fraction, %ebx  s fraction (left-justified)
//   %eax  texel in %al, then 0/-1 from the t-fraction carry (sbbl)
//----------------------------------------------------------------------

#define pspans	4+16

	.align 4
.globl C(D_SpriteDrawSpans)
C(D_SpriteDrawSpans):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

//
// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
// and span list pointers, and 1/z step in 0.32 fixed-point
//
// FIXME: any overlap from rearranging?
	flds	C(d_sdivzstepu)
	fmuls	fp_8
	movl	C(cacheblock),%edx
	flds	C(d_tdivzstepu)
	fmuls	fp_8
	movl	pspans(%esp),%ebx	// point to the first span descriptor
	flds	C(d_zistepu)
	fmuls	fp_8
	movl	%edx,pbase			// pbase = cacheblock
	flds	C(d_zistepu)
	fmuls	fp_64kx64k
	fxch	%st(3)				// bring sdivz*8 to the top for the stores
	fstps	sdivz8stepu
	fstps	zi8stepu
	fstps	tdivz8stepu
	fistpl	izistep
	movl	izistep,%eax
	rorl	$16,%eax			// put upper 16 bits in low word
	movl	sspan_t_count(%ebx),%ecx
	movl	%eax,izistep

// NOTE(review): a non-positive first count goes straight to LNextSpan, which
// advances to the next descriptor without testing this one for end-of-list.
	cmpl	$0,%ecx
	jle		LNextSpan

LSpanLoop:

//
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
// initial s and t values
//
// FIXME: pipeline FILD?
	fildl	sspan_t_v(%ebx)
	fildl	sspan_t_u(%ebx)

	fld		%st(1)				// dv | du | dv
	fmuls	C(d_sdivzstepv)		// dv*d_sdivzstepv | du | dv
	fld		%st(1)				// du | dv*d_sdivzstepv | du | dv
	fmuls	C(d_sdivzstepu)		// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fld		%st(2)				// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fmuls	C(d_tdivzstepu)		// du*d_tdivzstepu | du*d_sdivzstepu |
								//  dv*d_sdivzstepv | du | dv
	fxch	%st(1)				// du*d_sdivzstepu | du*d_tdivzstepu |
								//  dv*d_sdivzstepv | du | dv
	faddp	%st(0),%st(2)		// du*d_tdivzstepu |
								//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
	fxch	%st(1)				// du*d_sdivzstepu + dv*d_sdivzstepv |
								//  du*d_tdivzstepu | du | dv
	fld		%st(3)				// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
								//  du*d_tdivzstepu | du | dv
	fmuls	C(d_tdivzstepv)		// dv*d_tdivzstepv |
								//  du*d_sdivzstepu + dv*d_sdivzstepv |
								//  du*d_tdivzstepu | du | dv
	fxch	%st(1)				// du*d_sdivzstepu + dv*d_sdivzstepv |
								//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
								//  du*d_sdivzstepu; stays in %st(2) at end
	fxch	%st(4)				// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
								//  s/z
	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
								//  du*d_tdivzstepu | du | s/z
	fxch	%st(1)				// dv*d_tdivzstepv | dv*d_zistepv |
								//  du*d_tdivzstepu | du | s/z
	faddp	%st(0),%st(2)		// dv*d_zistepv |
								//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
	fxch	%st(2)				// du | dv*d_tdivzstepv + du*d_tdivzstepu |
								//  dv*d_zistepv | s/z
	fmuls	C(d_zistepu)		// du*d_zistepu |
								//  dv*d_tdivzstepv + du*d_tdivzstepu |
								//  dv*d_zistepv | s/z
	fxch	%st(1)				// dv*d_tdivzstepv + du*d_tdivzstepu |
								//  du*d_zistepu | dv*d_zistepv | s/z
	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
								//  du*d_tdivzstepu; stays in %st(1) at end
	fxch	%st(2)				// dv*d_zistepv | du*d_zistepu | t/z | s/z
	faddp	%st(0),%st(1)		// dv*d_zistepv + du*d_zistepu | t/z | s/z

	flds	fp_64k				// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
	fxch	%st(1)				// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
								//  du*d_zistepu; stays in %st(0) at end
								// 1/z | fp_64k | t/z | s/z

	fld		%st(0)				// FIXME: get rid of stall on FMUL?
	fmuls	fp_64kx64k
	fxch	%st(1)

//
// calculate and clamp s & t
//
	fdivr	%st(0),%st(2)		// 1/z | z*64k | t/z | s/z
	fxch	%st(1)

	fistpl	izi					// 0.32 fixed-point 1/z
	movl	izi,%ebp

//
// set pz to point to the first z-buffer pixel in the span
//
	rorl	$16,%ebp			// put upper 16 bits in low word
	movl	sspan_t_v(%ebx),%eax
	movl	%ebp,izi
	movl	sspan_t_u(%ebx),%ebp
	imull	C(d_zrowbytes)		// %eax = v * d_zrowbytes (low half of product)
	shll	$1,%ebp				// a word per pixel
	addl	C(d_pzbuffer),%eax
	addl	%ebp,%eax
	movl	%eax,pz

//
// point %edi to the first pixel in the span
//
	movl	C(d_viewbuffer),%ebp
	movl	sspan_t_v(%ebx),%eax
	pushl	%ebx				// preserve spans pointer
	movl	C(tadjust),%edx
	movl	C(sadjust),%esi
	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
	addl	%ebp,%edi
	movl	sspan_t_u(%ebx),%ebp
	addl	%ebp,%edi			// pdest = &pdestspan[scans->u];

//
// now start the FDIV for the end of the span
//
	cmpl	$8,%ecx
	ja		LSetupNotLast1

	decl	%ecx				// from here on %ecx = pixel count - 1
	jz		LCleanup1			// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1

// finish up the s and t calcs
	fxch	%st(1)				// z*64k | 1/z | t/z | s/z

	fld		%st(0)				// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)		// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)				// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)		// t | s | 1/z | t/z | s/z
	fxch	%st(1)				// s | t | 1/z | t/z | s/z
	fistpl	s					// 1/z | t | t/z | s/z
	fistpl	t					// 1/z | t/z | s/z

	fildl	spancountminus1

	flds	C(d_tdivzstepu)		// _d_tdivzstepu | spancountminus1
	flds	C(d_zistepu)		// _d_zistepu | _d_tdivzstepu | spancountminus1
	fmul	%st(2),%st(0)		// _d_zistepu*scm1 | _d_tdivzstepu | scm1
	fxch	%st(1)				// _d_tdivzstepu | _d_zistepu*scm1 | scm1
	fmul	%st(2),%st(0)		// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
	fxch	%st(2)				// scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
	fmuls	C(d_sdivzstepu)		// _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
								//  _d_tdivzstepu*scm1
	fxch	%st(1)				// _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
								//  _d_tdivzstepu*scm1
	faddp	%st(0),%st(3)		// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
	fxch	%st(1)				// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
	faddp	%st(0),%st(3)		// _d_sdivzstepu*scm1
	faddp	%st(0),%st(3)		// stack is now 1/z | t/z | s/z at span end

	flds	fp_64k
	fdiv	%st(1),%st(0)		// this is what we've gone to all this trouble to
								//  overlap
	jmp		LFDIVInFlight1

LCleanup1:
// finish up the s and t calcs
	fxch	%st(1)				// z*64k | 1/z | t/z | s/z

	fld		%st(0)				// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)		// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)				// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)		// t | s | 1/z | t/z | s/z
	fxch	%st(1)				// s | t | 1/z | t/z | s/z
	fistpl	s					// 1/z | t | t/z | s/z
	fistpl	t					// 1/z | t/z | s/z
	jmp		LFDIVInFlight1

	.align	4
LSetupNotLast1:
// finish up the s and t calcs
	fxch	%st(1)				// z*64k | 1/z | t/z | s/z

	fld		%st(0)				// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)		// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)				// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)		// t | s | 1/z | t/z | s/z
	fxch	%st(1)				// s | t | 1/z | t/z | s/z
	fistpl	s					// 1/z | t | t/z | s/z
	fistpl	t					// 1/z | t/z | s/z

// step 1/z, s/z and t/z ahead by a full 8-pixel segment
	fadds	zi8stepu
	fxch	%st(2)
	fadds	sdivz8stepu
	fxch	%st(2)
	flds	tdivz8stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)		// z = 1/1/z
								// this is what we've gone to all this trouble to
								//  overlap
LFDIVInFlight1:

// integer work below runs while the divide (if one was started) completes
	addl	s,%esi				// %esi = sadjust + s
	addl	t,%edx				// %edx = tadjust + t
	movl	C(bbextents),%ebx
	movl	C(bbextentt),%ebp
	cmpl	%ebx,%esi
	ja		LClampHighOrLow0
LClampReentry0:
	movl	%esi,s
	movl	pbase,%ebx
	shll	$16,%esi
	cmpl	%ebp,%edx
	movl	%esi,sfracf			// movl leaves the cmpl flags intact for the ja
	ja		LClampHighOrLow1
LClampReentry1:
	movl	%edx,t
	movl	s,%esi				// sfrac = scans->sfrac;
	shll	$16,%edx
	movl	t,%eax				// tfrac = scans->tfrac;
	sarl	$16,%esi
	movl	%edx,tfracf

//
// calculate the texture starting address
//
	sarl	$16,%eax
	addl	%ebx,%esi
	imull	C(cachewidth),%eax	// (tfrac >> 16) * cachewidth
	addl	%eax,%esi			// psource = pbase + (sfrac >> 16) +
								//           ((tfrac >> 16) * cachewidth);

//
// determine whether last span or not
//
	cmpl	$8,%ecx
	jna		LLastSegment

//
// not the last segment; do full 8-wide segment
//
LNotLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there
//

// pick up after the FDIV that was left in flight previously

	fld		%st(0)				// duplicate it
	fmul	%st(4),%st(0)		// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)		// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext
	movl	snext,%eax
	movl	tnext,%edx

	subl	$8,%ecx				// count off this segment's pixels
	movl	C(sadjust),%ebp
	pushl	%ecx				// remember count of remaining pixels
	movl	C(tadjust),%ecx

	addl	%eax,%ebp
	addl	%edx,%ecx

	movl	C(bbextents),%eax
	movl	C(bbextentt),%edx

	cmpl	$2048,%ebp
	jl		LClampLow2
	cmpl	%eax,%ebp
	ja		LClampHigh2
LClampReentry2:

	cmpl	$2048,%ecx
	jl		LClampLow3
	cmpl	%edx,%ecx
	ja		LClampHigh3
LClampReentry3:

	movl	%ebp,snext
	movl	%ecx,tnext

	subl	s,%ebp				// %ebp = s delta across the segment
	subl	t,%ecx				// %ecx = t delta across the segment

//
// set up advancetable
//
// advancetable+4 holds the per-pixel source advance when the t fraction does
// not carry; advancetable holds that advance plus one extra cachewidth row.
// The shifts are by 19 (and 13 for the fraction) rather than 16 because the
// delta covers 8 pixels.
//
	movl	%ecx,%eax
	movl	%ebp,%edx
	sarl	$19,%edx			// sstep >>= 16;
	movl	C(cachewidth),%ebx
	sarl	$19,%eax			// tstep >>= 16;
	jz		LIsZero
	imull	%ebx,%eax			// (tstep >> 16) * cachewidth;
LIsZero:
	addl	%edx,%eax			// add in sstep
								// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%edx
	movl	%eax,advancetable+4	// advance base in t
	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$13,%ebp			// left-justify sstep fractional part
	movl	%ebp,sstep
	movl	sfracf,%ebx
	shll	$13,%ecx			// left-justify tstep fractional part
	movl	%eax,advancetable	// advance extra in t
	movl	%ecx,tstep

	movl	pz,%ecx
	movl	izi,%ebp

//
// per-pixel pattern (repeated 8 times, offsets hardwired):
//   - skip the pixel if the new depth in %bp is (signed) less than the
//     z-buffer word, or if the texel equals TRANSPARENT_COLOR
//   - otherwise store both the depth and the texel
//   - step 1/z (addl/adcl $0 wraps the carry around, since the value is kept
//     rotated by 16), then step tfrac and sfrac; sbbl turns the tfrac carry
//     into 0/-1 to pick the advancetable entry, and adcl folds in the sfrac
//     carry while advancing the source pointer
//
	cmpw	(%ecx),%bp
	jl		Lp1
	movb	(%esi),%al			// get first source texel
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp1
	movw	%bp,(%ecx)
	movb	%al,(%edi)			// store first dest pixel
Lp1:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx			// advance tfrac fractional part by tstep frac

	sbbl	%eax,%eax			// turn tstep carry into -1 (0 if none)
	addl	sstep,%ebx			// advance sfrac fractional part by sstep frac
	adcl	advancetable+4(,%eax,4),%esi	// point to next source texel

	cmpw	2(%ecx),%bp
	jl		Lp2
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp2
	movw	%bp,2(%ecx)
	movb	%al,1(%edi)
Lp2:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	4(%ecx),%bp
	jl		Lp3
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp3
	movw	%bp,4(%ecx)
	movb	%al,2(%edi)
Lp3:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	6(%ecx),%bp
	jl		Lp4
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp4
	movw	%bp,6(%ecx)
	movb	%al,3(%edi)
Lp4:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	8(%ecx),%bp
	jl		Lp5
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp5
	movw	%bp,8(%ecx)
	movb	%al,4(%edi)
Lp5:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

//
// start FDIV for end of next segment in flight, so it can overlap
//
	popl	%eax				// %eax = pixels remaining after this segment
	cmpl	$8,%eax				// more than one segment after this?
	ja		LSetupNotLast2		// yes

	decl	%eax
	jz		LFDIVInFlight2		// if only one pixel, no need to start an FDIV
	movl	%eax,spancountminus1
	fildl	spancountminus1

	flds	C(d_zistepu)		// _d_zistepu | spancountminus1
	fmul	%st(1),%st(0)		// _d_zistepu*scm1 | scm1
	flds	C(d_tdivzstepu)		// _d_tdivzstepu | _d_zistepu*scm1 | scm1
	fmul	%st(2),%st(0)		// _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
	fxch	%st(1)				// _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
	faddp	%st(0),%st(3)		// _d_tdivzstepu*scm1 | scm1
	fxch	%st(1)				// scm1 | _d_tdivzstepu*scm1
	fmuls	C(d_sdivzstepu)		// _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
	fxch	%st(1)				// _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
	faddp	%st(0),%st(3)		// _d_sdivzstepu*scm1
	flds	fp_64k				// 64k | _d_sdivzstepu*scm1
	fxch	%st(1)				// _d_sdivzstepu*scm1 | 64k
	faddp	%st(0),%st(4)		// 64k

	fdiv	%st(1),%st(0)		// this is what we've gone to all this trouble to
								//  overlap
	jmp		LFDIVInFlight2

	.align	4
LSetupNotLast2:
	fadds	zi8stepu
	fxch	%st(2)
	fadds	sdivz8stepu
	fxch	%st(2)
	flds	tdivz8stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)		// z = 1/1/z
								// this is what we've gone to all this trouble to
								//  overlap
LFDIVInFlight2:
	pushl	%eax				// re-save the count; %eax is scratch below

	cmpw	10(%ecx),%bp
	jl		Lp6
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp6
	movw	%bp,10(%ecx)
	movb	%al,5(%edi)
Lp6:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	12(%ecx),%bp
	jl		Lp7
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp7
	movw	%bp,12(%ecx)
	movb	%al,6(%edi)
Lp7:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

	cmpw	14(%ecx),%bp
	jl		Lp8
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp8
	movw	%bp,14(%ecx)
	movb	%al,7(%edi)
Lp8:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// move on to the next segment: bump the pointers, and make the end-of-segment
// s/t (snext/tnext) the new start-of-segment s/t
	addl	$8,%edi
	addl	$16,%ecx
	movl	%edx,tfracf
	movl	snext,%edx
	movl	%ebx,sfracf
	movl	tnext,%ebx
	movl	%edx,s
	movl	%ebx,t

	movl	%ecx,pz
	movl	%ebp,izi

	popl	%ecx				// retrieve count

//
// determine whether last span or not
//
	cmpl	$8,%ecx				// are there multiple segments remaining?
	ja		LNotLastSegment		// yes

//
// last segment of scan
//
LLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there. The number of pixels left is variable, and we want to land on the
// last pixel, not step one past it, so we can't run into arithmetic problems
//
// On entry %ecx = pixels in this segment - 1 (0..7).
//
	testl	%ecx,%ecx
	jz		LNoSteps			// just draw the last pixel and we're done

// pick up after the FDIV that was left in flight previously


	fld		%st(0)				// duplicate it
	fmul	%st(4),%st(0)		// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)		// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext

	movl	C(tadjust),%ebx
	movl	C(sadjust),%eax

	addl	snext,%eax
	addl	tnext,%ebx

	movl	C(bbextents),%ebp
	movl	C(bbextentt),%edx

	cmpl	$2048,%eax
	jl		LClampLow4
	cmpl	%ebp,%eax
	ja		LClampHigh4
LClampReentry4:
	movl	%eax,snext

	cmpl	$2048,%ebx
	jl		LClampLow5
	cmpl	%edx,%ebx
	ja		LClampHigh5
LClampReentry5:

	cmpl	$1,%ecx				// don't bother
	je		LOnlyOneStep		// if two pixels in segment, there's only one step,
								//  of the segment length
	subl	s,%eax
	subl	t,%ebx

	addl	%eax,%eax			// convert to 15.17 format so multiply by 1.31
	addl	%ebx,%ebx			//  reciprocal yields 16.48
	imull	reciprocal_table-8(,%ecx,4)	// sstep = (snext - s) / (spancount-1)
	movl	%edx,%ebp			// keep the high half of the product as sstep

	movl	%ebx,%eax
	imull	reciprocal_table-8(,%ecx,4)	// tstep = (tnext - t) / (spancount-1)
										// (tstep is left in %edx)

LSetEntryvec:
//
// set up advancetable
//
// Here %ebp = sstep and %edx = tstep. The handler address for this pixel
// count is pushed now and "returned" into once the registers are loaded.
//
	movl	spr8entryvec_table(,%ecx,4),%ebx
	movl	%edx,%eax
	pushl	%ebx				// entry point into code for RET later
	movl	%ebp,%ecx
	sarl	$16,%ecx			// sstep >>= 16;
	movl	C(cachewidth),%ebx
	sarl	$16,%edx			// tstep >>= 16;
	jz		LIsZeroLast
	imull	%ebx,%edx			// (tstep >> 16) * cachewidth;
LIsZeroLast:
	addl	%ecx,%edx			// add in sstep
								// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%ecx
	movl	%edx,advancetable+4	// advance base in t
	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$16,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$16,%eax			// left-justify tstep fractional part
	movl	%edx,advancetable	// advance extra in t

	movl	%eax,tstep
	movl	%ebp,sstep
	movl	%ecx,%edx			// %edx = tfrac fraction for the pixel code

	movl	pz,%ecx
	movl	izi,%ebp

	ret							// jump to the number-of-pixels handler

//----------------------------------------

// single remaining pixel: no stepping needed, draw it via the final slot
LNoSteps:
	movl	pz,%ecx
	subl	$7,%edi				// adjust for hardwired offset
	subl	$14,%ecx
	jmp		LEndSpan


// two pixels: the single step is the whole s/t delta, no reciprocal multiply
LOnlyOneStep:
	subl	s,%eax
	subl	t,%ebx
	movl	%eax,%ebp
	movl	%ebx,%edx
	jmp		LSetEntryvec

//----------------------------------------
//
// Entry points for the last segment, one per pixel count (N pixels enter at
// Spr8EntryN_8). They are reached through the "ret" above, using the address
// taken from spr8entryvec_table. Each one biases %edi / %ecx backwards so
// that the shared unrolled code below, whose offsets are hardwired, finishes
// on pixel slot 7 / z-buffer offset 14.
//

.globl Spr8Entry2_8
Spr8Entry2_8:
	subl	$6,%edi				// adjust for hardwired offsets
	subl	$12,%ecx
// NOTE(review): this load looks dead - LLEntry2_8 either reloads %al after
// its z test or overwrites %eax with sbbl before any use. Left in place.
	movb	(%esi),%al
	jmp		LLEntry2_8

//----------------------------------------

.globl Spr8Entry3_8
Spr8Entry3_8:
	subl	$5,%edi				// adjust for hardwired offsets
	subl	$10,%ecx
	jmp		LLEntry3_8

//----------------------------------------

.globl Spr8Entry4_8
Spr8Entry4_8:
	subl	$4,%edi				// adjust for hardwired offsets
	subl	$8,%ecx
	jmp		LLEntry4_8

//----------------------------------------

.globl Spr8Entry5_8
Spr8Entry5_8:
	subl	$3,%edi				// adjust for hardwired offsets
	subl	$6,%ecx
	jmp		LLEntry5_8

//----------------------------------------

.globl Spr8Entry6_8
Spr8Entry6_8:
	subl	$2,%edi				// adjust for hardwired offsets
	subl	$4,%ecx
	jmp		LLEntry6_8

//----------------------------------------

.globl Spr8Entry7_8
Spr8Entry7_8:
	decl	%edi				// adjust for hardwired offsets
	subl	$2,%ecx
	jmp		LLEntry7_8

//----------------------------------------

.globl Spr8Entry8_8
Spr8Entry8_8:
	cmpw	(%ecx),%bp
	jl		Lp9
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp9
	movw	%bp,(%ecx)
	movb	%al,(%edi)
Lp9:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry7_8:
	cmpw	2(%ecx),%bp
	jl		Lp10
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp10
	movw	%bp,2(%ecx)
	movb	%al,1(%edi)
Lp10:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry6_8:
	cmpw	4(%ecx),%bp
	jl		Lp11
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp11
	movw	%bp,4(%ecx)
	movb	%al,2(%edi)
Lp11:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry5_8:
	cmpw	6(%ecx),%bp
	jl		Lp12
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp12
	movw	%bp,6(%ecx)
	movb	%al,3(%edi)
Lp12:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry4_8:
	cmpw	8(%ecx),%bp
	jl		Lp13
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp13
	movw	%bp,8(%ecx)
	movb	%al,4(%edi)
Lp13:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry3_8:
	cmpw	10(%ecx),%bp
	jl		Lp14
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp14
	movw	%bp,10(%ecx)
	movb	%al,5(%edi)
Lp14:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi
LLEntry2_8:
	cmpw	12(%ecx),%bp
	jl		Lp15
	movb	(%esi),%al
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp15
	movw	%bp,12(%ecx)
	movb	%al,6(%edi)
Lp15:
	addl	izistep,%ebp
	adcl	$0,%ebp
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	sstep,%ebx
	adcl	advancetable+4(,%eax,4),%esi

// final pixel of the span; no stepping afterwards
LEndSpan:
	cmpw	14(%ecx),%bp
	jl		Lp16
	movb	(%esi),%al			// load first texel in segment
	cmpb	$(TRANSPARENT_COLOR),%al
	jz		Lp16
	movw	%bp,14(%ecx)
	movb	%al,7(%edi)
Lp16:

//
// clear s/z, t/z, 1/z from FP stack
//
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)

	popl	%ebx				// restore spans pointer
LNextSpan:
	addl	$(sspan_t_size),%ebx	// point to next span
	movl	sspan_t_count(%ebx),%ecx
	cmpl	$0,%ecx				// any more spans?
	jg		LSpanLoop			// yes
	jz		LNextSpan			// yes, but this one's empty
								// (a negative count falls through and ends)

	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

#endif	// id386