/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
//
// d_draw.s
// x86 assembly-language horizontal 8-bpp span-drawing code.
//
// Syntax: AT&T (GAS), run through the C preprocessor.
// Target: 32-bit x86 with x87 FPU.  Both entry points take a single
// pointer argument on the stack and preserve %ebp, %edi, %esi and %ebx.
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"

#if	id386

//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for polygons, with no transparency.
//
// Assumes there is at least one span in pspans, and that every span
// contains at least one pixel
//
// D_DrawSpans8
//   In:    4(%esp) at entry = pointer to the first span descriptor; the
//          list is walked through espan_t_pnext until a null link.
//   Out:   nothing in registers; texels are written to the destination
//          computed from d_viewbuffer and d_scantable.
//   Saved: %ebp, %edi, %esi, %ebx (pushed on entry, popped on exit).
//   Clobb: %eax, %ecx, %edx, flags.  The x87 stack is used but every
//          value pushed is popped again before returning.
//
// Each span is drawn in segments of up to 8 pixels.  s and t are computed
// exactly (s/z * z, t/z * z) at segment ends and stepped linearly in
// between; the FDIV for the next segment end is started early so that it
// overlaps the integer pixel loop.
//
// Register roles inside the pixel-stepping code:
//   %esi = source texel pointer      %edi = destination pixel pointer
//   %edx = t fraction accumulator    %ebx = s fraction accumulator
//   %ebp = s fractional step         tstep (memory) = t fractional step
//   %ecx = 0 or -1 (carry out of the t fraction), used as an index so
//          that advancetable+4(,%ecx,4) selects advancetable+4 (no carry)
//          or advancetable+0 (carry: one extra row of cachewidth)
//   %al  = texel in flight between load and store
//----------------------------------------------------------------------

	.text

// out-of-line, rarely-needed clamping code

// clamp stubs 0/1: initial s (%esi) and t (%edx) of a span.  They are
// entered with the flags of an unsigned "cmpl extent,value / ja" still
// live; jg then separates "above the extent" from "negative".
LClampHigh0:
	movl	C(bbextents),%esi
	jmp	LClampReentry0
LClampHighOrLow0:
	jg	LClampHigh0
	xorl	%esi,%esi
	jmp	LClampReentry0

LClampHigh1:
	movl	C(bbextentt),%edx
	jmp	LClampReentry1
LClampHighOrLow1:
	jg	LClampHigh1
	xorl	%edx,%edx
	jmp	LClampReentry1

// clamp stubs 2..5: s/t at the end of a segment, limited to the range
// [2048, bbextent]
LClampLow2:
	movl	$2048,%ebp
	jmp	LClampReentry2
LClampHigh2:
	movl	C(bbextents),%ebp
	jmp	LClampReentry2

LClampLow3:
	movl	$2048,%ecx
	jmp	LClampReentry3
LClampHigh3:
	movl	C(bbextentt),%ecx
	jmp	LClampReentry3

LClampLow4:
	movl	$2048,%eax
	jmp	LClampReentry4
LClampHigh4:
	movl	C(bbextents),%eax
	jmp	LClampReentry4

LClampLow5:
	movl	$2048,%ebx
	jmp	LClampReentry5
LClampHigh5:
	movl	C(bbextentt),%ebx
	jmp	LClampReentry5


// stack offset of the argument: return address (4) + four pushed
// registers (16)
#define pspans	4+16

	.align 4
.globl C(D_DrawSpans8)
C(D_DrawSpans8):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

//
// set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
// and span list pointers
//
// TODO: any overlap from rearranging?
	flds	C(d_sdivzstepu)
	fmuls	fp_8
	movl	C(cacheblock),%edx
	flds	C(d_tdivzstepu)
	fmuls	fp_8
	movl	pspans(%esp),%ebx	// point to the first span descriptor
	flds	C(d_zistepu)
	fmuls	fp_8
	movl	%edx,pbase			// pbase = cacheblock
	fstps	zi8stepu
	fstps	tdivz8stepu
	fstps	sdivz8stepu

LSpanLoop:
//
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
// initial s and t values
//
// FIXME: pipeline FILD?
	fildl	espan_t_v(%ebx)
	fildl	espan_t_u(%ebx)

	fld	%st(1)			// dv | du | dv
	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
	fld	%st(1)			// du | dv*d_sdivzstepv | du | dv
	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fld	%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
							//  dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
							//  dv*d_sdivzstepv | du | dv
	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fld	%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
							//  du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
							//  du*d_sdivzstepu; stays in %st(2) at end
	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
							//  s/z
	fmuls	C(d_zistepv)		// dv*d_zistepv | dv*d_tdivzstepv |
							//  du*d_tdivzstepu | du | s/z
	fxch	%st(1)			// dv*d_tdivzstepv | dv*d_zistepv |
							//  du*d_tdivzstepu | du | s/z
	faddp	%st(0),%st(2)	// dv*d_zistepv |
							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
							//  dv*d_zistepv | s/z
	fmuls	C(d_zistepu)		// du*d_zistepu |
							//  dv*d_tdivzstepv + du*d_tdivzstepu |
							//  dv*d_zistepv | s/z
	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
							//  du*d_zistepu | dv*d_zistepv | s/z
	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
							//  du*d_tdivzstepu; stays in %st(1) at end
	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z

	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
	fadds	C(d_ziorigin)		// zi = d_ziorigin + dv*d_zistepv +
							//  du*d_zistepu; stays in %st(0) at end
							// 1/z | fp_64k | t/z | s/z
//
// calculate and clamp s & t
//
	fdivr	%st(0),%st(1)	// 1/z | z*64k | t/z | s/z

//
// point %edi to the first pixel in the span
//
	movl	C(d_viewbuffer),%ecx
	movl	espan_t_v(%ebx),%eax
	movl	%ebx,pspantemp	// preserve spans pointer

	movl	C(tadjust),%edx
	movl	C(sadjust),%esi
	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
	addl	%ecx,%edi
	movl	espan_t_u(%ebx),%ecx
	addl	%ecx,%edi				// pdest = &pdestspan[scans->u];
	movl	espan_t_count(%ebx),%ecx

//
// now start the FDIV for the end of the span
//
	cmpl	$8,%ecx
	ja	LSetupNotLast1

// 8 or fewer pixels: from here on %ecx holds count-1, the number of steps
	decl	%ecx
	jz	LCleanup1		// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1

// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld	%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

	fildl	spancountminus1

	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | spancountminus1
	flds	C(d_zistepu)		// C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
	fmul	%st(2),%st(0)	// C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
	fxch	%st(1)			// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
	fxch	%st(2)			// scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
							//  C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
							//  C(d_tdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)

	flds	fp_64k
	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
							//  overlap
	jmp	LFDIVInFlight1

LCleanup1:
// single-pixel span: finish up the s and t calcs, no FDIV is started
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld	%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z
	jmp	LFDIVInFlight1

	.align	4
LSetupNotLast1:
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld	%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

// advance 1/z, s/z and t/z by a full 8-pixel segment
	fadds	zi8stepu
	fxch	%st(2)
	fadds	sdivz8stepu
	fxch	%st(2)
	flds	tdivz8stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z = 1/1/z
							// this is what we've gone to all this trouble to
							//  overlap
LFDIVInFlight1:

// add the adjustments to the initial s and t and clamp them to
// [0, bbextent]; a negative value compares as a large unsigned number,
// so one unsigned compare catches both out-of-range directions
	addl	s,%esi
	addl	t,%edx
	movl	C(bbextents),%ebx
	movl	C(bbextentt),%ebp
	cmpl	%ebx,%esi
	ja	LClampHighOrLow0
LClampReentry0:
	movl	%esi,s
	movl	pbase,%ebx
	shll	$16,%esi
	cmpl	%ebp,%edx
	movl	%esi,sfracf
	ja	LClampHighOrLow1
LClampReentry1:
	movl	%edx,t
	movl	s,%esi					// sfrac = scans->sfrac;
	shll	$16,%edx
	movl	t,%eax					// tfrac = scans->tfrac;
	sarl	$16,%esi
	movl	%edx,tfracf

//
// calculate the texture starting address
//
	sarl	$16,%eax
	movl	C(cachewidth),%edx
	imull	%edx,%eax				// (tfrac >> 16) * cachewidth
	addl	%ebx,%esi
	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
									//           ((tfrac >> 16) * cachewidth);

//
// determine whether last span or not
//
	cmpl	$8,%ecx
	jna	LLastSegment

//
// not the last segment; do full 8-wide segment
//
LNotLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there
//

// pick up after the FDIV that was left in flight previously

	fld	%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext
	movl	snext,%eax
	movl	tnext,%edx

	movb	(%esi),%bl	// get first source texel
	subl	$8,%ecx		// count off this segment's pixels
	movl	C(sadjust),%ebp
	movl	%ecx,counttemp	// remember count of remaining pixels

	movl	C(tadjust),%ecx
	movb	%bl,(%edi)	// store first dest pixel

	addl	%eax,%ebp
	addl	%edx,%ecx

	movl	C(bbextents),%eax
	movl	C(bbextentt),%edx

	cmpl	$2048,%ebp
	jl	LClampLow2
	cmpl	%eax,%ebp
	ja	LClampHigh2
LClampReentry2:

	cmpl	$2048,%ecx
	jl	LClampLow3
	cmpl	%edx,%ecx
	ja	LClampHigh3
LClampReentry3:

	movl	%ebp,snext
	movl	%ecx,tnext

	subl	s,%ebp		// s delta across the 8-pixel segment
	subl	t,%ecx		// t delta across the 8-pixel segment

//
// set up advancetable
//
// The deltas cover 8 pixels, so the whole-texel step per pixel is
// delta >> (16+3) and the per-pixel fraction is left-justified with a
// shift of 16-3 = 13 further down.
//
	movl	%ecx,%eax
	movl	%ebp,%edx
	sarl	$19,%eax			// whole-texel tstep per pixel
	jz	LZero				// zero rows per pixel: skip the multiply
	sarl	$19,%edx			// whole-texel sstep per pixel
	movl	C(cachewidth),%ebx
	imull	%ebx,%eax
	jmp	LSetUp1

LZero:
	sarl	$19,%edx			// whole-texel sstep per pixel
	movl	C(cachewidth),%ebx

LSetUp1:

	addl	%edx,%eax			// add in sstep
								// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%edx
	movl	%eax,advancetable+4	// advance base in t
	addl	%ebx,%eax			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$13,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$13,%ecx			// left-justify tstep fractional part
	movl	%eax,advancetable	// advance extra in t

	movl	%ecx,tstep
	addl	%ecx,%edx			// advance tfrac fractional part by tstep frac

	sbbl	%ecx,%ecx			// turn tstep carry into -1 (0 if none)
	addl	%ebp,%ebx			// advance sfrac fractional part by sstep frac
	adcl	advancetable+4(,%ecx,4),%esi	// point to next source texel

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	(%esi),%al
	addl	%ebp,%ebx
	movb	%al,1(%edi)
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,2(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,3(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi


//
// start FDIV for end of next segment in flight, so it can overlap
//
	movl	counttemp,%ecx
	cmpl	$8,%ecx			// more than one segment after this?
	ja	LSetupNotLast2	// yes

	decl	%ecx
	jz	LFDIVInFlight2	// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1
	fildl	spancountminus1

	flds	C(d_zistepu)		// C(d_zistepu) | spancountminus1
	fmul	%st(1),%st(0)	// C(d_zistepu)*scm1 | scm1
	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
	faddp	%st(0),%st(3)	// C(d_tdivzstepu)*scm1 | scm1
	fxch	%st(1)			// scm1 | C(d_tdivzstepu)*scm1
	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
	flds	fp_64k			// 64k | C(d_sdivzstepu)*scm1
	fxch	%st(1)			// C(d_sdivzstepu)*scm1 | 64k
	faddp	%st(0),%st(4)	// 64k

	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
							//  overlap
	jmp	LFDIVInFlight2

	.align	4
LSetupNotLast2:
	fadds	zi8stepu
	fxch	%st(2)
	fadds	sdivz8stepu
	fxch	%st(2)
	flds	tdivz8stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z = 1/1/z
							// this is what we've gone to all this trouble to
							//  overlap
LFDIVInFlight2:
	movl	%ecx,counttemp	// %ecx is reused below as the carry index

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,4(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,5(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,6(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	$8,%edi
	movl	%edx,tfracf
	movl	snext,%edx
	movl	%ebx,sfracf
	movl	tnext,%ebx
	movl	%edx,s
	movl	%ebx,t

	movl	counttemp,%ecx		// retrieve count

//
// determine whether last span or not
//
	cmpl	$8,%ecx				// are there multiple segments remaining?
	movb	%al,-1(%edi)		// eighth pixel; movb leaves the flags alone
	ja	LNotLastSegment		// yes

//
// last segment of scan
//
LLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there. The number of pixels left is variable, and we want to land on the
// last pixel, not step one past it, so we can't run into arithmetic problems
//
	testl	%ecx,%ecx
	jz	LNoSteps		// just draw the last pixel and we're done

// pick up after the FDIV that was left in flight previously


	fld	%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext

	movb	(%esi),%al		// load first texel in segment
	movl	C(tadjust),%ebx
	movb	%al,(%edi)		// store first pixel in segment
	movl	C(sadjust),%eax

	addl	snext,%eax
	addl	tnext,%ebx

	movl	C(bbextents),%ebp
	movl	C(bbextentt),%edx

	cmpl	$2048,%eax
	jl	LClampLow4
	cmpl	%ebp,%eax
	ja	LClampHigh4
LClampReentry4:
	movl	%eax,snext

	cmpl	$2048,%ebx
	jl	LClampLow5
	cmpl	%edx,%ebx
	ja	LClampHigh5
LClampReentry5:

	cmpl	$1,%ecx			// don't bother
	je	LOnlyOneStep	// if two pixels in segment, there's only one step,
							//  of the segment length
	subl	s,%eax
	subl	t,%ebx

	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
	addl	%ebx,%ebx		//  reciprocal yields 16.48

// one-operand imull: %edx:%eax = %eax * table entry; the step is taken
// from the high half in %edx
	imull	reciprocal_table-8(,%ecx,4)	// sstep = (snext - s) / (spancount-1)
	movl	%edx,%ebp

	movl	%ebx,%eax
	imull	reciprocal_table-8(,%ecx,4)	// tstep = (tnext - t) / (spancount-1)

LSetEntryvec:
//
// set up advancetable
//
// here: %ebp = sstep, %edx = tstep, %ecx = number of steps
//
	movl	entryvec_table(,%ecx,4),%ebx
	movl	%edx,%eax
	movl	%ebx,jumptemp		// entry point for the indirect jmp below
	movl	%ebp,%ecx
	sarl	$16,%edx			// tstep >>= 16;
	movl	C(cachewidth),%ebx
	sarl	$16,%ecx			// sstep >>= 16;
	imull	%ebx,%edx

	addl	%ecx,%edx			// add in sstep
								// (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%ecx
	movl	%edx,advancetable+4	// advance base in t
	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$16,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$16,%eax			// left-justify tstep fractional part
	movl	%edx,advancetable	// advance extra in t

	movl	%eax,tstep
	movl	%ecx,%edx
	addl	%eax,%edx
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi

	jmp	*jumptemp			// jump to the number-of-pixels handler

//----------------------------------------

LNoSteps:
	movb	(%esi),%al		// load first texel in segment
	subl	$7,%edi			// adjust for hardwired offset
	jmp	LEndSpan


LOnlyOneStep:
	subl	s,%eax
	subl	t,%ebx
	movl	%eax,%ebp
	movl	%ebx,%edx
	jmp	LSetEntryvec

//----------------------------------------
//
// EntryN_8: entry points for a final segment of N pixels, reached through
// jumptemp.  Each one biases %edi so that the fixed store offsets of the
// shared tail below end on the segment's last pixel, which LEndSpan
// stores at 7(%edi).
// NOTE(review): entryvec_table is not defined in this file; presumably
// it lists these labels in order - confirm against its definition.
//
//----------------------------------------

.globl	Entry2_8
Entry2_8:
	subl	$6,%edi		// adjust for hardwired offsets
	movb	(%esi),%al
	jmp	LLEntry2_8

//----------------------------------------

.globl	Entry3_8
Entry3_8:
	subl	$5,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	jmp	LLEntry3_8

//----------------------------------------

.globl	Entry4_8
Entry4_8:
	subl	$4,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LLEntry4_8

//----------------------------------------

.globl	Entry5_8
Entry5_8:
	subl	$3,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LLEntry5_8

//----------------------------------------

.globl	Entry6_8
Entry6_8:
	subl	$2,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LLEntry6_8

//----------------------------------------

.globl	Entry7_8
Entry7_8:
	decl	%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp	LLEntry7_8

//----------------------------------------

.globl	Entry8_8
Entry8_8:
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,1(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LLEntry7_8:
	sbbl	%ecx,%ecx
	movb	%al,2(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LLEntry6_8:
	sbbl	%ecx,%ecx
	movb	%al,3(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LLEntry5_8:
	sbbl	%ecx,%ecx
	movb	%al,4(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LLEntry4_8:
	sbbl	%ecx,%ecx
	movb	%al,5(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
LLEntry3_8:
	movb	%al,6(%edi)
	movb	(%esi),%al
LLEntry2_8:

LEndSpan:

//
// clear s/z, t/z, 1/z from FP stack
//
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)

	movl	pspantemp,%ebx				// restore spans pointer
	movl	espan_t_pnext(%ebx),%ebx	// point to next span
	testl	%ebx,%ebx			// any more spans?
	movb	%al,7(%edi)			// last pixel; movb leaves the flags alone
	jnz	LSpanLoop			// more spans

	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

//----------------------------------------------------------------------
// 8-bpp horizontal span z drawing code for polygons, with no transparency.
//
// Assumes there is at least one span in pzspans, and that every span
// contains at least one pixel
//
// D_DrawZSpans
//   In:    4(%esp) at entry = pointer to the first span descriptor; the
//          list is walked through espan_t_pnext until a null link.
//   Out:   nothing in registers; one 16-bit value per pixel is written to
//          the buffer addressed through d_pzbuffer and d_zrowbytes.
//   Saved: %ebp, %edi, %esi, %ebx (pushed on entry, popped on exit).
//   Clobb: %eax, %ecx, %edx, flags; the x87 stack is left balanced.
//
// 1/z is converted to an integer scaled by 2**31 (izi) and stepped per
// pixel by izistep; the upper 16 bits of izi are what gets stored.
//
// izistep is reloaded into %ebx at the top of every span.  The clamp
// stubs zero %ebx to draw a clamped span with a flat gradient, and with a
// single load ahead of the loop that zero used to leak into every later
// span of the same call.
//----------------------------------------------------------------------

	.text

// z-clamp on a non-negative gradient span: force izi to 0x40000000 and use
// a zero step for this span only
LClamp:
	movl	$0x40000000,%edx
	xorl	%ebx,%ebx
	fstp	%st(0)
	jmp	LZDraw

// z-clamp on a negative gradient span
LClampNeg:
	movl	$0x40000000,%edx
	xorl	%ebx,%ebx
	fstp	%st(0)
	jmp	LZDrawNeg


// stack offset of the argument: return address (4) + four pushed
// registers (16)
#define pzspans	4+16

.globl C(D_DrawZSpans)
C(D_DrawZSpans):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

	flds	C(d_zistepu)
	movl	C(d_zistepu),%eax	// raw bit pattern of the float
	movl	pzspans(%esp),%esi
	testl	%eax,%eax
// NOTE(review): this branch is taken only when the bit pattern is zero.
// The label names suggest a sign test (js) was intended; negative steps
// currently take the additive path with a negative izistep, which yields
// the same values, so the instruction is left unchanged.
	jz	LFNegSpan

	fmuls	Float2ToThe31nd
	fistpl	izistep		// note: we are relying on FP exceptions being turned
						// off here to avoid range problems

LFSpanLoop:
	movl	izistep,%ebx	// reload per span: LClamp zeroes %ebx
// set up the initial 1/z value
	fildl	espan_t_v(%esi)
	fildl	espan_t_u(%esi)
	movl	espan_t_v(%esi),%ecx
	movl	C(d_pzbuffer),%edi
	fmuls	C(d_zistepu)
	fxch	%st(1)
	fmuls	C(d_zistepv)
	fxch	%st(1)
	fadds	C(d_ziorigin)
	imull	C(d_zrowbytes),%ecx
	faddp	%st(0),%st(1)

// clamp if z is nearer than 2 (1/z > 0.5)
	fcoms	float_point5
	addl	%ecx,%edi
	movl	espan_t_u(%esi),%edx
	addl	%edx,%edx				// word count
	movl	espan_t_count(%esi),%ecx
	addl	%edx,%edi				// pdest = &pdestspan[scans->u];
	pushl	%esi		// preserve spans pointer
	fnstsw	%ax
	testb	$0x45,%ah	// C3|C2|C0 all clear means 1/z > 0.5
	jz	LClamp

	fmuls	Float2ToThe31nd
	fistpl	izi			// note: we are relying on FP exceptions being turned
						// off here to avoid problems when the span is closer
						// than 1/(2**31)
	movl	izi,%edx

// at this point:
// %ebx = izistep
// %ecx = count
// %edx = izi
// %edi = pdest

LZDraw:

// do a single pixel up front, if necessary to dword align the destination
	testl	$2,%edi
	jz	LFMiddle
	movl	%edx,%eax
	addl	%ebx,%edx
	shrl	$16,%eax
	decl	%ecx
	movw	%ax,(%edi)
	addl	$2,%edi

// do middle a pair of aligned dwords at a time
LFMiddle:
	pushl	%ecx
	shrl	$1,%ecx				// count / 2
	jz	LFLast				// no aligned dwords to do
	shrl	$1,%ecx				// (count / 2) / 2
	jnc	LFMiddleLoop		// even number of aligned dwords to do

// odd number of dwords: write one dword (two pixels) first
	movl	%edx,%eax
	addl	%ebx,%edx
	shrl	$16,%eax
	movl	%edx,%esi
	addl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl	%esi,%eax
	movl	%eax,(%edi)
	addl	$4,%edi
	andl	%ecx,%ecx
	jz	LFLast

// four pixels (two dwords) per iteration
LFMiddleLoop:
	movl	%edx,%eax
	addl	%ebx,%edx
	shrl	$16,%eax
	movl	%edx,%esi
	addl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl	%esi,%eax
	movl	%edx,%ebp
	movl	%eax,(%edi)
	addl	%ebx,%edx
	shrl	$16,%ebp
	movl	%edx,%esi
	addl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl	%esi,%ebp
	movl	%ebp,4(%edi)		// FIXME: eliminate register contention
	addl	$8,%edi

	decl	%ecx
	jnz	LFMiddleLoop

LFLast:
	popl	%ecx			// retrieve count
	popl	%esi			// retrieve span pointer

// do the last, unaligned pixel, if there is one
	andl	$1,%ecx			// is there an odd pixel left to do?
	jz	LFSpanDone		// no
	shrl	$16,%edx
	movw	%dx,(%edi)		// do the final pixel's z

LFSpanDone:
	movl	espan_t_pnext(%esi),%esi
	testl	%esi,%esi
	jnz	LFSpanLoop

	jmp	LFDone

LFNegSpan:
	fmuls	FloatMinus2ToThe31nd
	fistpl	izistep		// note: we are relying on FP exceptions being turned
						// off here to avoid range problems

LFNegSpanLoop:
	movl	izistep,%ebx	// reload per span: LClampNeg zeroes %ebx
// set up the initial 1/z value
	fildl	espan_t_v(%esi)
	fildl	espan_t_u(%esi)
	movl	espan_t_v(%esi),%ecx
	movl	C(d_pzbuffer),%edi
	fmuls	C(d_zistepu)
	fxch	%st(1)
	fmuls	C(d_zistepv)
	fxch	%st(1)
	fadds	C(d_ziorigin)
	imull	C(d_zrowbytes),%ecx
	faddp	%st(0),%st(1)

// clamp if z is nearer than 2 (1/z > 0.5)
	fcoms	float_point5
	addl	%ecx,%edi
	movl	espan_t_u(%esi),%edx
	addl	%edx,%edx				// word count
	movl	espan_t_count(%esi),%ecx
	addl	%edx,%edi				// pdest = &pdestspan[scans->u];
	pushl	%esi		// preserve spans pointer
	fnstsw	%ax
	testb	$0x45,%ah	// C3|C2|C0 all clear means 1/z > 0.5
	jz	LClampNeg

	fmuls	Float2ToThe31nd
	fistpl	izi			// note: we are relying on FP exceptions being turned
						// off here to avoid problems when the span is closer
						// than 1/(2**31)
	movl	izi,%edx

// at this point:
// %ebx = izistep
// %ecx = count
// %edx = izi
// %edi = pdest

LZDrawNeg:

// do a single pixel up front, if necessary to dword align the destination
	testl	$2,%edi
	jz	LFNegMiddle
	movl	%edx,%eax
	subl	%ebx,%edx
	shrl	$16,%eax
	decl	%ecx
	movw	%ax,(%edi)
	addl	$2,%edi

// do middle a pair of aligned dwords at a time
LFNegMiddle:
	pushl	%ecx
	shrl	$1,%ecx				// count / 2
	jz	LFNegLast			// no aligned dwords to do
	shrl	$1,%ecx				// (count / 2) / 2
	jnc	LFNegMiddleLoop		// even number of aligned dwords to do

// odd number of dwords: write one dword (two pixels) first
	movl	%edx,%eax
	subl	%ebx,%edx
	shrl	$16,%eax
	movl	%edx,%esi
	subl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl	%esi,%eax
	movl	%eax,(%edi)
	addl	$4,%edi
	andl	%ecx,%ecx
	jz	LFNegLast

// four pixels (two dwords) per iteration
LFNegMiddleLoop:
	movl	%edx,%eax
	subl	%ebx,%edx
	shrl	$16,%eax
	movl	%edx,%esi
	subl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl	%esi,%eax
	movl	%edx,%ebp
	movl	%eax,(%edi)
	subl	%ebx,%edx
	shrl	$16,%ebp
	movl	%edx,%esi
	subl	%ebx,%edx
	andl	$0xFFFF0000,%esi
	orl	%esi,%ebp
	movl	%ebp,4(%edi)		// FIXME: eliminate register contention
	addl	$8,%edi

	decl	%ecx
	jnz	LFNegMiddleLoop

LFNegLast:
	popl	%ecx			// retrieve count
	popl	%esi			// retrieve span pointer

// do the last, unaligned pixel, if there is one
	andl	$1,%ecx			// is there an odd pixel left to do?
	jz	LFNegSpanDone	// no
	shrl	$16,%edx
	movw	%dx,(%edi)		// do the final pixel's z

LFNegSpanDone:
	movl	espan_t_pnext(%esi),%esi
	testl	%esi,%esi
	jnz	LFNegSpanLoop

LFDone:
	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

#endif	// id386