/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
//
// d_parta.s
// x86 assembly-language 8-bpp particle-drawing code.
//
// Syntax: AT&T (source, destination), run through the C preprocessor.
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "d_ifacea.h"
#include "asm_draw.h"

#if	id386

//----------------------------------------------------------------------
// 8-bpp particle drawing code.
//----------------------------------------------------------------------

//FIXME: comments, full optimization

//----------------------------------------------------------------------
// D_DrawParticle
//
// Transforms one particle into view space, projects it to screen
// coordinates, and draws it as a z-buffered solid-colour square of
// pix x (pix << d_y_aspect_shift) pixels.
//
// In:    one 32-bit stack argument (found at P(%esp) once the three
//        register pushes below are done): pointer to the particle.  The
//        origin is read at pt_org+0/4/8 and the colour at pt_color.
// Out:   nothing is returned in registers.
// Saved: %ebp, %edi, %ebx are pushed on entry and popped at LDone;
//        %esi is pushed/popped inside the 2x2, 3x3 and 4x4 paths.
// Clobb: %eax, %ecx, %edx, flags.
// x87:   uses up to 7 stack slots; every exit path pops what it pushed.
// Scratch memory written: DP_u, DP_v, DP_Color, izi, and DP_Pix (default
//        path only).
//
// The particle is dropped without drawing when
//   - its view-space z is below float_particle_z_clip,
//   - its projected (u, v) lies outside the d_vrect*_particle rectangle,
//   - the clamped pixel size is not positive.
//----------------------------------------------------------------------

	.text

// offset of the argument from %esp: 12 bytes of pushed registers plus the
// 4-byte return address
#define P	12+4

	.align 4
.globl C(D_DrawParticle)
C(D_DrawParticle):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi				// preserve register variables
	pushl	%ebx

	movl	P(%esp),%edi		// %edi = particle pointer

// FIXME: better FP overlap in general here

// transform point
//	VectorSubtract (p->org, r_origin, local);
	flds	C(r_origin)
	fsubrs	pt_org(%edi)
	flds	pt_org+4(%edi)
	fsubs	C(r_origin)+4
	flds	pt_org+8(%edi)
	fsubs	C(r_origin)+8
	fxch	%st(2)			// local[0] | local[1] | local[2]

//	transformed[2] = DotProduct(local, r_ppn);
	flds	C(r_ppn)		// r_ppn[0] | local[0] | local[1] | local[2]
	fmul	%st(1),%st(0)	// dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+4		// r_ppn[1] | dot0 | local[0] | local[1] | local[2]
	fmul	%st(3),%st(0)	// dot1 | dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+8		// r_ppn[2] | dot1 | dot0 | local[0] |
							//  local[1] | local[2]
	fmul	%st(5),%st(0)	// dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
	fxch	%st(2)			// dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
	faddp	%st(0),%st(1)	// dot0 + dot1 | dot2 | local[0] | local[1] |
							//  local[2]
	faddp	%st(0),%st(1)	// z | local[0] | local[1] | local[2]
	fld		%st(0)			// z | z | local[0] | local[1] |
							//  local[2]
	fdivrs	float_1			// 1/z | z | local[0] | local[1] | local[2]
	fxch	%st(1)			// z | 1/z | local[0] | local[1] | local[2]

//	if (transformed[2] < PARTICLE_Z_CLIP)
//		return;
	fcomps	float_particle_z_clip	// 1/z | local[0] | local[1] | local[2]
	fxch	%st(3)					// local[2] | local[0] | local[1] | 1/z

// the start of the r_pup dot product is issued before the compare result
// is read back
// NOTE(review): this relies on flds/fmul leaving the C0 condition bit
// from fcomps untouched; Intel documents C0 as undefined after those
// instructions - confirm this is acceptable on all target CPUs/emulators
	flds	C(r_pup)		// r_pup[0] | local[2] | local[0] | local[1] | 1/z
	fmul	%st(2),%st(0)	// dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+4		// r_pup[1] | dot0 | local[2] | local[0] |
							//  local[1] | 1/z

	fnstsw	%ax
	testb	$1,%ah			// bit 0 of %ah is C0: set when z < clip
	jnz		LPop6AndDone	// six values are on the FP stack here

//	transformed[1] = DotProduct(local, r_pup);
	fmul	%st(4),%st(0)	// dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+8		// r_pup[2] | dot1 | dot0 | local[2] |
							//  local[0] | local[1] | 1/z
	fmul	%st(3),%st(0)	// dot2 | dot1 | dot0 | local[2] | local[0] |
							//  local[1] | 1/z
	fxch	%st(2)			// dot0 | dot1 | dot2 | local[2] | local[0] |
							//  local[1] | 1/z
	faddp	%st(0),%st(1)	// dot0 + dot1 | dot2 | local[2] | local[0] |
							//  local[1] | 1/z
	faddp	%st(0),%st(1)	// y | local[2] | local[0] | local[1] | 1/z
	fxch	%st(3)			// local[1] | local[2] | local[0] | y | 1/z

//	transformed[0] = DotProduct(local, r_pright);
	fmuls	C(r_pright)+4	// dot1 | local[2] | local[0] | y | 1/z
	fxch	%st(2)			// local[0] | local[2] | dot1 | y | 1/z
	fmuls	C(r_pright)		// dot0 | local[2] | dot1 | y | 1/z
	fxch	%st(1)			// local[2] | dot0 | dot1 | y | 1/z
	fmuls	C(r_pright)+8	// dot2 | dot0 | dot1 | y | 1/z
	fxch	%st(2)			// dot1 | dot0 | dot2 | y | 1/z
	faddp	%st(0),%st(1)	// dot1 + dot0 | dot2 | y | 1/z

	faddp	%st(0),%st(1)	// x | y | 1/z
	fxch	%st(1)			// y | x | 1/z

// project the point
	fmul	%st(2),%st(0)	// y/z | x | 1/z
	fxch	%st(1)			// x | y/z | 1/z
	fmul	%st(2),%st(0)	// x/z | y/z | 1/z
	fxch	%st(1)			// y/z | x/z | 1/z
	fsubrs	C(ycenter)		// v | x/z | 1/z
	fxch	%st(1)			// x/z | v | 1/z
	fadds	C(xcenter)		// u | v | 1/z
// FIXME: preadjust xcenter and ycenter
	fxch	%st(1)			// v | u | 1/z
	fadds	float_point5	// v | u | 1/z
	fxch	%st(1)			// u | v | 1/z
	fadds	float_point5	// u | v | 1/z
	fxch	%st(2)			// 1/z | v | u
	fmuls	DP_32768		// 1/z * 0x8000 | v | u
	fxch	%st(2)			// u | v | 1/z * 0x8000

// FIXME: use Terje's fp->int trick here?
// FIXME: check we're getting proper rounding here
	fistpl	DP_u			// v | 1/z * 0x8000
	fistpl	DP_v			// 1/z * 0x8000

	movl	DP_u,%eax		// %eax = u
	movl	DP_v,%edx		// %edx = v

// if ((v > d_vrectbottom_particle) ||
// 	(u > d_vrectright_particle) ||
// 	(v < d_vrecty) ||
// 	(u < d_vrectx))
// {
// 	continue;
// }

	movl	C(d_vrectbottom_particle),%ebx
	movl	C(d_vrectright_particle),%ecx
	cmpl	%ebx,%edx
	jg		LPop1AndDone	// 1/z * 0x8000 is still on the FP stack
	cmpl	%ecx,%eax
	jg		LPop1AndDone
	movl	C(d_vrecty),%ebx
	movl	C(d_vrectx),%ecx
	cmpl	%ebx,%edx
	jl		LPop1AndDone

	cmpl	%ecx,%eax
	jl		LPop1AndDone

// the colour is loaded as a float and converted to an integer in memory
	flds	pt_color(%edi)	// color | 1/z * 0x8000
// FIXME: use Terje's fast fp->int trick?
	fistpl	DP_Color		// 1/z * 0x8000

// pdest = d_viewbuffer + d_scantable[v] + u  (ends up in %edi)
// pz    = d_pzbuffer + d_zrowbytes * v + u * 2  (ends up in %edx)
	movl	C(d_viewbuffer),%ebx

	addl	%eax,%ebx
	movl	C(d_scantable)(,%edx,4),%edi		// point to the pixel

	imull	C(d_zrowbytes),%edx		// point to the z pixel

	leal	(%edx,%eax,2),%edx
	movl	C(d_pzbuffer),%eax

	fistpl	izi				// FP stack is empty from here on

	addl	%ebx,%edi
	addl	%eax,%edx

// pix = izi >> d_pix_shift;

	movl	izi,%eax
	movl	C(d_pix_shift),%ecx
	shrl	%cl,%eax
	movl	izi,%ebp		// %bp = z value compared against / stored to pz

// if (pix < d_pix_min)
// 		pix = d_pix_min;
// else if (pix > d_pix_max)
//  	pix = d_pix_max;

	movl	C(d_pix_min),%ebx
	movl	C(d_pix_max),%ecx
	cmpl	%ebx,%eax
	jnl		LTestPixMax
	movl	%ebx,%eax
	jmp		LTestDone

LTestPixMax:
	cmpl	%ecx,%eax
	jng		LTestDone
	movl	%ecx,%eax
LTestDone:

// A pix of zero or less means there is nothing to draw.  It must be
// rejected here: pix == 0 would index one entry before DP_EntryTable in
// the dispatch below, and the decrement-until-zero loops under LDefault
// would start from 0 and wrap around instead of running zero times.
	testl	%eax,%eax
	jle		LDone

	movb	DP_Color,%ch	// %ch = colour byte (low byte of DP_Color)

// the unrolled 1x1..4x4 cases draw squares, so they are only used when
// d_y_aspect_shift is 0 and pix is in 1..4
	movl	C(d_y_aspect_shift),%ebx
	testl	%ebx,%ebx
	jnz		LDefault

	cmpl	$4,%eax
	ja		LDefault

// jump through entry (pix - 1) of DP_EntryTable, which is defined outside
// this file (presumably it lists DP_1x1 .. DP_4x4 - confirm)
	jmp		DP_EntryTable-4(,%eax,4)

// Register roles for all drawing code below:
//   %edx = pz (z-buffer address)      %edi = pdest (frame buffer address)
//   %bp  = izi                        %ch  = colour
// A pixel is written when the stored z is <= izi (signed 16-bit compare).

// 1x1
.globl DP_1x1
DP_1x1:
	cmpw	%bp,(%edx)		// just one pixel to do
	jg		LDone
	movw	%bp,(%edx)
	movb	%ch,(%edi)
	jmp		LDone

// 2x2
.globl DP_2x2
DP_2x2:
	pushl	%esi
	movl	C(screenwidth),%ebx		// %ebx = frame buffer row stride
	movl	C(d_zrowbytes),%esi		// %esi = z-buffer row stride

	cmpw	%bp,(%edx)
	jg		L2x2_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L2x2_1:
	cmpw	%bp,2(%edx)
	jg		L2x2_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L2x2_2:
	cmpw	%bp,(%edx,%esi,1)
	jg		L2x2_3
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L2x2_3:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L2x2_4
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L2x2_4:

	popl	%esi
	jmp		LDone

// 3x3
.globl DP_3x3
DP_3x3:
	pushl	%esi
	movl	C(screenwidth),%ebx		// %ebx = frame buffer row stride
	movl	C(d_zrowbytes),%esi		// %esi = z-buffer row stride

// row 0
	cmpw	%bp,(%edx)
	jg		L3x3_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L3x3_1:
	cmpw	%bp,2(%edx)
	jg		L3x3_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L3x3_2:
	cmpw	%bp,4(%edx)
	jg		L3x3_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L3x3_3:

// row 1
	cmpw	%bp,(%edx,%esi,1)
	jg		L3x3_4
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L3x3_4:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L3x3_5
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L3x3_5:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L3x3_6
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L3x3_6:

// row 2
	cmpw	%bp,(%edx,%esi,2)
	jg		L3x3_7
	movw	%bp,(%edx,%esi,2)
	movb	%ch,(%edi,%ebx,2)
L3x3_7:
	cmpw	%bp,2(%edx,%esi,2)
	jg		L3x3_8
	movw	%bp,2(%edx,%esi,2)
	movb	%ch,1(%edi,%ebx,2)
L3x3_8:
	cmpw	%bp,4(%edx,%esi,2)
	jg		L3x3_9
	movw	%bp,4(%edx,%esi,2)
	movb	%ch,2(%edi,%ebx,2)
L3x3_9:

	popl	%esi
	jmp		LDone


// 4x4
.globl DP_4x4
DP_4x4:
	pushl	%esi
	movl	C(screenwidth),%ebx		// %ebx = frame buffer row stride
	movl	C(d_zrowbytes),%esi		// %esi = z-buffer row stride

// row 0
	cmpw	%bp,(%edx)
	jg		L4x4_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_1:
	cmpw	%bp,2(%edx)
	jg		L4x4_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_2:
	cmpw	%bp,4(%edx)
	jg		L4x4_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_3:
	cmpw	%bp,6(%edx)
	jg		L4x4_4
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_4:

// row 1
	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_5
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_5:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_6
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_6:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_7
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_7:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_8
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_8:

// advance both pointers two rows, then repeat the two-row pattern
	leal	(%edx,%esi,2),%edx
	leal	(%edi,%ebx,2),%edi

// row 2
	cmpw	%bp,(%edx)
	jg		L4x4_9
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_9:
	cmpw	%bp,2(%edx)
	jg		L4x4_10
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_10:
	cmpw	%bp,4(%edx)
	jg		L4x4_11
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_11:
	cmpw	%bp,6(%edx)
	jg		L4x4_12
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_12:

// row 3
	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_13
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_13:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_14
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_14:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_15
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_15:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_16
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_16:

	popl	%esi
	jmp		LDone

// default case, handling any size particle
LDefault:

// count = pix << d_y_aspect_shift;

	movl	%eax,%ebx
	movl	%eax,DP_Pix
	movb	C(d_y_aspect_shift),%cl	// only %cl is written; %ch keeps the colour
	shll	%cl,%ebx				// %ebx = count (rows to draw)

// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
// {
// 	for (i=0 ; i<pix ; i++)
// 	{
// 		if (pz[i] <= izi)
// 		{
// 			pz[i] = izi;
// 			pdest[i] = color;
// 		}
// 	}
// }

// both loops test at the bottom, so they rely on pix > 0 (checked after
// LTestDone); each row is walked from column pix-1 down to column 0
LGenRowLoop:
	movl	DP_Pix,%eax

LGenColLoop:
	cmpw	%bp,-2(%edx,%eax,2)
	jg		LGSkip
	movw	%bp,-2(%edx,%eax,2)
	movb	%ch,-1(%edi,%eax,1)
LGSkip:
	decl	%eax			// --pix
	jnz		LGenColLoop

	addl	C(d_zrowbytes),%edx
	addl	C(screenwidth),%edi

	decl	%ebx			// --count
	jnz		LGenRowLoop

// common exit; the FP stack must already be empty when this is reached
LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

// early-out paths: discard the values still on the FP stack, then exit
LPop6AndDone:
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
LPop1AndDone:
	fstp	%st(0)
	jmp		LDone

#endif	// id386