1/* 2Copyright (C) 1996-1997 Id Software, Inc. 3 4This program is free software; you can redistribute it and/or 5modify it under the terms of the GNU General Public License 6as published by the Free Software Foundation; either version 2 7of the License, or (at your option) any later version. 8 9This program is distributed in the hope that it will be useful, 10but WITHOUT ANY WARRANTY; without even the implied warranty of 11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 13See the GNU General Public License for more details. 14 15You should have received a copy of the GNU General Public License 16along with this program; if not, write to the Free Software 17Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 19*/ 20// 21// d_polysa.s 22// x86 assembly-language polygon model drawing code 23// 24 25#include "asm_i386.h" 26#include "quakeasm.h" 27#include "asm_draw.h" 28#include "d_ifacea.h" 29 30#if id386 31 32// !!! if this is changed, it must be changed in d_polyse.c too !!! 
#define DPS_MAXSPANS			MAXHEIGHT+1
									// 1 extra for spanpackage that marks end

//#define	SPAN_SIZE	(((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
// Hard-wired equivalent of the expression above; it is only ever used as
// $(SPAN_SIZE), so the missing outer parentheses are harmless there.
#define SPAN_SIZE (1024+1+1+1)*32


	.data

	.align	4
// Spill slots for two of the four edge deltas computed by
// D_PolysetCalcGradients (the other two stay on the x87 stack).
p10_minus_p20:	.single		0
p01_minus_p21:	.single		0
// temp0, temp1 and Ltemp are not referenced in the visible part of this file.
temp0:			.single		0
temp1:			.single		0
Ltemp:			.single		0

// Entry points into the unrolled 8-pixel span loop, indexed by (-count) & 7.
aff8entryvec_table:	.long	LDraw8, LDraw7, LDraw6, LDraw5
				.long	LDraw4, LDraw3, LDraw2, LDraw1

// r_zistepx rotated by 16 bits (written by D_PolysetDrawSpans8).
lzistepx:		.long	0


	.text

#ifndef NeXT
	.extern C(D_PolysetSetEdgeTable)
	.extern C(D_RasterizeAliasPolySmooth)
#endif

//----------------------------------------------------------------------
// affine triangle gradient calculation code
//
// D_PolysetCalcGradients
//
// In:   4(%esp)  = skinwidth (one 32-bit stack argument, caller pops)
//       C(r_p0), C(r_p1), C(r_p2) = int vertex arrays; elements 0..5 are read
//       C(d_xdenom) = single-precision value used as xstepdenominv
// Out:  C(r_lstepx/y), C(r_sstepx/y), C(r_tstepx/y), C(r_zistepx/y),
//       C(a_sstepxfrac), C(a_tstepxfrac), C(a_ststepxwhole)
// Clobbers: %eax, %ecx, %edx, flags, p10_minus_p20, p01_minus_p21
//
// x87 usage: the routine pushes as many values as it pops, but the stack
// reaches a depth of eight, so it must be empty on entry.  The control
// word is switched to ceil_cw around the lighting stores and is left at
// single_cw on return (both are defined outside this file; presumably
// round-up and the normal single-precision mode - confirm).
//
// Legend for the stack annotations (leftmost entry is %st(0)):
//   dx0  = p00_minus_p20 = r_p0[0] - r_p2[0]
//   dy0  = p01_minus_p21 = r_p0[1] - r_p2[1]   (also spilled to memory)
//   dx1  = p10_minus_p20 = r_p1[0] - r_p2[0]   (spilled to memory only)
//   dy1  = p11_minus_p21 = r_p1[1] - r_p2[1]
//   xinv = xstepdenominv,  yinv = ystepdenominv = -xinv
//   NX   = t1*dy0 - t0*dy1   (numerator of the x step)
//   NY   = t1*dx0 - t0*dx1   (numerator of the y step)
// The annotations describe the result the author intended; the operand
// direction of fsub/fsubr with an %st(i) destination follows the
// assembler's AT&T convention.
//----------------------------------------------------------------------

#define skinwidth	4+0

.globl C(D_PolysetCalcGradients)
C(D_PolysetCalcGradients):

//	p00_minus_p20 = r_p0[0] - r_p2[0];
//	p01_minus_p21 = r_p0[1] - r_p2[1];
//	p10_minus_p20 = r_p1[0] - r_p2[0];
//	p11_minus_p21 = r_p1[1] - r_p2[1];
//
//	xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
//			     p00_minus_p20 * p11_minus_p21);
//
//	ystepdenominv = -xstepdenominv;
//
// NOTE(review): no division is performed here; the reciprocal is loaded
// from C(d_xdenom), presumably precomputed by the caller - confirm.

	fildl	C(r_p0)+0		// r_p0[0]
	fildl	C(r_p2)+0		// r_p2[0] | r_p0[0]
	fildl	C(r_p0)+4		// r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p2)+4		// r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p1)+0		// r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p1)+4		// r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
	fxch	%st(3)			// r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] | r_p2[0] | r_p0[0]
	fsub	%st(2),%st(0)	// dy0 | r_p1[0] | r_p2[1] | r_p1[1] | r_p2[0] | r_p0[0]
	fxch	%st(1)			// r_p1[0] | dy0 | r_p2[1] | r_p1[1] | r_p2[0] | r_p0[0]
	fsub	%st(4),%st(0)	// dx1 | dy0 | r_p2[1] | r_p1[1] | r_p2[0] | r_p0[0]
	fxch	%st(5)			// r_p0[0] | dy0 | r_p2[1] | r_p1[1] | r_p2[0] | dx1
	fsubp	%st(0),%st(4)	// dy0 | r_p2[1] | r_p1[1] | dx0 | dx1
	fxch	%st(2)			// r_p1[1] | r_p2[1] | dy0 | dx0 | dx1
	fsubp	%st(0),%st(1)	// dy1 | dy0 | dx0 | dx1
	fxch	%st(1)			// dy0 | dy1 | dx0 | dx1
	flds	C(d_xdenom)		// d_xdenom | dy0 | dy1 | dx0 | dx1
	fxch	%st(4)			// dx1 | dy0 | dy1 | dx0 | d_xdenom
	fstps	p10_minus_p20	// dy0 | dy1 | dx0 | d_xdenom
	fstps	p01_minus_p21	// dy1 | dx0 | xinv   (d_xdenom is xinv from here on)
	fxch	%st(2)			// xinv | dx0 | dy1

//// ceil () for light so positive steps are exaggerated, negative steps
//// diminished,  pushing us away from underflow toward overflow. Underflow is
//// very visible, overflow is very unlikely, because of ambient lighting
//	t0 = r_p0[4] - r_p2[4];
//	t1 = r_p1[4] - r_p2[4];

	fildl	C(r_p2)+16		// r_p2[4] | xinv | dx0 | dy1
	fildl	C(r_p0)+16		// r_p0[4] | r_p2[4] | xinv | dx0 | dy1
	fildl	C(r_p1)+16		// r_p1[4] | r_p0[4] | r_p2[4] | xinv | dx0 | dy1
	fxch	%st(2)			// r_p2[4] | r_p0[4] | r_p1[4] | xinv | dx0 | dy1
	fld		%st(0)			// r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] | xinv | dx0 | dy1
	fsubrp	%st(0),%st(2)	// r_p2[4] | t0 | r_p1[4] | xinv | dx0 | dy1
	fsubrp	%st(0),%st(2)	// t0 | t1 | xinv | dx0 | dy1

//	r_lstepx = (int)
//			ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
//	r_lstepy = (int)
//			ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);

	fld		%st(0)			// t0 | t0 | t1 | xinv | dx0 | dy1
	fmul	%st(5),%st(0)	// t0*dy1 | t0 | t1 | xinv | dx0 | dy1
	fxch	%st(2)			// t1 | t0 | t0*dy1 | xinv | dx0 | dy1
	fld		%st(0)			// t1 | t1 | t0 | t0*dy1 | xinv | dx0 | dy1
	fmuls	p01_minus_p21	// t1*dy0 | t1 | t0 | t0*dy1 | xinv | dx0 | dy1
	fxch	%st(2)			// t0 | t1 | t1*dy0 | t0*dy1 | xinv | dx0 | dy1
	fmuls	p10_minus_p20	// t0*dx1 | t1 | t1*dy0 | t0*dy1 | xinv | dx0 | dy1
	fxch	%st(1)			// t1 | t0*dx1 | t1*dy0 | t0*dy1 | xinv | dx0 | dy1
	fmul	%st(5),%st(0)	// t1*dx0 | t0*dx1 | t1*dy0 | t0*dy1 | xinv | dx0 | dy1
	fxch	%st(2)			// t1*dy0 | t0*dx1 | t1*dx0 | t0*dy1 | xinv | dx0 | dy1
	fsubp	%st(0),%st(3)	// t0*dx1 | t1*dx0 | NX | xinv | dx0 | dy1
	fsubrp	%st(0),%st(1)	// NY | NX | xinv | dx0 | dy1
	fld		%st(2)			// xinv | NY | NX | xinv | dx0 | dy1
	fmuls	float_minus_1	// yinv | NY | NX | xinv | dx0 | dy1
	fxch	%st(2)			// NX | NY | yinv | xinv | dx0 | dy1
	fmul	%st(3),%st(0)	// NX*xinv | NY | yinv | xinv | dx0 | dy1
	fxch	%st(1)			// NY | NX*xinv | yinv | xinv | dx0 | dy1
	fmul	%st(2),%st(0)	// NY*yinv | NX*xinv | yinv | xinv | dx0 | dy1
	fldcw	ceil_cw			// lighting steps only are stored under ceil_cw
	fistpl	C(r_lstepy)		// r_lstepx | yinv | xinv | dx0 | dy1
	fistpl	C(r_lstepx)		// yinv | xinv | dx0 | dy1
	fldcw	single_cw		// back to single_cw for the remaining stores

//	t0 = r_p0[2] - r_p2[2];
//	t1 = r_p1[2] - r_p2[2];

	fildl	C(r_p2)+8		// r_p2[2] | yinv | xinv | dx0 | dy1
	fildl	C(r_p0)+8		// r_p0[2] | r_p2[2] | yinv | xinv | dx0 | dy1
	fildl	C(r_p1)+8		// r_p1[2] | r_p0[2] | r_p2[2] | yinv | xinv | dx0 | dy1
	fxch	%st(2)			// r_p2[2] | r_p0[2] | r_p1[2] | yinv | xinv | dx0 | dy1
	fld		%st(0)			// r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] | yinv | xinv | dx0 | dy1
	fsubrp	%st(0),%st(2)	// r_p2[2] | t0 | r_p1[2] | yinv | xinv | dx0 | dy1
	fsubrp	%st(0),%st(2)	// t0 | t1 | yinv | xinv | dx0 | dy1

//	r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);

	fld		%st(0)			// t0 | t0 | t1 | yinv | xinv | dx0 | dy1
	fmul	%st(6),%st(0)	// t0*dy1 | t0 | t1 | yinv | xinv | dx0 | dy1
	fxch	%st(2)			// t1 | t0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fld		%st(0)			// t1 | t1 | t0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fmuls	p01_minus_p21	// t1*dy0 | t1 | t0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fxch	%st(2)			// t0 | t1 | t1*dy0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fmuls	p10_minus_p20	// t0*dx1 | t1 | t1*dy0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fxch	%st(1)			// t1 | t0*dx1 | t1*dy0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fmul	%st(6),%st(0)	// t1*dx0 | t0*dx1 | t1*dy0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fxch	%st(2)			// t1*dy0 | t0*dx1 | t1*dx0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fsubp	%st(0),%st(3)	// t0*dx1 | t1*dx0 | NX | yinv | xinv | dx0 | dy1
	fsubrp	%st(0),%st(1)	// NY | NX | yinv | xinv | dx0 | dy1
	fmul	%st(2),%st(0)	// NY*yinv | NX | yinv | xinv | dx0 | dy1
	fxch	%st(1)			// NX | NY*yinv | yinv | xinv | dx0 | dy1
	fmul	%st(3),%st(0)	// NX*xinv | NY*yinv | yinv | xinv | dx0 | dy1
	fxch	%st(1)			// NY*yinv | NX*xinv | yinv | xinv | dx0 | dy1
	fistpl	C(r_sstepy)		// r_sstepx | yinv | xinv | dx0 | dy1
	fistpl	C(r_sstepx)		// yinv | xinv | dx0 | dy1

//	t0 = r_p0[3] - r_p2[3];
//	t1 = r_p1[3] - r_p2[3];

	fildl	C(r_p2)+12		// r_p2[3] | yinv | xinv | dx0 | dy1
	fildl	C(r_p0)+12		// r_p0[3] | r_p2[3] | yinv | xinv | dx0 | dy1
	fildl	C(r_p1)+12		// r_p1[3] | r_p0[3] | r_p2[3] | yinv | xinv | dx0 | dy1
	fxch	%st(2)			// r_p2[3] | r_p0[3] | r_p1[3] | yinv | xinv | dx0 | dy1
	fld		%st(0)			// r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] | yinv | xinv | dx0 | dy1
	fsubrp	%st(0),%st(2)	// r_p2[3] | t0 | r_p1[3] | yinv | xinv | dx0 | dy1
	fsubrp	%st(0),%st(2)	// t0 | t1 | yinv | xinv | dx0 | dy1

//	r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);

	fld		%st(0)			// t0 | t0 | t1 | yinv | xinv | dx0 | dy1
	fmul	%st(6),%st(0)	// t0*dy1 | t0 | t1 | yinv | xinv | dx0 | dy1
	fxch	%st(2)			// t1 | t0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fld		%st(0)			// t1 | t1 | t0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fmuls	p01_minus_p21	// t1*dy0 | t1 | t0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fxch	%st(2)			// t0 | t1 | t1*dy0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fmuls	p10_minus_p20	// t0*dx1 | t1 | t1*dy0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fxch	%st(1)			// t1 | t0*dx1 | t1*dy0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fmul	%st(6),%st(0)	// t1*dx0 | t0*dx1 | t1*dy0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fxch	%st(2)			// t1*dy0 | t0*dx1 | t1*dx0 | t0*dy1 | yinv | xinv | dx0 | dy1
	fsubp	%st(0),%st(3)	// t0*dx1 | t1*dx0 | NX | yinv | xinv | dx0 | dy1
	fsubrp	%st(0),%st(1)	// NY | NX | yinv | xinv | dx0 | dy1
	fmul	%st(2),%st(0)	// NY*yinv | NX | yinv | xinv | dx0 | dy1
	fxch	%st(1)			// NX | NY*yinv | yinv | xinv | dx0 | dy1
	fmul	%st(3),%st(0)	// NX*xinv | NY*yinv | yinv | xinv | dx0 | dy1
	fxch	%st(1)			// NY*yinv | NX*xinv | yinv | xinv | dx0 | dy1
	fistpl	C(r_tstepy)		// r_tstepx | yinv | xinv | dx0 | dy1
	fistpl	C(r_tstepx)		// yinv | xinv | dx0 | dy1

//	t0 = r_p0[5] - r_p2[5];
//	t1 = r_p1[5] - r_p2[5];

	fildl	C(r_p2)+20		// r_p2[5] | yinv | xinv | dx0 | dy1
	fildl	C(r_p0)+20		// r_p0[5] | r_p2[5] | yinv | xinv | dx0 | dy1
	fildl	C(r_p1)+20		// r_p1[5] | r_p0[5] | r_p2[5] | yinv | xinv | dx0 | dy1
	fxch	%st(2)			// r_p2[5] | r_p0[5] | r_p1[5] | yinv | xinv | dx0 | dy1
	fld		%st(0)			// r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] | yinv | xinv | dx0 | dy1
	fsubrp	%st(0),%st(2)	// r_p2[5] | t0 | r_p1[5] | yinv | xinv | dx0 | dy1
	fsubrp	%st(0),%st(2)	// t0 | t1 | yinv | xinv | dx0 | dy1

//	r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);
//
// This is the last gradient, so the saved dx0, dy1, xinv and yinv are
// consumed in place (fmulp/fsubp) and the x87 stack ends up empty.

	fld		%st(0)			// t0 | t0 | t1 | yinv | xinv | dx0 | dy1
	fmulp	%st(0),%st(6)	// t0 | t1 | yinv | xinv | dx0 | t0*dy1
	fxch	%st(1)			// t1 | t0 | yinv | xinv | dx0 | t0*dy1
	fld		%st(0)			// t1 | t1 | t0 | yinv | xinv | dx0 | t0*dy1
	fmuls	p01_minus_p21	// t1*dy0 | t1 | t0 | yinv | xinv | dx0 | t0*dy1
	fxch	%st(2)			// t0 | t1 | t1*dy0 | yinv | xinv | dx0 | t0*dy1
	fmuls	p10_minus_p20	// t0*dx1 | t1 | t1*dy0 | yinv | xinv | dx0 | t0*dy1
	fxch	%st(1)			// t1 | t0*dx1 | t1*dy0 | yinv | xinv | dx0 | t0*dy1
	fmulp	%st(0),%st(5)	// t0*dx1 | t1*dy0 | yinv | xinv | t1*dx0 | t0*dy1
	fxch	%st(5)			// t0*dy1 | t1*dy0 | yinv | xinv | t1*dx0 | t0*dx1
	fsubrp	%st(0),%st(1)	// NX | yinv | xinv | t1*dx0 | t0*dx1
	fxch	%st(3)			// t1*dx0 | yinv | xinv | NX | t0*dx1
	fsubp	%st(0),%st(4)	// yinv | xinv | NX | NY
	fxch	%st(1)			// xinv | yinv | NX | NY
	fmulp	%st(0),%st(2)	// yinv | NX*xinv | NY
	fmulp	%st(0),%st(2)	// NX*xinv | NY*yinv
	fistpl	C(r_zistepx)	// NY*yinv
	fistpl	C(r_zistepy)	// (x87 stack empty)

//	a_sstepxfrac = r_sstepx << 16;
//	a_tstepxfrac = r_tstepx << 16;
//
//	a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
//			(r_sstepx >> 16);

	movl	C(r_sstepx),%eax
	movl	C(r_tstepx),%edx
	shll	$16,%eax			// keep only the low 16 bits of each step,
	shll	$16,%edx			// moved into the high word
	movl	%eax,C(a_sstepxfrac)
	movl	%edx,C(a_tstepxfrac)

	movl	C(r_sstepx),%ecx
	movl	C(r_tstepx),%eax
	sarl	$16,%ecx			// r_sstepx >> 16
	sarl	$16,%eax			// r_tstepx >> 16
	imull	skinwidth(%esp)		// %edx:%eax = %eax * skinwidth argument
	addl	%ecx,%eax
	movl	%eax,C(a_ststepxwhole)

	ret


//----------------------------------------------------------------------
// recursive subdivision affine triangle drawing code
//
// not C-callable because of stdcall return
//
// D_PolysetRecursiveTriangle (lp1, lp2, lp3)
//
// The three arguments are pointers to int arrays, found at 4/8/12(%esp)
// on entry; the lp1/lp2/lp3 offsets below add 16 for the four registers
// pushed first.  The routine returns with "ret $12", so the callee
// removes the arguments.
//
// Register use after the prologue: %ebx = lp1, %esi = lp2, %edi = lp3.
//
// Each "d < -1 || d > 1" test is done as one unsigned compare:
// (unsigned)(d + 1) > 2.
//----------------------------------------------------------------------

#define lp1	4+16
#define lp2	8+16
#define lp3	12+16

.globl C(D_PolysetRecursiveTriangle)
C(D_PolysetRecursiveTriangle):
	pushl	%ebp				// preserve caller stack frame pointer
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx

//	int		*temp;
//	int		d;
//	int		new[6];
//	int		i;
//	int		z;
//	short	*zbuf;
	movl	lp2(%esp),%esi
	movl	lp1(%esp),%ebx
	movl	lp3(%esp),%edi

//	d = lp2[0] - lp1[0];
//	if (d < -1 || d > 1)
//		goto split;
	movl	0(%esi),%eax

	movl	0(%ebx),%edx
	movl	4(%esi),%ebp

	subl	%edx,%eax
	movl	4(%ebx),%ecx

	subl	%ecx,%ebp			// lp2[1] - lp1[1], computed early for the next test
	incl	%eax

	cmpl	$2,%eax
	ja		LSplit

//	d = lp2[1] - lp1[1];
//	if (d < -1 || d > 1)
//		goto split;
	movl	0(%edi),%eax		// preload lp3[0] for the following test
	incl	%ebp

	cmpl	$2,%ebp
	ja		LSplit

//	d = lp3[0] - lp2[0];
//	if (d < -1 || d > 1)
//		goto split2;
	movl	0(%esi),%edx
// D_PolysetRecursiveTriangle, continued.
// Here %ebx = lp1, %esi = lp2, %edi = lp3; %eax = lp3[0] and %edx = lp2[0]
// were loaded by the preceding instructions for the lp3[0] - lp2[0] test.
// Every "d < -1 || d > 1" test is done as (unsigned)(d + 1) > 2.
	movl	4(%edi),%ebp

	subl	%edx,%eax
	movl	4(%esi),%ecx

	subl	%ecx,%ebp			// lp3[1] - lp2[1], computed early for the next test
	incl	%eax

	cmpl	$2,%eax
	ja		LSplit2

//	d = lp3[1] - lp2[1];
//	if (d < -1 || d > 1)
//		goto split2;
	movl	0(%ebx),%eax		// preload lp1[0] for the following test
	incl	%ebp

	cmpl	$2,%ebp
	ja		LSplit2

//	d = lp1[0] - lp3[0];
//	if (d < -1 || d > 1)
//		goto split3;
	movl	0(%edi),%edx
	movl	4(%ebx),%ebp

	subl	%edx,%eax
	movl	4(%edi),%ecx

	subl	%ecx,%ebp			// lp1[1] - lp3[1]
	incl	%eax

	incl	%ebp
	movl	%ebx,%edx			// temp = lp1, needed only if LSplit3 is taken

	cmpl	$2,%eax
	ja		LSplit3

//	d = lp1[1] - lp3[1];
//	if (d < -1 || d > 1)
//	{
//split3:
//		temp = lp1;
//		lp1 = lp3;
//		lp3 = lp2;
//		lp2 = temp;
//		goto split;
//	}
//
//	return;			// entire tri is filled
//
	cmpl	$2,%ebp
	jna		LDone

LSplit3:
	movl	%edi,%ebx			// lp1 = lp3
	movl	%esi,%edi			// lp3 = lp2
	movl	%edx,%esi			// lp2 = temp (old lp1, saved in %edx above)
	jmp		LSplit

//split2:
LSplit2:

//	temp = lp1;
//	lp1 = lp2;
//	lp2 = lp3;
//	lp3 = temp;
	movl	%ebx,%eax
	movl	%esi,%ebx
	movl	%edi,%esi
	movl	%eax,%edi

//split:
LSplit:

	subl	$24,%esp		// allocate space for a new vertex

//// split this edge
//	new[0] = (lp1[0] + lp2[0]) >> 1;
//	new[1] = (lp1[1] + lp2[1]) >> 1;
//	new[2] = (lp1[2] + lp2[2]) >> 1;
//	new[3] = (lp1[3] + lp2[3]) >> 1;
//	new[5] = (lp1[5] + lp2[5]) >> 1;
//
// new[] lives at 0..23(%esp); element 4 is not written.  The loads, adds
// and stores for the five elements are interleaved.
	movl	8(%ebx),%eax

	movl	8(%esi),%edx
	movl	12(%ebx),%ecx

	addl	%edx,%eax
	movl	12(%esi),%edx

	sarl	$1,%eax
	addl	%edx,%ecx

	movl	%eax,8(%esp)		// new[2]
	movl	20(%ebx),%eax

	sarl	$1,%ecx
	movl	20(%esi),%edx

	movl	%ecx,12(%esp)		// new[3]
	addl	%edx,%eax

	movl	0(%ebx),%ecx		// %ecx = lp1[0], still live at the test below
	movl	0(%esi),%edx

	sarl	$1,%eax
	addl	%ecx,%edx

	movl	%eax,20(%esp)		// new[5]
	movl	4(%ebx),%eax		// %eax = lp1[1], still live at the test below

	sarl	$1,%edx
	movl	4(%esi),%ebp

	movl	%edx,0(%esp)		// new[0]
	addl	%eax,%ebp

	sarl	$1,%ebp
	movl	%ebp,4(%esp)		// new[1]

//// draw the point if splitting a leading edge
//	if (lp2[1] > lp1[1])
//		goto nodraw;
	cmpl	%eax,4(%esi)
	jg		LNoDraw

//	if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
//		goto nodraw;
	movl	0(%esi),%edx		// movl leaves the flags from the cmpl above intact
	jnz		LDraw				// lp2[1] != lp1[1]

	cmpl	%ecx,%edx			// lp2[0] vs lp1[0]
	jl		LNoDraw

LDraw:

// z = new[5] >> 16;
	movl	20(%esp),%edx
	movl	4(%esp),%ecx

	sarl	$16,%edx
	movl	0(%esp),%ebp

//	zbuf = zspantable[new[1]] + new[0];
	movl	C(zspantable)(,%ecx,4),%eax

//	if (z >= *zbuf)
//	{
	cmpw	(%eax,%ebp,2),%dx	// signed 16-bit compare of z against *zbuf
	jnge	LNoDraw

//		int		pix;
//
//		*zbuf = z;
	movw	%dx,(%eax,%ebp,2)

//		pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
	movl	12(%esp),%eax

	sarl	$16,%eax
	movl	8(%esp),%edx

	sarl	$16,%edx
	subl	%ecx,%ecx			// clear %ecx so %cl can be used as an index

	movl	C(skintable)(,%eax,4),%eax
	movl	4(%esp),%ebp

	movb	(%eax,%edx,),%cl	// skin texel
	movl	C(d_pcolormap),%edx

	movb	(%edx,%ecx,),%dl	// pix
	movl	0(%esp),%ecx

//		d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
	movl	C(d_scantable)(,%ebp,4),%eax
	addl	%eax,%ecx
	movl	C(d_viewbuffer),%eax
	movb	%dl,(%eax,%ecx,1)

//	}
//
//nodraw:
LNoDraw:

//// recursively continue
//	D_PolysetRecursiveTriangle (lp3, lp1, new);
	pushl	%esp				// pushes %esp as it was before the push, i.e. &new[0]
	pushl	%ebx
	pushl	%edi
	call	C(D_PolysetRecursiveTriangle)	// callee pops the three arguments

//	D_PolysetRecursiveTriangle (lp3, new, lp2);
	movl	%esp,%ebx			// %esp points at new[] again
	pushl	%esi
	pushl	%ebx
	pushl	%edi
	call	C(D_PolysetRecursiveTriangle)
	addl	$24,%esp			// release new[]

LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%esi
	popl	%ebp				// restore caller stack frame pointer
	ret		$12					// return and pop lp1, lp2, lp3


//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for affine polygons, with smooth
// shading and no transparency
//----------------------------------------------------------------------

// Offset of the pspans argument after the two registers pushed on entry.
#define pspans	4+8

.globl C(D_PolysetAff8Start)
C(D_PolysetAff8Start):
// D_PolysetAff8Start and D_PolysetAff8End bracket the code that
// D_Aff8Patch modifies (the LPatch1..LPatch9 sites all lie between them).

//----------------------------------------------------------------------
// D_PolysetDrawSpans8
//
// In:   4(%esp) on entry = pointer to the first span descriptor.  Fields
//       are read through the spanpackage_t_* offsets (defined outside this
//       file); descriptors are spanpackage_t_size bytes apart and the list
//       ends at the first descriptor whose count field is -999999.
// Uses: C(d_aspancount), C(errorterm), C(erroradjustup),
//       C(erroradjustdown), C(d_countextrastep), C(ubasestep),
//       C(r_zistepx), C(r_lstepx), C(a_sstepxfrac), C(a_tstepxfrac),
//       C(a_ststepxwhole), C(r_affinetridesc)+atd_skinwidth
// Writes: C(d_aspancount), C(errorterm), advancetable, tstep, lzistepx,
//       and the frame/z buffers addressed by each span
// Preserves %esi, %ebx, %ebp, %edi; clobbers %eax, %ecx, %edx, flags.
//
// The colormap address in every "movb 0x12345678(%eax),%al" is a
// placeholder: the dummy value forces a 32-bit displacement, which is the
// last four bytes of the instruction, and D_Aff8Patch overwrites it at
// LPatchN-4.  The code must therefore be writable when D_Aff8Patch runs,
// and D_Aff8Patch must run before any span is drawn.
//----------------------------------------------------------------------

.globl C(D_PolysetDrawSpans8)
C(D_PolysetDrawSpans8):
	pushl	%esi				// preserve register variables
	pushl	%ebx

	movl	pspans(%esp),%esi	// point to the first span descriptor
	movl	C(r_zistepx),%ecx

	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi

	rorl	$16,%ecx			// put high 16 bits of 1/z step in low word
	movl	spanpackage_t_count(%esi),%edx

	movl	%ecx,lzistepx

LSpanLoop:

//		lcount = d_aspancount - pspanpackage->count;
//
//		errorterm += erroradjustup;
//		if (errorterm >= 0)
//		{
//			d_aspancount += d_countextrastep;
//			errorterm -= erroradjustdown;
//		}
//		else
//		{
//			d_aspancount += ubasestep;
//		}
	movl	C(d_aspancount),%eax
	subl	%edx,%eax			// %eax = lcount

	movl	C(erroradjustup),%edx
	movl	C(errorterm),%ebx
	addl	%edx,%ebx
	js		LNoTurnover

	movl	C(erroradjustdown),%edx
	movl	C(d_countextrastep),%edi
	subl	%edx,%ebx
	movl	C(d_aspancount),%ebp
	movl	%ebx,C(errorterm)
	addl	%edi,%ebp
	movl	%ebp,C(d_aspancount)
	jmp		LRightEdgeStepped

LNoTurnover:
	movl	C(d_aspancount),%edi
	movl	C(ubasestep),%edx
	movl	%ebx,C(errorterm)
	addl	%edx,%edi
	movl	%edi,C(d_aspancount)

LRightEdgeStepped:
	cmpl	$1,%eax

	jl		LNextSpan			// lcount < 1: nothing to draw
	jz		LExactlyOneLong		// lcount == 1: dedicated single-pixel path

//
// set up advancetable
//
	movl	C(a_ststepxwhole),%ecx
	movl	C(r_affinetridesc)+atd_skinwidth,%edx

	movl	%ecx,advancetable+4	// advance base in t
	addl	%edx,%ecx

	movl	%ecx,advancetable	// advance extra in t
	movl	C(a_tstepxfrac),%ecx

	movw	C(r_lstepx),%cx		// pack the light step under the t fraction
	movl	%eax,%edx			// count

	movl	%ecx,tstep
	addl	$7,%edx

	shrl	$3,%edx				// count of full and partial loops
	movl	spanpackage_t_sfrac(%esi),%ebx

	movw	%dx,%bx
	movl	spanpackage_t_pz(%esi),%ecx

	negl	%eax

	movl	spanpackage_t_pdest(%esi),%edi
	andl	$7,%eax				// 0->0, 1->7, 2->6, ... , 7->1

// The unrolled loop is entered part-way through for a partial first pass,
// and each entry point uses a fixed offset; move pdest back by %eax bytes
// and pz back by 2*%eax bytes so those offsets land on the span start.
	subl	%eax,%edi			// compensate for hardwired offsets
	subl	%eax,%ecx

	subl	%eax,%ecx
	movl	spanpackage_t_tfrac(%esi),%edx

	movw	spanpackage_t_light(%esi),%dx
	movl	spanpackage_t_zi(%esi),%ebp

	rorl	$16,%ebp			// put high 16 bits of 1/z in low word
	pushl	%esi				// span pointer is saved; %esi becomes ptex

	movl	spanpackage_t_ptex(%esi),%esi
	jmp		aff8entryvec_table(,%eax,4)

// %bx = count of full and partial loops
// %ebx high word = sfrac
// %ecx = pz
// %dx = light
// %edx high word = tfrac
// %esi = ptex
// %edi = pdest
// %ebp = 1/z
// tstep low word = C(r_lstepx)
// tstep high word = C(a_tstepxfrac)
// C(a_sstepxfrac) low word = 0
// C(a_sstepxfrac) high word = low 16 bits of r_sstepx
//		(D_PolysetCalcGradients stores r_sstepx << 16)
//
// Each of the eight stages below does the same thing for one pixel:
//   - draw only if the low word of %ebp (1/z) is >= the z-buffer entry
//     (signed 16-bit compare); the pixel is the patched colormap indexed
//     by (light high byte << 8) | texel
//   - step t and light together by adding tstep; sbbl turns the carry out
//     of the t fraction into %eax = -1 or 0
//   - step 1/z in its rotated form, folding the carry back into the low bit
//   - step s; adcl then advances ptex by advancetable[0] (extra, t fraction
//     carried) or advancetable[1] (base), plus the carry out of the s
//     fraction

LDrawLoop:

// FIXME: do we need to clamp light? We may need at least a buffer bit to
// keep it from poking into tfrac and causing problems

LDraw8:
	cmpw	(%ecx),%bp
	jl		Lp1
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,(%ecx)
	movb	0x12345678(%eax),%al
LPatch8:
	movb	%al,(%edi)
Lp1:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw7:
	cmpw	2(%ecx),%bp
	jl		Lp2
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,2(%ecx)
	movb	0x12345678(%eax),%al
LPatch7:
	movb	%al,1(%edi)
Lp2:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw6:
	cmpw	4(%ecx),%bp
	jl		Lp3
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,4(%ecx)
	movb	0x12345678(%eax),%al
LPatch6:
	movb	%al,2(%edi)
Lp3:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw5:
	cmpw	6(%ecx),%bp
	jl		Lp4
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,6(%ecx)
	movb	0x12345678(%eax),%al
LPatch5:
	movb	%al,3(%edi)
Lp4:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw4:
	cmpw	8(%ecx),%bp
	jl		Lp5
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,8(%ecx)
	movb	0x12345678(%eax),%al
LPatch4:
	movb	%al,4(%edi)
Lp5:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw3:
	cmpw	10(%ecx),%bp
	jl		Lp6
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,10(%ecx)
	movb	0x12345678(%eax),%al
LPatch3:
	movb	%al,5(%edi)
Lp6:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw2:
	cmpw	12(%ecx),%bp
	jl		Lp7
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,12(%ecx)
	movb	0x12345678(%eax),%al
LPatch2:
	movb	%al,6(%edi)
Lp7:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw1:
	cmpw	14(%ecx),%bp
	jl		Lp8
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,14(%ecx)
	movb	0x12345678(%eax),%al
LPatch1:
	movb	%al,7(%edi)
Lp8:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

	addl	$8,%edi				// next group of 8 pixels
	addl	$16,%ecx			// next group of 8 z-buffer words

	decw	%bx					// only the low word is the loop count
	jnz		LDrawLoop

	popl	%esi				// restore spans pointer
LNextSpan:
	addl	$(spanpackage_t_size),%esi	// point to next span
LNextSpanESISet:
	movl	spanpackage_t_count(%esi),%edx
	cmpl	$-999999,%edx		// any more spans?
	jnz		LSpanLoop			// yes

	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	popl	%ebx				// restore register variables
	popl	%esi
	ret


// draw a one-long span
//
// Reached with %esi = current span.  The span pointer is advanced inside
// the drawing sequence, so the exit goes to LNextSpanESISet; when the z
// test fails nothing has been advanced yet and the exit is LNextSpan.

LExactlyOneLong:

	movl	spanpackage_t_pz(%esi),%ecx
	movl	spanpackage_t_zi(%esi),%ebp

	rorl	$16,%ebp			// put high 16 bits of 1/z in low word
	movl	spanpackage_t_ptex(%esi),%ebx

	cmpw	(%ecx),%bp
	jl		LNextSpan
	xorl	%eax,%eax
	movl	spanpackage_t_pdest(%esi),%edi
	movb	spanpackage_t_light+1(%esi),%ah	// high byte of light
	addl	$(spanpackage_t_size),%esi	// point to next span
	movb	(%ebx),%al
	movw	%bp,(%ecx)
	movb	0x12345678(%eax),%al
LPatch9:
	movb	%al,(%edi)

	jmp		LNextSpanESISet

.globl C(D_PolysetAff8End)
C(D_PolysetAff8End):


//----------------------------------------------------------------------
// D_Aff8Patch
//
// In:   4(%esp) = colormap address (32-bit value, caller pops)
// Stores that value into the four bytes preceding each of LPatch1..LPatch9,
// i.e. over the 0x12345678 displacement of the colormap lookups above.
// Clobbers %eax.
//----------------------------------------------------------------------

#define pcolormap		4

.globl C(D_Aff8Patch)
C(D_Aff8Patch):
	movl	pcolormap(%esp),%eax
	movl	%eax,LPatch1-4
	movl	%eax,LPatch2-4
	movl	%eax,LPatch3-4
	movl	%eax,LPatch4-4
	movl	%eax,LPatch5-4
	movl	%eax,LPatch6-4
	movl	%eax,LPatch7-4
	movl	%eax,LPatch8-4
	movl	%eax,LPatch9-4

	ret


//----------------------------------------------------------------------
// Alias model polygon dispatching code, combined with subdivided affine
// triangle drawing code
//----------------------------------------------------------------------

.globl C(D_PolysetDraw)
C(D_PolysetDraw):

//	spanpackage_t	spans[DPS_MAXSPANS + 1 +
//			((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
//						// one extra because of cache line pretouching
//
//	a_spans = (spanpackage_t *)
//			(((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
	subl	$(SPAN_SIZE),%esp
	movl	%esp,%eax
	addl	$(CACHE_SIZE - 1),%eax
	andl	$(~(CACHE_SIZE - 1)),%eax
	movl	%eax,C(a_spans)

//	if (r_affinetridesc.drawtype)
//		D_DrawSubdiv ();
//	else
//
D_DrawNonSubdiv (); 1105 movl C(r_affinetridesc)+atd_drawtype,%eax 1106 testl %eax,%eax 1107 jz C(D_DrawNonSubdiv) 1108 1109 pushl %ebp // preserve caller stack frame pointer 1110 1111// lnumtriangles = r_affinetridesc.numtriangles; 1112 movl C(r_affinetridesc)+atd_numtriangles,%ebp 1113 1114 pushl %esi // preserve register variables 1115 shll $4,%ebp 1116 1117 pushl %ebx 1118// ptri = r_affinetridesc.ptriangles; 1119 movl C(r_affinetridesc)+atd_ptriangles,%ebx 1120 1121 pushl %edi 1122 1123// mtriangle_t *ptri; 1124// finalvert_t *pfv, *index0, *index1, *index2; 1125// int i; 1126// int lnumtriangles; 1127// int s0, s1, s2; 1128 1129// pfv = r_affinetridesc.pfinalverts; 1130 movl C(r_affinetridesc)+atd_pfinalverts,%edi 1131 1132// for (i=0 ; i<lnumtriangles ; i++) 1133// { 1134 1135Llooptop: 1136 1137// index0 = pfv + ptri[i].vertindex[0]; 1138// index1 = pfv + ptri[i].vertindex[1]; 1139// index2 = pfv + ptri[i].vertindex[2]; 1140 movl mtri_vertindex-16+0(%ebx,%ebp,),%ecx 1141 movl mtri_vertindex-16+4(%ebx,%ebp,),%esi 1142 1143 shll $(fv_shift),%ecx 1144 movl mtri_vertindex-16+8(%ebx,%ebp,),%edx 1145 1146 shll $(fv_shift),%esi 1147 addl %edi,%ecx 1148 1149 shll $(fv_shift),%edx 1150 addl %edi,%esi 1151 1152 addl %edi,%edx 1153 1154// if (((index0->v[1]-index1->v[1]) * 1155// (index0->v[0]-index2->v[0]) - 1156// (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0) 1157// { 1158// continue; 1159// } 1160// 1161// d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00]; 1162 fildl fv_v+4(%ecx) // i0v1 1163 fildl fv_v+4(%esi) // i1v1 | i0v1 1164 fildl fv_v+0(%ecx) // i0v0 | i1v1 | i0v1 1165 fildl fv_v+0(%edx) // i2v0 | i0v0 | i1v1 | i0v1 1166 fxch %st(2) // i1v1 | i0v0 | i2v0 | i0v1 1167 fsubr %st(3),%st(0) // i0v1-i1v1 | i0v0 | i2v0 | i0v1 1168 fildl fv_v+0(%esi) // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1 1169 fxch %st(2) // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1 1170 fsub %st(0),%st(3) // i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1 1171 fildl fv_v+4(%edx) // 
i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1 1172 fxch %st(1) // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1 1173 fsubp %st(0),%st(3) // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1 1174 fxch %st(1) // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1 1175 fmulp %st(0),%st(3) // i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1 1176 fsubrp %st(0),%st(3) // i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1 1177 movl fv_v+16(%ecx),%eax 1178 andl $0xFF00,%eax 1179 fmulp %st(0),%st(2) // i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1 1180 addl C(acolormap),%eax 1181 fsubp %st(0),%st(1) // (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1) 1182 movl %eax,C(d_pcolormap) 1183 fstps Ltemp 1184 movl Ltemp,%eax 1185 subl $0x80000001,%eax 1186 jc Lskip 1187 1188// if (ptri[i].facesfront) 1189// { 1190// D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v); 1191 movl mtri_facesfront-16(%ebx,%ebp,),%eax 1192 testl %eax,%eax 1193 jz Lfacesback 1194 1195 pushl %edx 1196 pushl %esi 1197 pushl %ecx 1198 call C(D_PolysetRecursiveTriangle) 1199 1200 subl $16,%ebp 1201 jnz Llooptop 1202 jmp Ldone2 1203 1204// } 1205// else 1206// { 1207Lfacesback: 1208 1209// s0 = index0->v[2]; 1210// s1 = index1->v[2]; 1211// s2 = index2->v[2]; 1212 movl fv_v+8(%ecx),%eax 1213 pushl %eax 1214 movl fv_v+8(%esi),%eax 1215 pushl %eax 1216 movl fv_v+8(%edx),%eax 1217 pushl %eax 1218 pushl %ecx 1219 pushl %edx 1220 1221// if (index0->flags & ALIAS_ONSEAM) 1222// index0->v[2] += r_affinetridesc.seamfixupX16; 1223 movl C(r_affinetridesc)+atd_seamfixupX16,%eax 1224 testl $(ALIAS_ONSEAM),fv_flags(%ecx) 1225 jz Lp11 1226 addl %eax,fv_v+8(%ecx) 1227Lp11: 1228 1229// if (index1->flags & ALIAS_ONSEAM) 1230// index1->v[2] += r_affinetridesc.seamfixupX16; 1231 testl $(ALIAS_ONSEAM),fv_flags(%esi) 1232 jz Lp12 1233 addl %eax,fv_v+8(%esi) 1234Lp12: 1235 1236// if (index2->flags & ALIAS_ONSEAM) 1237// index2->v[2] += r_affinetridesc.seamfixupX16; 1238 testl $(ALIAS_ONSEAM),fv_flags(%edx) 1239 jz Lp13 1240 addl 
%eax,fv_v+8(%edx) 1241Lp13: 1242 1243// D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v); 1244 pushl %edx 1245 pushl %esi 1246 pushl %ecx 1247 call C(D_PolysetRecursiveTriangle) 1248 1249// index0->v[2] = s0; 1250// index1->v[2] = s1; 1251// index2->v[2] = s2; 1252 popl %edx 1253 popl %ecx 1254 popl %eax 1255 movl %eax,fv_v+8(%edx) 1256 popl %eax 1257 movl %eax,fv_v+8(%esi) 1258 popl %eax 1259 movl %eax,fv_v+8(%ecx) 1260 1261// } 1262// } 1263Lskip: 1264 subl $16,%ebp 1265 jnz Llooptop 1266 1267Ldone2: 1268 popl %edi // restore the caller's stack frame 1269 popl %ebx 1270 popl %esi // restore register variables 1271 popl %ebp 1272 1273 addl $(SPAN_SIZE),%esp 1274 1275 ret 1276 1277 1278//---------------------------------------------------------------------- 1279// Alias model triangle left-edge scanning code 1280//---------------------------------------------------------------------- 1281 1282#define height 4+16 1283 1284.globl C(D_PolysetScanLeftEdge) 1285C(D_PolysetScanLeftEdge): 1286 pushl %ebp // preserve caller stack frame pointer 1287 pushl %esi // preserve register variables 1288 pushl %edi 1289 pushl %ebx 1290 1291 movl height(%esp),%eax 1292 movl C(d_sfrac),%ecx 1293 andl $0xFFFF,%eax 1294 movl C(d_ptex),%ebx 1295 orl %eax,%ecx 1296 movl C(d_pedgespanpackage),%esi 1297 movl C(d_tfrac),%edx 1298 movl C(d_light),%edi 1299 movl C(d_zi),%ebp 1300 1301// %eax: scratch 1302// %ebx: d_ptex 1303// %ecx: d_sfrac in high word, count in low word 1304// %edx: d_tfrac 1305// %esi: d_pedgespanpackage, errorterm, scratch alternately 1306// %edi: d_light 1307// %ebp: d_zi 1308 1309// do 1310// { 1311 1312LScanLoop: 1313 1314// d_pedgespanpackage->ptex = ptex; 1315// d_pedgespanpackage->pdest = d_pdest; 1316// d_pedgespanpackage->pz = d_pz; 1317// d_pedgespanpackage->count = d_aspancount; 1318// d_pedgespanpackage->light = d_light; 1319// d_pedgespanpackage->zi = d_zi; 1320// d_pedgespanpackage->sfrac = d_sfrac << 16; 1321// d_pedgespanpackage->tfrac = d_tfrac 
<< 16; 1322 movl %ebx,spanpackage_t_ptex(%esi) 1323 movl C(d_pdest),%eax 1324 movl %eax,spanpackage_t_pdest(%esi) 1325 movl C(d_pz),%eax 1326 movl %eax,spanpackage_t_pz(%esi) 1327 movl C(d_aspancount),%eax 1328 movl %eax,spanpackage_t_count(%esi) 1329 movl %edi,spanpackage_t_light(%esi) 1330 movl %ebp,spanpackage_t_zi(%esi) 1331 movl %ecx,spanpackage_t_sfrac(%esi) 1332 movl %edx,spanpackage_t_tfrac(%esi) 1333 1334// pretouch the next cache line 1335 movb spanpackage_t_size(%esi),%al 1336 1337// d_pedgespanpackage++; 1338 addl $(spanpackage_t_size),%esi 1339 movl C(erroradjustup),%eax 1340 movl %esi,C(d_pedgespanpackage) 1341 1342// errorterm += erroradjustup; 1343 movl C(errorterm),%esi 1344 addl %eax,%esi 1345 movl C(d_pdest),%eax 1346 1347// if (errorterm >= 0) 1348// { 1349 js LNoLeftEdgeTurnover 1350 1351// errorterm -= erroradjustdown; 1352// d_pdest += d_pdestextrastep; 1353 subl C(erroradjustdown),%esi 1354 addl C(d_pdestextrastep),%eax 1355 movl %esi,C(errorterm) 1356 movl %eax,C(d_pdest) 1357 1358// d_pz += d_pzextrastep; 1359// d_aspancount += d_countextrastep; 1360// d_ptex += d_ptexextrastep; 1361// d_sfrac += d_sfracextrastep; 1362// d_ptex += d_sfrac >> 16; 1363// d_sfrac &= 0xFFFF; 1364// d_tfrac += d_tfracextrastep; 1365 movl C(d_pz),%eax 1366 movl C(d_aspancount),%esi 1367 addl C(d_pzextrastep),%eax 1368 addl C(d_sfracextrastep),%ecx 1369 adcl C(d_ptexextrastep),%ebx 1370 addl C(d_countextrastep),%esi 1371 movl %eax,C(d_pz) 1372 movl C(d_tfracextrastep),%eax 1373 movl %esi,C(d_aspancount) 1374 addl %eax,%edx 1375 1376// if (d_tfrac & 0x10000) 1377// { 1378 jnc LSkip1 1379 1380// d_ptex += r_affinetridesc.skinwidth; 1381// d_tfrac &= 0xFFFF; 1382 addl C(r_affinetridesc)+atd_skinwidth,%ebx 1383 1384// } 1385 1386LSkip1: 1387 1388// d_light += d_lightextrastep; 1389// d_zi += d_ziextrastep; 1390 addl C(d_lightextrastep),%edi 1391 addl C(d_ziextrastep),%ebp 1392 1393// } 1394 movl C(d_pedgespanpackage),%esi 1395 decl %ecx 1396 testl $0xFFFF,%ecx 1397 
jnz LScanLoop 1398 1399 popl %ebx 1400 popl %edi 1401 popl %esi 1402 popl %ebp 1403 ret 1404 1405// else 1406// { 1407 1408LNoLeftEdgeTurnover: 1409 movl %esi,C(errorterm) 1410 1411// d_pdest += d_pdestbasestep; 1412 addl C(d_pdestbasestep),%eax 1413 movl %eax,C(d_pdest) 1414 1415// d_pz += d_pzbasestep; 1416// d_aspancount += ubasestep; 1417// d_ptex += d_ptexbasestep; 1418// d_sfrac += d_sfracbasestep; 1419// d_ptex += d_sfrac >> 16; 1420// d_sfrac &= 0xFFFF; 1421 movl C(d_pz),%eax 1422 movl C(d_aspancount),%esi 1423 addl C(d_pzbasestep),%eax 1424 addl C(d_sfracbasestep),%ecx 1425 adcl C(d_ptexbasestep),%ebx 1426 addl C(ubasestep),%esi 1427 movl %eax,C(d_pz) 1428 movl %esi,C(d_aspancount) 1429 1430// d_tfrac += d_tfracbasestep; 1431 movl C(d_tfracbasestep),%esi 1432 addl %esi,%edx 1433 1434// if (d_tfrac & 0x10000) 1435// { 1436 jnc LSkip2 1437 1438// d_ptex += r_affinetridesc.skinwidth; 1439// d_tfrac &= 0xFFFF; 1440 addl C(r_affinetridesc)+atd_skinwidth,%ebx 1441 1442// } 1443 1444LSkip2: 1445 1446// d_light += d_lightbasestep; 1447// d_zi += d_zibasestep; 1448 addl C(d_lightbasestep),%edi 1449 addl C(d_zibasestep),%ebp 1450 1451// } 1452// } while (--height); 1453 movl C(d_pedgespanpackage),%esi 1454 decl %ecx 1455 testl $0xFFFF,%ecx 1456 jnz LScanLoop 1457 1458 popl %ebx 1459 popl %edi 1460 popl %esi 1461 popl %ebp 1462 ret 1463 1464 1465//---------------------------------------------------------------------- 1466// Alias model vertex drawing code 1467//---------------------------------------------------------------------- 1468 1469#define fv 4+8 1470#define numverts 8+8 1471 1472.globl C(D_PolysetDrawFinalVerts) 1473C(D_PolysetDrawFinalVerts): 1474 pushl %ebp // preserve caller stack frame pointer 1475 pushl %ebx 1476 1477// int i, z; 1478// short *zbuf; 1479 1480 movl numverts(%esp),%ecx 1481 movl fv(%esp),%ebx 1482 1483 pushl %esi // preserve register variables 1484 pushl %edi 1485 1486LFVLoop: 1487 1488// for (i=0 ; i<numverts ; i++, fv++) 1489// { 1490// 
// valid triangle coordinates for filling can include the bottom and 1491// // right clip edges, due to the fill rule; these shouldn't be drawn 1492// if ((fv->v[0] < r_refdef.vrectright) && 1493// (fv->v[1] < r_refdef.vrectbottom)) 1494// { 1495 movl fv_v+0(%ebx),%eax 1496 movl C(r_refdef)+rd_vrectright,%edx 1497 cmpl %edx,%eax 1498 jge LNextVert 1499 movl fv_v+4(%ebx),%esi 1500 movl C(r_refdef)+rd_vrectbottom,%edx 1501 cmpl %edx,%esi 1502 jge LNextVert 1503 1504// zbuf = zspantable[fv->v[1]] + fv->v[0]; 1505 movl C(zspantable)(,%esi,4),%edi 1506 1507// z = fv->v[5]>>16; 1508 movl fv_v+20(%ebx),%edx 1509 shrl $16,%edx 1510 1511// if (z >= *zbuf) 1512// { 1513// int pix; 1514 cmpw (%edi,%eax,2),%dx 1515 jl LNextVert 1516 1517// *zbuf = z; 1518 movw %dx,(%edi,%eax,2) 1519 1520// pix = skintable[fv->v[3]>>16][fv->v[2]>>16]; 1521 movl fv_v+12(%ebx),%edi 1522 shrl $16,%edi 1523 movl C(skintable)(,%edi,4),%edi 1524 movl fv_v+8(%ebx),%edx 1525 shrl $16,%edx 1526 movb (%edi,%edx),%dl 1527 1528// pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)]; 1529 movl fv_v+16(%ebx),%edi 1530 andl $0xFF00,%edi 1531 andl $0x00FF,%edx 1532 addl %edx,%edi 1533 movl C(acolormap),%edx 1534 movb (%edx,%edi,1),%dl 1535 1536// d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix; 1537 movl C(d_scantable)(,%esi,4),%edi 1538 movl C(d_viewbuffer),%esi 1539 addl %eax,%edi 1540 movb %dl,(%esi,%edi) 1541 1542// } 1543// } 1544// } 1545LNextVert: 1546 addl $(fv_size),%ebx 1547 decl %ecx 1548 jnz LFVLoop 1549 1550 popl %edi 1551 popl %esi 1552 popl %ebx 1553 popl %ebp 1554 ret 1555 1556 1557//---------------------------------------------------------------------- 1558// Alias model non-subdivided polygon dispatching code 1559// 1560// not C-callable because of stack buffer cleanup 1561//---------------------------------------------------------------------- 1562 1563.globl C(D_DrawNonSubdiv) 1564C(D_DrawNonSubdiv): 1565 pushl %ebp // preserve caller stack frame pointer 1566 movl 
C(r_affinetridesc)+atd_numtriangles,%ebp 1567 pushl %ebx 1568 shll $(mtri_shift),%ebp 1569 pushl %esi // preserve register variables 1570 movl C(r_affinetridesc)+atd_ptriangles,%esi 1571 pushl %edi 1572 1573// mtriangle_t *ptri; 1574// finalvert_t *pfv, *index0, *index1, *index2; 1575// int i; 1576// int lnumtriangles; 1577 1578// pfv = r_affinetridesc.pfinalverts; 1579// ptri = r_affinetridesc.ptriangles; 1580// lnumtriangles = r_affinetridesc.numtriangles; 1581 1582LNDLoop: 1583 1584// for (i=0 ; i<lnumtriangles ; i++, ptri++) 1585// { 1586// index0 = pfv + ptri->vertindex[0]; 1587// index1 = pfv + ptri->vertindex[1]; 1588// index2 = pfv + ptri->vertindex[2]; 1589 movl C(r_affinetridesc)+atd_pfinalverts,%edi 1590 movl mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx 1591 shll $(fv_shift),%ecx 1592 movl mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx 1593 shll $(fv_shift),%edx 1594 movl mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx 1595 shll $(fv_shift),%ebx 1596 addl %edi,%ecx 1597 addl %edi,%edx 1598 addl %edi,%ebx 1599 1600// d_xdenom = (index0->v[1]-index1->v[1]) * 1601// (index0->v[0]-index2->v[0]) - 1602// (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]); 1603 movl fv_v+4(%ecx),%eax 1604 movl fv_v+0(%ecx),%esi 1605 subl fv_v+4(%edx),%eax 1606 subl fv_v+0(%ebx),%esi 1607 imull %esi,%eax 1608 movl fv_v+0(%ecx),%esi 1609 movl fv_v+4(%ecx),%edi 1610 subl fv_v+0(%edx),%esi 1611 subl fv_v+4(%ebx),%edi 1612 imull %esi,%edi 1613 subl %edi,%eax 1614 1615// if (d_xdenom >= 0) 1616// { 1617// continue; 1618 jns LNextTri 1619 1620// } 1621 1622 movl %eax,C(d_xdenom) 1623 fildl C(d_xdenom) 1624 1625// r_p0[0] = index0->v[0]; // u 1626// r_p0[1] = index0->v[1]; // v 1627// r_p0[2] = index0->v[2]; // s 1628// r_p0[3] = index0->v[3]; // t 1629// r_p0[4] = index0->v[4]; // light 1630// r_p0[5] = index0->v[5]; // iz 1631 movl fv_v+0(%ecx),%eax 1632 movl fv_v+4(%ecx),%esi 1633 movl %eax,C(r_p0)+0 1634 movl %esi,C(r_p0)+4 1635 movl fv_v+8(%ecx),%eax 1636 movl fv_v+12(%ecx),%esi 
1637 movl %eax,C(r_p0)+8 1638 movl %esi,C(r_p0)+12 1639 movl fv_v+16(%ecx),%eax 1640 movl fv_v+20(%ecx),%esi 1641 movl %eax,C(r_p0)+16 1642 movl %esi,C(r_p0)+20 1643 1644 fdivrs float_1 1645 1646// r_p1[0] = index1->v[0]; 1647// r_p1[1] = index1->v[1]; 1648// r_p1[2] = index1->v[2]; 1649// r_p1[3] = index1->v[3]; 1650// r_p1[4] = index1->v[4]; 1651// r_p1[5] = index1->v[5]; 1652 movl fv_v+0(%edx),%eax 1653 movl fv_v+4(%edx),%esi 1654 movl %eax,C(r_p1)+0 1655 movl %esi,C(r_p1)+4 1656 movl fv_v+8(%edx),%eax 1657 movl fv_v+12(%edx),%esi 1658 movl %eax,C(r_p1)+8 1659 movl %esi,C(r_p1)+12 1660 movl fv_v+16(%edx),%eax 1661 movl fv_v+20(%edx),%esi 1662 movl %eax,C(r_p1)+16 1663 movl %esi,C(r_p1)+20 1664 1665// r_p2[0] = index2->v[0]; 1666// r_p2[1] = index2->v[1]; 1667// r_p2[2] = index2->v[2]; 1668// r_p2[3] = index2->v[3]; 1669// r_p2[4] = index2->v[4]; 1670// r_p2[5] = index2->v[5]; 1671 movl fv_v+0(%ebx),%eax 1672 movl fv_v+4(%ebx),%esi 1673 movl %eax,C(r_p2)+0 1674 movl %esi,C(r_p2)+4 1675 movl fv_v+8(%ebx),%eax 1676 movl fv_v+12(%ebx),%esi 1677 movl %eax,C(r_p2)+8 1678 movl %esi,C(r_p2)+12 1679 movl fv_v+16(%ebx),%eax 1680 movl fv_v+20(%ebx),%esi 1681 movl %eax,C(r_p2)+16 1682 movl C(r_affinetridesc)+atd_ptriangles,%edi 1683 movl %esi,C(r_p2)+20 1684 movl mtri_facesfront-mtri_size(%edi,%ebp,1),%eax 1685 1686// if (!ptri->facesfront) 1687// { 1688 testl %eax,%eax 1689 jnz LFacesFront 1690 1691// if (index0->flags & ALIAS_ONSEAM) 1692// r_p0[2] += r_affinetridesc.seamfixupX16; 1693 movl fv_flags(%ecx),%eax 1694 movl fv_flags(%edx),%esi 1695 movl fv_flags(%ebx),%edi 1696 testl $(ALIAS_ONSEAM),%eax 1697 movl C(r_affinetridesc)+atd_seamfixupX16,%eax 1698 jz LOnseamDone0 1699 addl %eax,C(r_p0)+8 1700LOnseamDone0: 1701 1702// if (index1->flags & ALIAS_ONSEAM) 1703// r_p1[2] += r_affinetridesc.seamfixupX16; 1704 testl $(ALIAS_ONSEAM),%esi 1705 jz LOnseamDone1 1706 addl %eax,C(r_p1)+8 1707LOnseamDone1: 1708 1709// if (index2->flags & ALIAS_ONSEAM) 1710// r_p2[2] += 
r_affinetridesc.seamfixupX16; 1711 testl $(ALIAS_ONSEAM),%edi 1712 jz LOnseamDone2 1713 addl %eax,C(r_p2)+8 1714LOnseamDone2: 1715 1716// } 1717 1718LFacesFront: 1719 1720 fstps C(d_xdenom) 1721 1722// D_PolysetSetEdgeTable (); 1723// D_RasterizeAliasPolySmooth (); 1724 call C(D_PolysetSetEdgeTable) 1725 call C(D_RasterizeAliasPolySmooth) 1726 1727LNextTri: 1728 movl C(r_affinetridesc)+atd_ptriangles,%esi 1729 subl $16,%ebp 1730 jnz LNDLoop 1731// } 1732 1733 popl %edi 1734 popl %esi 1735 popl %ebx 1736 popl %ebp 1737 1738 addl $(SPAN_SIZE),%esp 1739 1740 ret 1741 1742 1743#endif // id386 1744 1745