/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
//
// math.s
// x86 assembly-language math routines.
//
// Syntax: GNU as, AT&T operand order (source, destination), 32-bit x86,
// run through the C preprocessor first.  All routines take their
// arguments on the stack and return integer results in %eax.

#define GLQUAKE	1	// don't include unneeded defs
#include "asm_i386.h"
#include "quakeasm.h"


#if	id386

	.data

// Dispatch table for BoxOnPlaneSide, indexed by the plane signbits
// value (0..7).  One 4-byte code address per entry.
	.align	4
Ljmptab:
	.long	Lcase0, Lcase1, Lcase2, Lcase3
	.long	Lcase4, Lcase5, Lcase6, Lcase7

	.text

//----------------------------------------------------------------------
// Invert24To16
//
// In:    val(%esp) = 32-bit integer argument
// Out:   %eax = 0x10000000000 / val (unsigned 64-by-32 divide) when
//               val is greater than 0x100 as a signed value,
//               0xFFFFFFFF otherwise
// Clobb: %ecx, %edx, flags
//
// The range check guarantees the divisor is at least 0x101, so the
// quotient always fits in 32 bits and divl cannot fault.
//----------------------------------------------------------------------

// TODO: rounding needed?
// stack parameter offset
#define	val	4

.globl C(Invert24To16)
C(Invert24To16):

	movl	val(%esp),%ecx		// divisor
	movl	$0x100,%edx		// high dword: 0x10000000000 as dividend
	cmpl	%edx,%ecx
	jle	LOutOfRange		// val <= 0x100 (signed): no valid result

	xorl	%eax,%eax		// low dword of the dividend = 0
	divl	%ecx			// eax = edx:eax / ecx

	ret

LOutOfRange:
	movl	$0xFFFFFFFF,%eax
	ret


//----------------------------------------------------------------------
// TransformVector
//
// In:    in(%esp)  = pointer to three single-precision floats
//        out(%esp) = pointer to three single-precision floats (written)
// Out:   out[0] = in[0]*vright[0] + in[1]*vright[1] + in[2]*vright[2]
//        out[1] = in[0]*vup[0]    + in[1]*vup[1]    + in[2]*vup[2]
//        out[2] = in[0]*vpn[0]    + in[1]*vpn[1]    + in[2]*vpn[2]
// Clobb: %eax, %edx; the x87 stack is balanced on exit
//
// vright, vup and vpn are external symbols read as float triples.
// The trailing comments show the x87 stack, st(0) first.
//----------------------------------------------------------------------

#define	in	4
#define	out	8

	.align	2
.globl C(TransformVector)
C(TransformVector):
	movl	in(%esp),%eax
	movl	out(%esp),%edx

// first component: start the three accumulators
	flds	(%eax)			// in[0]
	fmuls	C(vright)		// in[0]*vright[0]
	flds	(%eax)			// in[0] | in[0]*vright[0]
	fmuls	C(vup)			// in[0]*vup[0] | in[0]*vright[0]
	flds	(%eax)			// in[0] | in[0]*vup[0] | in[0]*vright[0]
	fmuls	C(vpn)			// in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]

// second component: three products, then fold into the accumulators
	flds	4(%eax)			// in[1] | ...
	fmuls	C(vright)+4		// in[1]*vright[1] | ...
	flds	4(%eax)			// in[1] | in[1]*vright[1] | ...
	fmuls	C(vup)+4		// in[1]*vup[1] | in[1]*vright[1] | ...
	flds	4(%eax)			// in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
	fmuls	C(vpn)+4		// in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
	fxch	%st(2)			// in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...

	faddp	%st(0),%st(5)		// in[1]*vup[1] | in[1]*vpn[1] | ...
	faddp	%st(0),%st(3)		// in[1]*vpn[1] | ...
	faddp	%st(0),%st(1)		// vpn_accum | vup_accum | vright_accum

// third component: same pattern
	flds	8(%eax)			// in[2] | ...
	fmuls	C(vright)+8		// in[2]*vright[2] | ...
	flds	8(%eax)			// in[2] | in[2]*vright[2] | ...
	fmuls	C(vup)+8		// in[2]*vup[2] | in[2]*vright[2] | ...
	flds	8(%eax)			// in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
	fmuls	C(vpn)+8		// in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
	fxch	%st(2)			// in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...

	faddp	%st(0),%st(5)		// in[2]*vup[2] | in[2]*vpn[2] | ...
	faddp	%st(0),%st(3)		// in[2]*vpn[2] | ...
	faddp	%st(0),%st(1)		// vpn_accum | vup_accum | vright_accum

// pop the three sums in stack order
	fstps	8(%edx)			// out[2]
	fstps	4(%edx)			// out[1]
	fstps	(%edx)			// out[0]

	ret


//----------------------------------------------------------------------
// BoxOnPlaneSide
//
// In:    first stack argument  = emins, pointer to three floats
//        second stack argument = emaxs, pointer to three floats
//        third stack argument  = p, pointer to a plane record; the
//                                field offsets pl_normal, pl_dist and
//                                pl_signbits are defined outside this
//                                file
// Out:   %eax = sides, where
//                 bit 0 is set when dist1 >= p->dist
//                 bit 1 is set when dist2 <  p->dist
//        dist1 and dist2 are dot products of p->normal with a mix of
//        emins/emaxs components; the byte at pl_signbits selects the
//        mix (see the C expressions above each case).
// Saves: %ebx (pushed on entry, popped before return)
// Clobb: %ecx, %edx, flags; the x87 stack is balanced on exit
//
// The EMINS/EMAXS/P offsets include the 4 bytes taken by the pushed
// %ebx, so they are only valid after the pushl.
//
// In the stack comments nK is shorthand for p->normal[K]; st(0) is
// listed first.
//----------------------------------------------------------------------

#define EMINS	4+4
#define EMAXS	4+8
#define P	4+12

	.align	2
.globl C(BoxOnPlaneSide)
C(BoxOnPlaneSide):
	pushl	%ebx

	movl	P(%esp),%edx		// edx = p
	movl	EMINS(%esp),%ecx	// ecx = emins
	xorl	%eax,%eax		// clear upper bits of the table index
	movl	EMAXS(%esp),%ebx	// ebx = emaxs
	movb	pl_signbits(%edx),%al	// eax = signbits, zero-extended
	cmpb	$8,%al
	jae	Lerror			// unsigned test: a signed jge would let
					// 0x80..0xFF through and index far past
					// the end of Ljmptab
	flds	pl_normal(%edx)		// n0
	fld	%st(0)			// n0 | n0
	jmp	Ljmptab(,%eax,4)	// every case starts with n0 | n0


//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase0:
	fmuls	(%ebx)			// n0*emaxs[0] | n0
	flds	pl_normal+4(%edx)	// n1 | n0*emaxs[0] | n0
	fxch	%st(2)			// n0 | n0*emaxs[0] | n1
	fmuls	(%ecx)			// n0*emins[0] | n0*emaxs[0] | n1
	fxch	%st(2)			// n1 | n0*emaxs[0] | n0*emins[0]
	fld	%st(0)			// n1 | n1 | n0*emaxs[0] | n0*emins[0]
	fmuls	4(%ebx)			// n1*emaxs[1] | n1 | n0*emaxs[0] |
					//  n0*emins[0]
	flds	pl_normal+8(%edx)	// n2 | n1*emaxs[1] | n1 | n0*emaxs[0] |
					//  n0*emins[0]
	fxch	%st(2)			// n1 | n1*emaxs[1] | n2 | n0*emaxs[0] |
					//  n0*emins[0]
	fmuls	4(%ecx)			// n1*emins[1] | n1*emaxs[1] | n2 |
					//  n0*emaxs[0] | n0*emins[0]
	fxch	%st(2)			// n2 | n1*emaxs[1] | n1*emins[1] |
					//  n0*emaxs[0] | n0*emins[0]
	fld	%st(0)			// n2 | n2 | n1*emaxs[1] | n1*emins[1] |
					//  n0*emaxs[0] | n0*emins[0]
	fmuls	8(%ebx)			// n2*emaxs[2] | n2 | n1*emaxs[1] |
					//  n1*emins[1] | n0*emaxs[0] |
					//  n0*emins[0]
	fxch	%st(5)			// n0*emins[0] | n2 | n1*emaxs[1] |
					//  n1*emins[1] | n0*emaxs[0] |
					//  n2*emaxs[2]
	faddp	%st(0),%st(3)		// n2 | n1*emaxs[1] |
					//  n1*emins[1]+n0*emins[0] |
					//  n0*emaxs[0] | n2*emaxs[2]
	fmuls	8(%ecx)			// n2*emins[2] | n1*emaxs[1] |
					//  n1*emins[1]+n0*emins[0] |
					//  n0*emaxs[0] | n2*emaxs[2]
	fxch	%st(1)			// n1*emaxs[1] | n2*emins[2] |
					//  n1*emins[1]+n0*emins[0] |
					//  n0*emaxs[0] | n2*emaxs[2]
	faddp	%st(0),%st(3)		// n2*emins[2] |
					//  n1*emins[1]+n0*emins[0] |
					//  n0*emaxs[0]+n1*emaxs[1] |
					//  n2*emaxs[2]
	fxch	%st(3)			// n2*emaxs[2] |
					//  n1*emins[1]+n0*emins[0] |
					//  n0*emaxs[0]+n1*emaxs[1] |
					//  n2*emins[2]
	faddp	%st(0),%st(2)		// n1*emins[1]+n0*emins[0] | dist1 |
					//  n2*emins[2]

	jmp	LSetSides

// Cases 1-7 run the identical instruction sequence and leave the stack
// in the same shape; only the emins/emaxs operand of each multiply
// differs, as noted on each fmuls.

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase1:
	fmuls	(%ecx)			// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)			// emaxs[0]
	fxch	%st(2)
	fld	%st(0)
	fmuls	4(%ebx)			// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)			// emins[1]
	fxch	%st(2)
	fld	%st(0)
	fmuls	8(%ebx)			// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)			// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp	LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase2:
	fmuls	(%ebx)			// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)			// emins[0]
	fxch	%st(2)
	fld	%st(0)
	fmuls	4(%ecx)			// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)			// emaxs[1]
	fxch	%st(2)
	fld	%st(0)
	fmuls	8(%ebx)			// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)			// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp	LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase3:
	fmuls	(%ecx)			// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)			// emaxs[0]
	fxch	%st(2)
	fld	%st(0)
	fmuls	4(%ecx)			// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)			// emaxs[1]
	fxch	%st(2)
	fld	%st(0)
	fmuls	8(%ebx)			// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)			// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp	LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase4:
	fmuls	(%ebx)			// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)			// emins[0]
	fxch	%st(2)
	fld	%st(0)
	fmuls	4(%ebx)			// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)			// emins[1]
	fxch	%st(2)
	fld	%st(0)
	fmuls	8(%ecx)			// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)			// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp	LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase5:
	fmuls	(%ecx)			// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)			// emaxs[0]
	fxch	%st(2)
	fld	%st(0)
	fmuls	4(%ebx)			// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)			// emins[1]
	fxch	%st(2)
	fld	%st(0)
	fmuls	8(%ecx)			// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)			// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp	LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase6:
	fmuls	(%ebx)			// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)			// emins[0]
	fxch	%st(2)
	fld	%st(0)
	fmuls	4(%ecx)			// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)			// emaxs[1]
	fxch	%st(2)
	fld	%st(0)
	fmuls	8(%ecx)			// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)			// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp	LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase7:
	fmuls	(%ecx)			// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)			// emaxs[0]
	fxch	%st(2)
	fld	%st(0)
	fmuls	4(%ecx)			// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)			// emaxs[1]
	fxch	%st(2)
	fld	%st(0)
	fmuls	8(%ecx)			// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)			// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

// case 7 falls through into LSetSides

LSetSides:

//	sides = 0;
//	if (dist1 >= p->dist)
//		sides = 1;
//	if (dist2 < p->dist)
//		sides |= 2;

// On entry the stack holds: partial dist2 | dist1 | last dist2 term.
// After each fcomps, bit 0 of %ah (once fnstsw has copied the status
// word into %ax) is the C0 condition flag, which the compare sets when
// st(0) is below the memory operand.

	faddp	%st(0),%st(2)		// dist1 | dist2
	fcomps	pl_dist(%edx)		// compare dist1 with p->dist, pop
	xorl	%ecx,%ecx		// sides = 0
	fnstsw	%ax			// status of the dist1 compare
	fcomps	pl_dist(%edx)		// compare dist2 with p->dist, pop
	andb	$1,%ah			// 1 when dist1 < p->dist
	xorb	$1,%ah			// 1 when dist1 >= p->dist
	addb	%ah,%cl			// sides bit 0

	fnstsw	%ax			// status of the dist2 compare
	andb	$1,%ah			// 1 when dist2 < p->dist
	addb	%ah,%ah			// move it up to bit 1
	addb	%ah,%cl			// sides bit 1

//	return sides;

	popl	%ebx
	movl	%ecx,%eax		// return status

	ret


// signbits was 8 or more.  This is reached before anything is pushed
// on the x87 stack; %ebx is still on the CPU stack.
// NOTE(review): no code follows the call, so BOPS_Error is presumably
// fatal and never returns - confirm against its definition.
Lerror:
	call	C(BOPS_Error)

#endif	// id386