/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
//
// math.s
// x86 assembly-language math routines.
//
// Syntax: GNU as, AT&T operand order (source, destination), 32-bit x86,
// run through the C preprocessor. All arguments are read from the stack
// and integer results are returned in %eax.

#include "asm_i386.h"
#include "quakeasm.h"


#if id386

	.data

// Dispatch table for BoxOnPlaneSide: one code address per sign-bits
// value 0..7. The index is range-checked before the table is used.
	.align 4
Ljmptab:	.long	Lcase0, Lcase1, Lcase2, Lcase3
			.long	Lcase4, Lcase5, Lcase6, Lcase7

	.text

//----------------------------------------------------------------------
// Invert24To16
//
// In:    val (32-bit integer) at 4(%esp)
// Out:   %eax = 0x10000000000 / val (unsigned divide), or 0xFFFFFFFF
//        when val <= 0x100 under a signed compare
// Clobb: %ecx, %edx (holds the remainder after the divide), flags
//
// The guard rejects every divisor for which the quotient would not fit
// in 32 bits (and, because the compare is signed, every value with the
// top bit set), so the divl below cannot raise a divide error.
//----------------------------------------------------------------------

// TODO: rounding needed?
// stack parameter offset
#define	val	4

.globl C(Invert24To16)
C(Invert24To16):

	movl	val(%esp),%ecx
	movl	$0x100,%edx		// 0x10000000000 as dividend
	cmpl	%edx,%ecx
	jle		LOutOfRange

	subl	%eax,%eax		// low half of the %edx:%eax dividend = 0
	divl	%ecx

	ret

LOutOfRange:
	movl	$0xFFFFFFFF,%eax
	ret

//----------------------------------------------------------------------
// TransformVector
//
// In:    in  (pointer to 3 single-precision floats) at 4(%esp)
//        out (pointer to 3 single-precision floats) at 8(%esp)
// Out:   out[0] = in . vright
//        out[1] = in . vup
//        out[2] = in . vpn
// Clobb: %eax, %edx, flags untouched; uses up to six x87 stack slots
//        and leaves the x87 stack at its entry depth
//
// vright, vup and vpn are external 3-float vectors. Every load from in
// happens before the first store to out.
//----------------------------------------------------------------------

#define	in	4
#define out	8

	.align 2
.globl C(TransformVector)
C(TransformVector):
	movl	in(%esp),%eax
	movl	out(%esp),%edx

	flds	(%eax)		// in[0]
	fmuls	C(vright)	// in[0]*vright[0]
	flds	(%eax)		// in[0] | in[0]*vright[0]
	fmuls	C(vup)		// in[0]*vup[0] | in[0]*vright[0]
	flds	(%eax)		// in[0] | in[0]*vup[0] | in[0]*vright[0]
	fmuls	C(vpn)		// in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]

	flds	4(%eax)		// in[1] | ...
	fmuls	C(vright)+4	// in[1]*vright[1] | ...
	flds	4(%eax)		// in[1] | in[1]*vright[1] | ...
	fmuls	C(vup)+4	// in[1]*vup[1] | in[1]*vright[1] | ...
	flds	4(%eax)		// in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
	fmuls	C(vpn)+4	// in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
	fxch	%st(2)		// in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...

	faddp	%st(0),%st(5)	// in[1]*vup[1] | in[1]*vpn[1] | ...
	faddp	%st(0),%st(3)	// in[1]*vpn[1] | ...
	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum

	flds	8(%eax)		// in[2] | ...
	fmuls	C(vright)+8	// in[2]*vright[2] | ...
	flds	8(%eax)		// in[2] | in[2]*vright[2] | ...
	fmuls	C(vup)+8	// in[2]*vup[2] | in[2]*vright[2] | ...
	flds	8(%eax)		// in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
	fmuls	C(vpn)+8	// in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
	fxch	%st(2)		// in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...

	faddp	%st(0),%st(5)	// in[2]*vup[2] | in[2]*vpn[2] | ...
	faddp	%st(0),%st(3)	// in[2]*vpn[2] | ...
	faddp	%st(0),%st(1)	// vpn_accum | vup_accum | vright_accum

	fstps	8(%edx)		// out[2]
	fstps	4(%edx)		// out[1]
	fstps	(%edx)		// out[0]

	ret


//----------------------------------------------------------------------
// BoxOnPlaneSide
//
// In:    emins (pointer to 3 floats) at  4(%esp) on entry
//        emaxs (pointer to 3 floats) at  8(%esp) on entry
//        p     (pointer to a plane)  at 12(%esp) on entry
//        The plane is read through the externally defined offsets
//        pl_normal (3 floats), pl_dist (float) and pl_signbits (byte).
// Out:   %eax = sides, where
//          bit 0 (value 1) is set when dist1 >= p->dist
//          bit 1 (value 2) is set when dist2 <  p->dist
//        dist1 and dist2 are the per-case sums given above each LcaseN.
// Clobb: %ecx, %edx, flags; %ebx is saved and restored; uses up to six
//        x87 stack slots and leaves the x87 stack at its entry depth
//
// Register roles after the prologue:
//   %edx = p, %ecx = emins, %ebx = emaxs, %eax = p->signbits (0..7)
//
// The offsets below include the 4 bytes taken by the pushed %ebx.
//----------------------------------------------------------------------

#define EMINS	4+4
#define EMAXS	4+8
#define P		4+12

	.align 2
.globl C(BoxOnPlaneSide)
C(BoxOnPlaneSide):
	pushl	%ebx

	movl	P(%esp),%edx
	movl	EMINS(%esp),%ecx
	xorl	%eax,%eax
	movl	EMAXS(%esp),%ebx
	movb	pl_signbits(%edx),%al
	cmpb	$8,%al
// Unsigned branch: the byte was zero-extended into %eax and is used as
// a table index, so 0x80..0xFF must be rejected too. A signed branch
// would see those values as negative and index far past Ljmptab.
	jae		Lerror
	flds	pl_normal(%edx)		// p->normal[0]
	fld		%st(0)				// p->normal[0] | p->normal[0]
	jmp		*Ljmptab(,%eax,4)	// indirect jump through the table


//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase0:
	fmuls	(%ebx)				// p->normal[0]*emaxs[0] | p->normal[0]
	flds	pl_normal+4(%edx)	// p->normal[1] | p->normal[0]*emaxs[0] |
								//  p->normal[0]
	fxch	%st(2)				// p->normal[0] | p->normal[0]*emaxs[0] |
								//  p->normal[1]
	fmuls	(%ecx)				// p->normal[0]*emins[0] |
								//  p->normal[0]*emaxs[0] | p->normal[1]
	fxch	%st(2)				// p->normal[1] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fld		%st(0)				// p->normal[1] | p->normal[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fmuls	4(%ebx)				// p->normal[1]*emaxs[1] | p->normal[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	flds	pl_normal+8(%edx)	// p->normal[2] | p->normal[1]*emaxs[1] |
								//  p->normal[1] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fxch	%st(2)				// p->normal[1] | p->normal[1]*emaxs[1] |
								//  p->normal[2] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fmuls	4(%ecx)				// p->normal[1]*emins[1] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[2] | p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fxch	%st(2)				// p->normal[2] | p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fld		%st(0)				// p->normal[2] | p->normal[2] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fmuls	8(%ebx)				// p->normal[2]*emaxs[2] |
								//  p->normal[2] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[0]*emins[0]
	fxch	%st(5)				// p->normal[0]*emins[0] |
								//  p->normal[2] |
								//  p->normal[1]*emaxs[1] |
								//  p->normal[1]*emins[1] |
								//  p->normal[0]*emaxs[0] |
								//  p->normal[2]*emaxs[2]
	faddp	%st(0),%st(3)		//p->normal[2] |
								// p->normal[1]*emaxs[1] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0] |
								// p->normal[2]*emaxs[2]
	fmuls	8(%ecx)				//p->normal[2]*emins[2] |
								// p->normal[1]*emaxs[1] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0] |
								// p->normal[2]*emaxs[2]
	fxch	%st(1)				//p->normal[1]*emaxs[1] |
								// p->normal[2]*emins[2] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0] |
								// p->normal[2]*emaxs[2]
	faddp	%st(0),%st(3)		//p->normal[2]*emins[2] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
								// p->normal[2]*emaxs[2]
	fxch	%st(3)				//p->normal[2]*emaxs[2] |
								// p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
								// p->normal[2]*emins[2]
	faddp	%st(0),%st(2)		//p->normal[1]*emins[1]+p->normal[0]*emins[0]|
								// dist1 | p->normal[2]*emins[2]

	jmp		LSetSides

// Lcase1..Lcase7 use exactly the same instruction schedule and x87
// stack layout as Lcase0; only the choice of emins (%ecx) or emaxs
// (%ebx) for each multiply differs. In every case the 1st, 3rd and 5th
// multiplies feed dist1 and the 2nd, 4th and 6th feed dist2.

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
Lcase1:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// emins[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase2:
	fmuls	(%ebx)				// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// emins[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
Lcase3:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase4:
	fmuls	(%ebx)				// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// emins[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// emins[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
Lcase5:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ebx)				// emaxs[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ecx)				// emins[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase6:
	fmuls	(%ebx)				// emaxs[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ecx)				// emins[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

	jmp		LSetSides

//dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
//dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
Lcase7:
	fmuls	(%ecx)				// emins[0]
	flds	pl_normal+4(%edx)
	fxch	%st(2)
	fmuls	(%ebx)				// emaxs[0]
	fxch	%st(2)
	fld		%st(0)
	fmuls	4(%ecx)				// emins[1]
	flds	pl_normal+8(%edx)
	fxch	%st(2)
	fmuls	4(%ebx)				// emaxs[1]
	fxch	%st(2)
	fld		%st(0)
	fmuls	8(%ecx)				// emins[2]
	fxch	%st(5)
	faddp	%st(0),%st(3)
	fmuls	8(%ebx)				// emaxs[2]
	fxch	%st(1)
	faddp	%st(0),%st(3)
	fxch	%st(3)
	faddp	%st(0),%st(2)

// Lcase7 falls through into LSetSides; every other case jumps here.
// On arrival the x87 stack holds:
//   partial dist2 | dist1 | last dist2 term

LSetSides:

//	sides = 0;
//	if (dist1 >= p->dist)
//		sides = 1;
//	if (dist2 < p->dist)
//		sides |= 2;

	faddp	%st(0),%st(2)		// dist1 | dist2
	fcomps	pl_dist(%edx)		// compare dist1 with p->dist, pop
	xorl	%ecx,%ecx			// sides = 0 (emins pointer no longer needed)
	fnstsw	%ax					// status word of the dist1 compare
	fcomps	pl_dist(%edx)		// compare dist2 with p->dist, pop
	andb	$1,%ah				// keep C0: set when dist1 < p->dist
	xorb	$1,%ah				// invert: 1 when dist1 >= p->dist
	addb	%ah,%cl				// sides bit 0

	fnstsw	%ax					// status word of the dist2 compare
	andb	$1,%ah				// keep C0: set when dist2 < p->dist
	addb	%ah,%ah				// move it to bit 1 (value 2)
	addb	%ah,%cl				// sides bit 1

//	return sides;

	popl	%ebx
	movl	%ecx,%eax		// return status

	ret


// Reached when p->signbits is not in 0..7. Nothing has been pushed on
// the x87 stack yet at this point.
// NOTE(review): no instruction follows the call and the saved %ebx is
// still on the stack, so this path relies on BOPS_Error never
// returning -- confirm against its definition.
Lerror:
	call	C(BOPS_Error)

#endif	// id386