/* 3dnow_normal.S revision 946ad2720a00696a59a253d81110ea96d397a463 */
/* $Id: 3dnow_normal.S,v 1.3 2002/08/08 15:36:50 brianp Exp $ */

/*
 * Mesa 3-D graphics library
 * Version:  4.1
 *
 * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * 3Dnow assembly code by Holger Waechtler
 */

#include "matypes.h"
#include "norm_args.h"

    SEG_TEXT

/* M(i): i-th 32-bit element of the matrix ECX points at (mat->inv). */
#define M(i)    REGOFF(i * 4, ECX)
/* STRIDE: dword at byte offset 12 of the input vector struct held in ESI; */
/* it is added to the input pointer after every normal.                    */
#define STRIDE  REGOFF(12, ESI)


/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Two passes over in->count normals:
 *   1. transform: r0 = x0*m0 + x1*m1 + x2*m2
 *                 r1 = x0*m4 + x1*m5 + x2*m6
 *                 r2 = x0*m8 + x1*m9 + x2*m10      (m = mat->inv)
 *      written to dest->start, 16 bytes per result.
 *   2. normalize the results in place, either by multiplying with the
 *      caller supplied lengths[] entries (lengths != NULL) or by computing
 *      1/sqrt(r.r) with PFRSQRT plus one refinement step.
 *
 * Arguments are fetched through the ARG_* macros of norm_args.h
 * (presumably FRAME_OFFSET-relative stack slots -- confirm there).
 * dest->count is set to in->count.  Nothing is done when count == 0.
 *
 * NOTE(review): scale is folded into the matrix only when lengths == NULL;
 * with lengths supplied the matrix is used unscaled.  Confirm this is the
 * intended contract.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )      /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )      /* in->start */
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )      /* dest->start */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* mat->inv */

    CMP_L      ( CONST(0), EBP )                    /* count > 0 ?? */
    JE         ( LLBL (G3TN_end) )

    MOV_L      ( REGOFF (V3F_COUNT, ESI), EBP )
    FEMMS

    PUSH_L     ( EBP )
    PUSH_L     ( EAX )
    PUSH_L     ( EDX )                              /* save counter & pointer for */
                                                    /* the normalize pass         */
#undef  FRAME_OFFSET
#define FRAME_OFFSET 24                             /* three more dwords on the stack */

    MOVQ       ( M(0), MM3 )                        /* m1              | m0           */
    MOVQ       ( M(4), MM4 )                        /* m5              | m4           */

    MOVD       ( M(2), MM5 )                        /*                 | m2           */
    PUNPCKLDQ  ( M(6), MM5 )                        /* m6              | m2           */

    MOVQ       ( M(8), MM6 )                        /* m9              | m8           */
    MOVQ       ( M(10), MM7 )                       /* m11 (unused)    | m10          */

    CMP_L      ( CONST(0), EDI )                    /* lengths == 0 ? */
    JNE        ( LLBL (G3TN_scale_end ) )           /* lengths given: skip scaling */

    MOVD       ( ARG_SCALE, MM0 )                   /*                 | scale        */
    PUNPCKLDQ  ( MM0, MM0 )                         /* scale           | scale        */

    PFMUL      ( MM0, MM3 )                         /* scale * m1      | scale * m0   */
    PFMUL      ( MM0, MM4 )                         /* scale * m5      | scale * m4   */
    PFMUL      ( MM0, MM5 )                         /* scale * m6      | scale * m2   */
    PFMUL      ( MM0, MM6 )                         /* scale * m9      | scale * m8   */
    PFMUL      ( MM0, MM7 )                         /*                 | scale * m10  */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):
    MOVQ       ( REGIND (EDX), MM0 )                /* x1              | x0           */
    MOVD       ( REGOFF (8, EDX), MM2 )             /*                 | x2           */

    MOVQ       ( MM0, MM1 )                         /* x1              | x0           */
    PUNPCKLDQ  ( MM2, MM2 )                         /* x2              | x2           */

    PFMUL      ( MM3, MM0 )                         /* x1*m1           | x0*m0        */
    ADD_L      ( CONST(16), EAX )                   /* next r */

    PREFETCHW  ( REGIND(EAX) )

    PFMUL      ( MM4, MM1 )                         /* x1*m5           | x0*m4        */
    PFACC      ( MM1, MM0 )                         /* x0*m4+x1*m5     | x0*m0+x1*m1  */

    PFMUL      ( MM5, MM2 )                         /* x2*m6           | x2*m2        */
    PFADD      ( MM2, MM0 )                         /* x0*m4+x1*m5+x2*m6 | x0*m0+x1*m1+x2*m2 */

    MOVQ       ( REGIND (EDX), MM1 )                /* x1              | x0           */
    MOVQ       ( MM0, REGOFF(-16, EAX) )            /* write r0, r1 */

    PFMUL      ( MM6, MM1 )                         /* x1*m9           | x0*m8        */
    MOVD       ( REGOFF (8, EDX), MM2 )             /*                 | x2           */

    PFMUL      ( MM7, MM2 )                         /*                 | x2*m10       */
    PFACC      ( MM1, MM1 )                         /* *not used*      | x0*m8+x1*m9  */

    PFADD      ( MM2, MM1 )                         /* *not used*      | x0*m8+x1*m9+x2*m10 */
    ADD_L      ( STRIDE, EDX )                      /* next normal */

    PREFETCH   ( REGIND(EDX) )

    MOVD       ( MM1, REGOFF(-8, EAX) )             /* write r2 */
    DEC_L      ( EBP )                              /* decrement normal counter */
    JNE        ( LLBL (G3TN_transform) )            /* DEC_L leaves CF alone: test ZF only */


    POP_L      ( EDX )                              /* end of transform ---       */
    POP_L      ( EAX )                              /* now normalizing ...        */
    POP_L      ( EBP )

    CMP_L      ( CONST(0), EDI )                    /* lengths == 0 ? */
    JE         ( LLBL (G3TN_norm ) )                /* calculate lengths */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):                         /* use precalculated lengths */

    PREFETCHW  ( REGOFF(12,EAX) )

    MOVQ       ( REGIND(EAX), MM0 )                 /* x1              | x0           */
    MOVD       ( REGOFF(8, EAX), MM1 )              /*                 | x2           */

    MOVD       ( REGIND (EDI), MM3 )                /*                 | length (x)   */
    PFMUL      ( MM3, MM1 )                         /*                 | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                         /* length (x)      | length (x)   */
    PFMUL      ( MM3, MM0 )                         /* x1 (normalized) | x0 (normalized) */

    ADD_L      ( CONST(4), EDI )                    /* next length */

    PREFETCH   ( REGIND(EDI) )

    MOVQ       ( MM0, REGIND(EAX) )                 /* write new x0, x1 */
    MOVD       ( MM1, REGOFF(8, EAX) )              /* write new x2 */

    ADD_L      ( CONST(16), EAX )                   /* next r */
    DEC_L      ( EBP )                              /* decrement normal counter */

    JNE        ( LLBL (G3TN_norm_w_lengths) )
    JMP        ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                                   /* need to calculate lengths */

    PREFETCHW  ( REGIND(EAX) )

    /* Load the transformed vector for this iteration from dest; the MMX  */
    /* registers still hold leftovers of the transform pass at this point. */
    MOVQ       ( REGIND (EAX), MM0 )                /* x1              | x0           */
    MOVD       ( REGOFF(8, EAX), MM1 )              /* 0               | x2           */

    MOVQ       ( MM0, MM3 )                         /* x1              | x0           */
    MOVQ       ( MM1, MM4 )                         /*                 | x2           */

    PFMUL      ( MM0, MM3 )                         /* x1*x1           | x0*x0        */
    ADD_L      ( CONST(16), EAX )                   /* next r */

    PFMUL      ( MM1, MM4 )                         /*                 | x2*x2        */
    PFADD      ( MM4, MM3 )                         /* x1*x1           | x0*x0+x2*x2  */

    PFACC      ( MM3, MM3 )                         /* **not used**    | x0*x0+x1*x1+x2*x2 */
    PFRSQRT    ( MM3, MM5 )                         /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */

    MOVQ       ( MM5, MM4 )
    PUNPCKLDQ  ( MM3, MM3 )

    DEC_L      ( EBP )                              /* decrement normal counter */
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )                         /* refinement step of the     */
    PFRCPIT2   ( MM4, MM5 )                         /* reciprocal square root     */

    PFMUL      ( MM5, MM0 )                         /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM0, REGOFF(-16, EAX) )            /* write new x0, x1 */
    PFMUL      ( MM5, MM1 )                         /*                 | x2 (normalized) */

    MOVD       ( MM1, REGOFF(-8, EAX) )             /* write new x2 */
    JNE        ( LLBL (G3TN_norm) )                 /* ZF still from DEC_L: MMX/3DNow! ops don't touch EFLAGS */

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET



ALIGNTEXT16
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Single pass: multiplies every input normal component-wise with the
 * diagonal (m0, m5, m10) of mat->inv and normalizes the result, either with
 * the caller supplied lengths[] entries or with a computed 1/sqrt(r.r).
 * Results go to dest->start, 16 bytes apart; dest->count = in->count.
 *
 * NOTE(review): scale is folded into the diagonal only when lengths == NULL;
 * confirm this is the intended contract.
 */
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_LENGTHS, EDI )
    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )      /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )      /* dest->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* mat->inv */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )      /* in->start */

    CMP_L      ( CONST(0), EBP )                    /* count > 0 ?? */
    JE         ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD       ( M(0), MM0 )                        /*                 | m0           */
    PUNPCKLDQ  ( M(5), MM0 )                        /* m5              | m0           */

    MOVD       ( M(10), MM2 )                       /*                 | m10          */
    PUNPCKLDQ  ( MM2, MM2 )                         /* m10             | m10          */

    CMP_L      ( CONST(0), EDI )                    /* lengths == 0 ? */
    JNE        ( LLBL (G3TNNR_scale_end ) )         /* lengths given: skip scaling */

    MOVD       ( ARG_SCALE, MM7 )                   /*                 | scale        */
    PUNPCKLDQ  ( MM7, MM7 )                         /* scale           | scale        */

    PFMUL      ( MM7, MM0 )                         /* scale * m5      | scale * m0   */
    PFMUL      ( MM7, MM2 )                         /* scale * m10     | scale * m10  */

ALIGNTEXT32
LLBL (G3TNNR_scale_end):
    CMP_L      ( CONST(0), EDI )                    /* lengths == 0 ? */
    JE         ( LLBL (G3TNNR_norm) )               /* need to calculate lengths */


ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):                       /* use precalculated lengths */

    PREFETCHW  ( REGIND(EAX) )

    /* The length is fetched inside the iteration that consumes it, so    */
    /* exactly count entries of lengths[] are read.                        */
    MOVD       ( REGIND(EDI), MM3 )                 /*                 | length (x)   */

    MOVQ       ( REGIND(EDX), MM6 )                 /* x1              | x0           */
    MOVD       ( REGOFF(8, EDX), MM7 )              /*                 | x2           */

    PFMUL      ( MM0, MM6 )                         /* x1*m5           | x0*m0        */
    ADD_L      ( STRIDE, EDX )                      /* next normal */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM7 )                         /*                 | x2*m10       */
    ADD_L      ( CONST(16), EAX )                   /* next r */

    PFMUL      ( MM3, MM7 )                         /*                 | x2 (normalized) */
    PUNPCKLDQ  ( MM3, MM3 )                         /* length (x)      | length (x)   */

    ADD_L      ( CONST(4), EDI )                    /* next length */
    PFMUL      ( MM3, MM6 )                         /* x1 (normalized) | x0 (normalized) */

    DEC_L      ( EBP )                              /* decrement normal counter */
    MOVQ       ( MM6, REGOFF(-16, EAX) )            /* write r0, r1 */

    MOVD       ( MM7, REGOFF(-8, EAX) )             /* write r2 */

    JNE        ( LLBL (G3TNNR_norm_w_lengths) )     /* ZF from DEC_L; MMX moves keep EFLAGS */
    JMP        ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                                 /* need to calculate lengths */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM6 )                 /* x1              | x0           */
    MOVD       ( REGOFF(8, EDX), MM7 )              /*                 | x2           */

    PFMUL      ( MM0, MM6 )                         /* x1*m5           | x0*m0        */
    ADD_L      ( CONST(16), EAX )                   /* next r */

    PFMUL      ( MM2, MM7 )                         /*                 | x2*m10       */
    MOVQ       ( MM6, MM3 )                         /* x1 (transformed)| x0 (transformed) */

    MOVQ       ( MM7, MM4 )                         /*                 | x2 (transformed) */
    PFMUL      ( MM6, MM3 )                         /* x1*x1           | x0*x0        */


    PFMUL      ( MM7, MM4 )                         /*                 | x2*x2        */
    PFACC      ( MM3, MM3 )                         /* **not used**    | x0*x0+x1*x1  */

    PFADD      ( MM4, MM3 )                         /*                 | x0*x0+x1*x1+x2*x2 */
    ADD_L      ( STRIDE, EDX )                      /* next normal */

    PREFETCH   ( REGIND(EDX) )

    PFRSQRT    ( MM3, MM5 )                         /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */
    MOVQ       ( MM5, MM4 )

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )                         /* refinement step, part 1 */
    DEC_L      ( EBP )                              /* decrement normal counter */

    PFRCPIT2   ( MM4, MM5 )                         /* refinement step, part 2 */
    PFMUL      ( MM5, MM6 )                         /* x1 (normalized) | x0 (normalized) */

    MOVQ       ( MM6, REGOFF(-16, EAX) )            /* write r0, r1 */
    PFMUL      ( MM5, MM7 )                         /*                 | x2 (normalized) */

    MOVD       ( MM7, REGOFF(-8, EAX) )             /* write r2 */
    JNE        ( LLBL (G3TNNR_norm) )               /* ZF from DEC_L above */


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET






/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Multiplies every input normal component-wise with scale * (m0, m5, m10)
 * of mat->inv and stores the result at dest->start, 16 bytes apart.
 * dest->count = in->count.  No normalization is performed.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, EAX )
    MOV_L      ( ARG_DEST, EDX )
    MOV_L      ( REGOFF(V3F_COUNT, EAX), EBP )      /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EDX) )
    MOV_L      ( ARG_IN, ESI )                      /* STRIDE is addressed via ESI */
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* mat->inv */
    MOV_L      ( REGOFF(V3F_START, EDX), EAX )      /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )      /* in->start */

    CMP_L      ( CONST(0), EBP )                    /* count > 0 ?? */
    JE         ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM6 )                   /*                 | scale        */
    PUNPCKLDQ  ( MM6, MM6 )                         /* scale           | scale        */

    MOVD       ( REGIND(ECX), MM0 )                 /*                 | m0           */
    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 )             /* m5              | m0           */

    PFMUL      ( MM6, MM0 )                         /* scale*m5        | scale*m0     */
    MOVD       ( REGOFF(40, ECX), MM2 )             /*                 | m10          */

    PFMUL      ( MM6, MM2 )                         /*                 | scale*m10    */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )                 /* x1              | x0           */
    MOVD       ( REGOFF(8, EDX), MM5 )              /*                 | x2           */

    PFMUL      ( MM0, MM4 )                         /* x1*m5           | x0*m0        */
    ADD_L      ( STRIDE, EDX )                      /* next normal */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                         /*                 | x2*m10       */
    ADD_L      ( CONST(16), EAX )                   /* next r */

    DEC_L      ( EBP )                              /* decrement normal counter */
    MOVQ       ( MM4, REGOFF(-16, EAX) )            /* write r0, r1 */

    MOVD       ( MM5, REGOFF(-8, EAX) )             /* write r2 */
    JNE        ( LLBL (G3TRNR_rescale) )            /* cnt != 0 ? -> process next normal */

    FEMMS

LLBL (G3TRNR_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET





/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Transforms every input normal by the upper 3x3 part of scale * mat->inv:
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10
 * and stores the result at dest->start, 16 bytes apart.
 * dest->count = in->count.  No normalization is performed.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )      /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )      /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )      /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* mat->inv */

    CMP_L      ( CONST(0), EDI )                    /* count > 0 ?? */
    JE         ( LLBL (G3TR_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM3 )                 /* m1              | m0           */

    MOVQ       ( REGOFF(16,ECX), MM4 )              /* m5              | m4           */
    MOVD       ( ARG_SCALE, MM0 )                   /*                 | scale        */

    MOVD       ( REGOFF(8,ECX), MM5 )               /*                 | m2           */
    PUNPCKLDQ  ( MM0, MM0 )                         /* scale           | scale        */

    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 )             /* m6              | m2           */
    PFMUL      ( MM0, MM3 )                         /* scale*m1        | scale*m0     */

    MOVQ       ( REGOFF(32, ECX), MM6 )             /* m9              | m8           */
    PFMUL      ( MM0, MM4 )                         /* scale*m5        | scale*m4     */

    MOVD       ( REGOFF(40, ECX), MM7 )             /*                 | m10          */
    PFMUL      ( MM0, MM5 )                         /* scale*m6        | scale*m2     */

    PFMUL      ( MM0, MM6 )                         /* scale*m9        | scale*m8     */

    PFMUL      ( MM0, MM7 )                         /*                 | scale*m10    */

ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )                 /* x1              | x0           */
    MOVD       ( REGOFF(8, EDX), MM2 )              /*                 | x2           */

    MOVQ       ( MM0, MM1 )                         /* x1              | x0           */
    PUNPCKLDQ  ( MM2, MM2 )                         /* x2              | x2           */

    PFMUL      ( MM3, MM0 )                         /* x1*m1           | x0*m0        */
    ADD_L      ( CONST(16), EAX )                   /* next r */

    PFMUL      ( MM4, MM1 )                         /* x1*m5           | x0*m4        */
    PFACC      ( MM1, MM0 )                         /* x0*m4+x1*m5     | x0*m0+x1*m1  */

    MOVQ       ( REGIND(EDX), MM1 )                 /* x1              | x0           */

    PFMUL      ( MM5, MM2 )                         /* x2*m6           | x2*m2        */
    PFADD      ( MM2, MM0 )                         /* x0*m4...+x2*m6  | x0*m0+x1*m1+x2*m2 */

    MOVD       ( REGOFF(8, EDX), MM2 )              /*                 | x2           */
    ADD_L      ( STRIDE, EDX )                      /* next normal */

    PREFETCH   ( REGIND(EDX) )

    MOVQ       ( MM0, REGOFF(-16, EAX) )            /* write r0, r1 */
    PFMUL      ( MM6, MM1 )                         /* x1*m9           | x0*m8        */

    PFMUL      ( MM7, MM2 )                         /*                 | x2*m10       */
    PFACC      ( MM1, MM1 )                         /* *not used*      | x0*m8+x1*m9  */

    PFADD      ( MM2, MM1 )                         /* *not used*      | x0*m8+x1*m9+x2*m10 */
    MOVD       ( MM1, REGOFF(-8, EAX) )             /* write r2 */

    DEC_L      ( EDI )                              /* decrement normal counter */
    JNE        ( LLBL (G3TR_rescale) )

    FEMMS

LLBL (G3TR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET







/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Multiplies every input normal component-wise with the diagonal
 * (m0, m5, m10) of mat->inv and stores the result at dest->start,
 * 16 bytes apart.  dest->count = in->count.  No scaling, no normalization.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )      /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )      /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )      /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* mat->inv */

    CMP_L      ( CONST(0), EDI )                    /* count > 0 ?? */
    JE         ( LLBL (G3TNR_end) )

    FEMMS

    MOVD       ( REGIND(ECX), MM0 )                 /*                 | m0           */
    PUNPCKLDQ  ( REGOFF(20, ECX), MM0 )             /* m5              | m0           */

    MOVD       ( REGOFF(40, ECX), MM2 )             /*                 | m10          */
    PUNPCKLDQ  ( MM2, MM2 )                         /* m10             | m10          */

ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM4 )                 /* x1              | x0           */
    MOVD       ( REGOFF(8, EDX), MM5 )              /*                 | x2           */

    PFMUL      ( MM0, MM4 )                         /* x1*m5           | x0*m0        */
    ADD_L      ( STRIDE, EDX)                       /* next normal */

    PREFETCH   ( REGIND(EDX) )

    PFMUL      ( MM2, MM5 )                         /*                 | x2*m10       */
    ADD_L      ( CONST(16), EAX )                   /* next r */

    DEC_L      ( EDI )                              /* decrement normal counter */
    MOVQ       ( MM4, REGOFF(-16, EAX) )            /* write r0, r1 */

    MOVD       ( MM5, REGOFF(-8, EAX) )             /* write r2 */
    JNE        ( LLBL (G3TNR_transform) )           /* ZF from DEC_L; MMX moves keep EFLAGS */

    FEMMS

LLBL (G3TNR_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET








/*
 * _mesa_3dnow_transform_normals
 *
 * Transforms every input normal by the upper 3x3 part of mat->inv:
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10
 * and stores the result at dest->start, 16 bytes apart.
 * dest->count = in->count.  No scaling, no normalization.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( ARG_MAT, ECX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDI )      /* dest->count = in->count */
    MOV_L      ( EDI, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )      /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), EDX )      /* in->start */
    MOV_L      ( REGOFF(MATRIX_INV, ECX), ECX )     /* mat->inv */

    CMP_L      ( CONST(0), EDI )                    /* count > 0 ?? */
    JE         ( LLBL (G3T_end) )

    FEMMS

    MOVQ       ( REGIND(ECX), MM3 )                 /* m1              | m0           */
    MOVQ       ( REGOFF(16, ECX), MM4 )             /* m5              | m4           */

    MOVD       ( REGOFF(8, ECX), MM5 )              /*                 | m2           */
    PUNPCKLDQ  ( REGOFF(24, ECX), MM5 )             /* m6              | m2           */

    MOVQ       ( REGOFF(32, ECX), MM6 )             /* m9              | m8           */
    MOVD       ( REGOFF(40, ECX), MM7 )             /*                 | m10          */

ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(EDX), MM0 )                 /* x1              | x0           */
    MOVD       ( REGOFF(8, EDX), MM2 )              /*                 | x2           */

    MOVQ       ( MM0, MM1 )                         /* x1              | x0           */
    PUNPCKLDQ  ( MM2, MM2 )                         /* x2              | x2           */

    PFMUL      ( MM3, MM0 )                         /* x1*m1           | x0*m0        */
    ADD_L      ( CONST(16), EAX )                   /* next r */

    PFMUL      ( MM4, MM1 )                         /* x1*m5           | x0*m4        */
    PFACC      ( MM1, MM0 )                         /* x0*m4+x1*m5     | x0*m0+x1*m1  */

    PFMUL      ( MM5, MM2 )                         /* x2*m6           | x2*m2        */
    PFADD      ( MM2, MM0 )                         /* x0*m4...+x2*m6  | x0*m0+x1*m1+x2*m2 */

    MOVQ       ( REGIND(EDX), MM1 )                 /* x1              | x0           */
    MOVQ       ( MM0, REGOFF(-16, EAX) )            /* write r0, r1 */

    PFMUL      ( MM6, MM1 )                         /* x1*m9           | x0*m8        */
    MOVD       ( REGOFF(8, EDX), MM2 )              /*                 | x2           */

    PFMUL      ( MM7, MM2 )                         /*                 | x2*m10       */
    ADD_L      ( STRIDE, EDX )                      /* next normal */

    PREFETCH   ( REGIND(EDX) )

    PFACC      ( MM1, MM1 )                         /* *not used*      | x0*m8+x1*m9  */
    PFADD      ( MM2, MM1 )                         /* *not used*      | x0*m8+x1*m9+x2*m10 */

    MOVD       ( MM1, REGOFF(-8, EAX) )             /* write r2 */
    DEC_L      ( EDI )                              /* decrement normal counter */

    JNE        ( LLBL (G3T_transform) )

    FEMMS

LLBL (G3T_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET






/*
 * _mesa_3dnow_normalize_normals
 *
 * Normalizes every input normal and stores it at dest->start, 16 bytes
 * apart.  dest->count = in->count.  With lengths != NULL each normal is
 * multiplied by its lengths[] entry; otherwise 1/sqrt(x.x) is computed with
 * PFRSQRT plus one refinement step.  The matrix argument is not read.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L     ( EDI )
    PUSH_L     ( ESI )
    PUSH_L     ( EBP )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EBP )      /* dest->count = in->count */
    MOV_L      ( EBP, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )      /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX )      /* in->start */
    MOV_L      ( ARG_LENGTHS, EDX )

    CMP_L      ( CONST(0), EBP )                    /* count > 0 ?? */
    JE         ( LLBL (G3N_end) )

    FEMMS

    CMP_L      ( CONST(0), EDX )                    /* lengths == 0 ? */
    JE         ( LLBL (G3N_norm2) )                 /* calculate lengths */

ALIGNTEXT32
LLBL (G3N_norm1):                                   /* use precalculated lengths */

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM0 )                 /* x1              | x0           */
    MOVD       ( REGOFF(8, ECX), MM1 )              /*                 | x2           */

    MOVD       ( REGIND(EDX), MM3 )                 /*                 | length (x)   */
    PFMUL      ( MM3, MM1 )                         /*                 | x2 (normalized) */

    PUNPCKLDQ  ( MM3, MM3 )                         /* length (x)      | length (x)   */
    ADD_L      ( STRIDE, ECX )                      /* next normal */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM3, MM0 )                         /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGIND(EAX) )                 /* write new x0, x1 */

    MOVD       ( MM1, REGOFF(8, EAX) )              /* write new x2 */
    ADD_L      ( CONST(16), EAX )                   /* next r */

    ADD_L      ( CONST(4), EDX )                    /* next length */
    DEC_L      ( EBP )                              /* decrement normal counter */

    JNE        ( LLBL (G3N_norm1) )

    JMP        ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                                   /* need to calculate lengths */

    PREFETCHW  ( REGIND(EAX) )

    /* Load the current normal before it is copied and before the input   */
    /* pointer moves on, so element 0 is processed and nothing past the    */
    /* last element is read.                                               */
    MOVQ       ( REGIND(ECX), MM0 )                 /* x1              | x0           */
    MOVD       ( REGOFF(8, ECX), MM1 )              /*                 | x2           */

    MOVQ       ( MM0, MM3 )                         /* x1              | x0           */
    ADD_L      ( STRIDE, ECX )                      /* next normal */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM3 )                         /* x1*x1           | x0*x0        */
    MOVQ       ( MM1, MM4 )                         /*                 | x2           */

    ADD_L      ( CONST(16), EAX )                   /* next r */
    PFMUL      ( MM1, MM4 )                         /*                 | x2*x2        */

    PFADD      ( MM4, MM3 )                         /* x1*x1           | x0*x0+x2*x2  */
    PFACC      ( MM3, MM3 )                         /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2 */

    PFRSQRT    ( MM3, MM5 )                         /* approx. 1/sqrt (x0*x0+x1*x1+x2*x2) */
    MOVQ       ( MM5, MM4 )

    PUNPCKLDQ  ( MM3, MM3 )
    PFMUL      ( MM5, MM5 )

    PFRSQIT1   ( MM3, MM5 )                         /* refinement step, part 1 */
    DEC_L      ( EBP )                              /* decrement normal counter */

    PFRCPIT2   ( MM4, MM5 )                         /* refinement step, part 2 */

    PFMUL      ( MM5, MM0 )                         /* x1 (normalized) | x0 (normalized) */
    MOVQ       ( MM0, REGOFF(-16, EAX) )            /* write new x0, x1 */

    PFMUL      ( MM5, MM1 )                         /*                 | x2 (normalized) */
    MOVD       ( MM1, REGOFF(-8, EAX) )             /* write new x2 */

    JNE        ( LLBL (G3N_norm2) )                 /* ZF from DEC_L above */

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L      ( EBP )
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET






/*
 * _mesa_3dnow_rescale_normals
 *
 * Multiplies every input normal by scale and stores it at dest->start,
 * 16 bytes apart.  dest->count = in->count.  The matrix and lengths
 * arguments are not read.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):

#undef  FRAME_OFFSET
#define FRAME_OFFSET 8
    PUSH_L     ( EDI )
    PUSH_L     ( ESI )

    MOV_L      ( ARG_IN, ESI )
    MOV_L      ( ARG_DEST, EAX )
    MOV_L      ( REGOFF(V3F_COUNT, ESI), EDX )      /* dest->count = in->count */
    MOV_L      ( EDX, REGOFF(V3F_COUNT, EAX) )
    MOV_L      ( REGOFF(V3F_START, EAX), EAX )      /* dest->start */
    MOV_L      ( REGOFF(V3F_START, ESI), ECX )      /* in->start */

    CMP_L      ( CONST(0), EDX )                    /* count > 0 ?? */
    JE         ( LLBL (G3R_end) )

    FEMMS

    MOVD       ( ARG_SCALE, MM0 )                   /*                 | scale        */
    PUNPCKLDQ  ( MM0, MM0 )                         /* scale           | scale        */

ALIGNTEXT32
LLBL (G3R_rescale):

    PREFETCHW  ( REGIND(EAX) )

    MOVQ       ( REGIND(ECX), MM1 )                 /* x1              | x0           */
    MOVD       ( REGOFF(8, ECX), MM2 )              /*                 | x2           */

    PFMUL      ( MM0, MM1 )                         /* x1*scale        | x0*scale     */
    ADD_L      ( STRIDE, ECX )                      /* next normal */

    PREFETCH   ( REGIND(ECX) )

    PFMUL      ( MM0, MM2 )                         /*                 | x2*scale     */
    ADD_L      ( CONST(16), EAX )                   /* next r */

    MOVQ       ( MM1, REGOFF(-16, EAX) )            /* write r0, r1 */
    MOVD       ( MM2, REGOFF(-8, EAX) )             /* write r2 */

    DEC_L      ( EDX )                              /* decrement normal counter */
    JNE        ( LLBL (G3R_rescale) )

    FEMMS

LLBL (G3R_end):
    POP_L      ( ESI )
    POP_L      ( EDI )
    RET