tgsi_ppc.c revision 9ee1bcf7a5442ccb517a5cfbaf024755bd4d2738
1/************************************************************************** 2 * 3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI to PowerPC code generation. 
30 */ 31 32#include "pipe/p_config.h" 33 34#if defined(PIPE_ARCH_PPC) 35 36#include "util/u_debug.h" 37#include "pipe/p_shader_tokens.h" 38#include "util/u_math.h" 39#include "util/u_memory.h" 40#include "util/u_sse.h" 41#include "tgsi/tgsi_info.h" 42#include "tgsi/tgsi_parse.h" 43#include "tgsi/tgsi_util.h" 44#include "tgsi_dump.h" 45#include "tgsi_exec.h" 46#include "tgsi_ppc.h" 47#include "rtasm/rtasm_ppc.h" 48 49 50/** 51 * Since it's pretty much impossible to form PPC vector immediates, load 52 * them from memory here: 53 */ 54PIPE_ALIGN_VAR(16) const float 55ppc_builtin_constants[] = { 56 1.0f, -128.0f, 128.0, 0.0 57}; 58 59 60#define FOR_EACH_CHANNEL( CHAN )\ 61 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 62 63#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 64 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) 65 66#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 67 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 68 69#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 70 FOR_EACH_CHANNEL( CHAN )\ 71 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 72 73/** 74 * How many TGSI temps should be implemented with real PPC vector registers 75 * rather than memory. 76 */ 77#define MAX_PPC_TEMPS 3 78 79 80/** 81 * Context/state used during code gen. 
82 */ 83struct gen_context 84{ 85 struct ppc_function *f; 86 int inputs_reg; /**< GP register pointing to input params */ 87 int outputs_reg; /**< GP register pointing to output params */ 88 int temps_reg; /**< GP register pointing to temporary "registers" */ 89 int immed_reg; /**< GP register pointing to immediates buffer */ 90 int const_reg; /**< GP register pointing to constants buffer */ 91 int builtins_reg; /**< GP register pointint to built-in constants */ 92 93 int offset_reg; /**< used to reduce redundant li instructions */ 94 int offset_value; 95 96 int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ 97 int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ 98 99 /** 100 * Map TGSI temps to PPC vector temps. 101 * We have 32 PPC vector regs. Use 16 of them for storing 4 TGSI temps. 102 * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1]. 103 */ 104 int temps_map[MAX_PPC_TEMPS][4]; 105 106 /** 107 * Cache of src registers. 108 * This is used to avoid redundant load instructions. 109 */ 110 struct { 111 struct tgsi_full_src_register src; 112 uint chan; 113 uint vec; 114 } regs[12]; /* 3 src regs, 4 channels */ 115 uint num_regs; 116}; 117 118 119/** 120 * Initialize code generation context. 121 */ 122static void 123init_gen_context(struct gen_context *gen, struct ppc_function *func) 124{ 125 uint i; 126 127 memset(gen, 0, sizeof(*gen)); 128 gen->f = func; 129 gen->inputs_reg = ppc_reserve_register(func, 3); /* first function param */ 130 gen->outputs_reg = ppc_reserve_register(func, 4); /* second function param */ 131 gen->temps_reg = ppc_reserve_register(func, 5); /* ... 
*/ 132 gen->immed_reg = ppc_reserve_register(func, 6); 133 gen->const_reg = ppc_reserve_register(func, 7); 134 gen->builtins_reg = ppc_reserve_register(func, 8); 135 gen->one_vec = -1; 136 gen->bit31_vec = -1; 137 gen->offset_reg = -1; 138 gen->offset_value = -9999999; 139 for (i = 0; i < MAX_PPC_TEMPS; i++) { 140 gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f); 141 gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f); 142 gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f); 143 gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f); 144 } 145} 146 147 148/** 149 * Is the given TGSI register stored as a real PPC vector register? 150 */ 151static boolean 152is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) 153{ 154 return (reg->Register.File == TGSI_FILE_TEMPORARY && 155 reg->Register.Index < MAX_PPC_TEMPS); 156} 157 158 159/** 160 * Is the given TGSI register stored as a real PPC vector register? 161 */ 162static boolean 163is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) 164{ 165 return (reg->Register.File == TGSI_FILE_TEMPORARY && 166 reg->Register.Index < MAX_PPC_TEMPS); 167} 168 169 170 171/** 172 * All PPC vector load/store instructions form an effective address 173 * by adding the contents of two registers. For example: 174 * lvx v2,r8,r9 # v2 = memory[r8 + r9] 175 * stvx v2,r8,r9 # memory[r8 + r9] = v2; 176 * So our lvx/stvx instructions are typically preceded by an 'li' instruction 177 * to load r9 (above) with an immediate (an offset). 178 * This code emits that 'li' instruction, but only if the offset value is 179 * different than the previous 'li'. 180 * This optimization seems to save about 10% in the instruction count. 181 * Note that we need to unconditionally emit an 'li' inside basic blocks 182 * (such as inside loops). 
183 */ 184static int 185emit_li_offset(struct gen_context *gen, int offset) 186{ 187 if (gen->offset_reg <= 0) { 188 /* allocate a GP register for storing load/store offset */ 189 gen->offset_reg = ppc_allocate_register(gen->f); 190 } 191 192 /* emit new 'li' if offset is changing */ 193 if (gen->offset_value < 0 || gen->offset_value != offset) { 194 gen->offset_value = offset; 195 ppc_li(gen->f, gen->offset_reg, offset); 196 } 197 198 return gen->offset_reg; 199} 200 201 202/** 203 * Forces subsequent emit_li_offset() calls to emit an 'li'. 204 * To be called at the top of basic blocks. 205 */ 206static void 207reset_li_offset(struct gen_context *gen) 208{ 209 gen->offset_value = -9999999; 210} 211 212 213 214/** 215 * Load the given vector register with {value, value, value, value}. 216 * The value must be in the ppu_builtin_constants[] array. 217 * We wouldn't need this if there was a simple way to load PPC vector 218 * registers with immediate values! 219 */ 220static void 221load_constant_vec(struct gen_context *gen, int dst_vec, float value) 222{ 223 uint pos; 224 for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { 225 if (ppc_builtin_constants[pos] == value) { 226 int offset = pos * 4; 227 int offset_reg = emit_li_offset(gen, offset); 228 229 /* Load 4-byte word into vector register. 230 * The vector slot depends on the effective address we load from. 231 * We know that our builtins start at a 16-byte boundary so we 232 * know that 'swizzle' tells us which vector slot will have the 233 * loaded word. The other vector slots will be undefined. 234 */ 235 ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); 236 /* splat word[pos % 4] across the vector reg */ 237 ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); 238 return; 239 } 240 } 241 assert(0 && "Need to add new constant to ppc_builtin_constants array"); 242} 243 244 245/** 246 * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. 
247 */ 248static int 249gen_one_vec(struct gen_context *gen) 250{ 251 if (gen->one_vec < 0) { 252 gen->one_vec = ppc_allocate_vec_register(gen->f); 253 load_constant_vec(gen, gen->one_vec, 1.0f); 254 } 255 return gen->one_vec; 256} 257 258/** 259 * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. 260 */ 261static int 262gen_get_bit31_vec(struct gen_context *gen) 263{ 264 if (gen->bit31_vec < 0) { 265 gen->bit31_vec = ppc_allocate_vec_register(gen->f); 266 ppc_vspltisw(gen->f, gen->bit31_vec, -1); 267 ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); 268 } 269 return gen->bit31_vec; 270} 271 272 273/** 274 * Register fetch. Return PPC vector register with result. 275 */ 276static int 277emit_fetch(struct gen_context *gen, 278 const struct tgsi_full_src_register *reg, 279 const unsigned chan_index) 280{ 281 uint swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); 282 int dst_vec = -1; 283 284 switch (swizzle) { 285 case TGSI_SWIZZLE_X: 286 case TGSI_SWIZZLE_Y: 287 case TGSI_SWIZZLE_Z: 288 case TGSI_SWIZZLE_W: 289 switch (reg->Register.File) { 290 case TGSI_FILE_INPUT: 291 { 292 int offset = (reg->Register.Index * 4 + swizzle) * 16; 293 int offset_reg = emit_li_offset(gen, offset); 294 dst_vec = ppc_allocate_vec_register(gen->f); 295 ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); 296 } 297 break; 298 case TGSI_FILE_SYSTEM_VALUE: 299 assert(!"unhandled system value in tgsi_ppc.c"); 300 break; 301 case TGSI_FILE_TEMPORARY: 302 if (is_ppc_vec_temporary(reg)) { 303 /* use PPC vec register */ 304 dst_vec = gen->temps_map[reg->Register.Index][swizzle]; 305 } 306 else { 307 /* use memory-based temp register "file" */ 308 int offset = (reg->Register.Index * 4 + swizzle) * 16; 309 int offset_reg = emit_li_offset(gen, offset); 310 dst_vec = ppc_allocate_vec_register(gen->f); 311 ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); 312 } 313 break; 314 case TGSI_FILE_IMMEDIATE: 315 { 316 int offset = 
(reg->Register.Index * 4 + swizzle) * 4; 317 int offset_reg = emit_li_offset(gen, offset); 318 dst_vec = ppc_allocate_vec_register(gen->f); 319 /* Load 4-byte word into vector register. 320 * The vector slot depends on the effective address we load from. 321 * We know that our immediates start at a 16-byte boundary so we 322 * know that 'swizzle' tells us which vector slot will have the 323 * loaded word. The other vector slots will be undefined. 324 */ 325 ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg); 326 /* splat word[swizzle] across the vector reg */ 327 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 328 } 329 break; 330 case TGSI_FILE_CONSTANT: 331 { 332 int offset = (reg->Register.Index * 4 + swizzle) * 4; 333 int offset_reg = emit_li_offset(gen, offset); 334 dst_vec = ppc_allocate_vec_register(gen->f); 335 /* Load 4-byte word into vector register. 336 * The vector slot depends on the effective address we load from. 337 * We know that our constants start at a 16-byte boundary so we 338 * know that 'swizzle' tells us which vector slot will have the 339 * loaded word. The other vector slots will be undefined. 
340 */ 341 ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); 342 /* splat word[swizzle] across the vector reg */ 343 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 344 } 345 break; 346 default: 347 assert( 0 ); 348 } 349 break; 350 default: 351 assert( 0 ); 352 } 353 354 assert(dst_vec >= 0); 355 356 { 357 uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); 358 if (sign_op != TGSI_UTIL_SIGN_KEEP) { 359 int bit31_vec = gen_get_bit31_vec(gen); 360 int dst_vec2; 361 362 if (is_ppc_vec_temporary(reg)) { 363 /* need to use a new temp */ 364 dst_vec2 = ppc_allocate_vec_register(gen->f); 365 } 366 else { 367 dst_vec2 = dst_vec; 368 } 369 370 switch (sign_op) { 371 case TGSI_UTIL_SIGN_CLEAR: 372 /* vec = vec & ~bit31 */ 373 ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec); 374 break; 375 case TGSI_UTIL_SIGN_SET: 376 /* vec = vec | bit31 */ 377 ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec); 378 break; 379 case TGSI_UTIL_SIGN_TOGGLE: 380 /* vec = vec ^ bit31 */ 381 ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec); 382 break; 383 default: 384 assert(0); 385 } 386 return dst_vec2; 387 } 388 } 389 390 return dst_vec; 391} 392 393 394 395/** 396 * Test if two TGSI src registers refer to the same memory location. 397 * We use this to avoid redundant register loads. 
398 */ 399static boolean 400equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, 401 const struct tgsi_full_src_register *b, uint chan_b) 402{ 403 int swz_a, swz_b; 404 int sign_a, sign_b; 405 if (a->Register.File != b->Register.File) 406 return FALSE; 407 if (a->Register.Index != b->Register.Index) 408 return FALSE; 409 swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a); 410 swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b); 411 if (swz_a != swz_b) 412 return FALSE; 413 sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a); 414 sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b); 415 if (sign_a != sign_b) 416 return FALSE; 417 return TRUE; 418} 419 420 421/** 422 * Given a TGSI src register and channel index, return the PPC vector 423 * register containing the value. We use a cache to prevent re-loading 424 * the same register multiple times. 425 * \return index of PPC vector register with the desired src operand 426 */ 427static int 428get_src_vec(struct gen_context *gen, 429 struct tgsi_full_instruction *inst, int src_reg, uint chan) 430{ 431 const const struct tgsi_full_src_register *src = 432 &inst->Src[src_reg]; 433 int vec; 434 uint i; 435 436 /* check the cache */ 437 for (i = 0; i < gen->num_regs; i++) { 438 if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) { 439 /* cache hit */ 440 assert(gen->regs[i].vec >= 0); 441 return gen->regs[i].vec; 442 } 443 } 444 445 /* cache miss: allocate new vec reg and emit fetch/load code */ 446 vec = emit_fetch(gen, src, chan); 447 gen->regs[gen->num_regs].src = *src; 448 gen->regs[gen->num_regs].chan = chan; 449 gen->regs[gen->num_regs].vec = vec; 450 gen->num_regs++; 451 452 assert(gen->num_regs <= Elements(gen->regs)); 453 454 assert(vec >= 0); 455 456 return vec; 457} 458 459 460/** 461 * Clear the src operand cache. To be called at the end of each emit function. 
462 */ 463static void 464release_src_vecs(struct gen_context *gen) 465{ 466 uint i; 467 for (i = 0; i < gen->num_regs; i++) { 468 const const struct tgsi_full_src_register src = gen->regs[i].src; 469 if (!is_ppc_vec_temporary(&src)) { 470 ppc_release_vec_register(gen->f, gen->regs[i].vec); 471 } 472 } 473 gen->num_regs = 0; 474} 475 476 477 478static int 479get_dst_vec(struct gen_context *gen, 480 const struct tgsi_full_instruction *inst, 481 unsigned chan_index) 482{ 483 const struct tgsi_full_dst_register *reg = &inst->Dst[0]; 484 485 if (is_ppc_vec_temporary_dst(reg)) { 486 int vec = gen->temps_map[reg->Register.Index][chan_index]; 487 return vec; 488 } 489 else { 490 return ppc_allocate_vec_register(gen->f); 491 } 492} 493 494 495/** 496 * Register store. Store 'src_vec' at location indicated by 'reg'. 497 * \param free_vec Should the src_vec be released when done? 498 */ 499static void 500emit_store(struct gen_context *gen, 501 int src_vec, 502 const struct tgsi_full_instruction *inst, 503 unsigned chan_index, 504 boolean free_vec) 505{ 506 const struct tgsi_full_dst_register *reg = &inst->Dst[0]; 507 508 switch (reg->Register.File) { 509 case TGSI_FILE_OUTPUT: 510 { 511 int offset = (reg->Register.Index * 4 + chan_index) * 16; 512 int offset_reg = emit_li_offset(gen, offset); 513 ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); 514 } 515 break; 516 case TGSI_FILE_TEMPORARY: 517 if (is_ppc_vec_temporary_dst(reg)) { 518 if (!free_vec) { 519 int dst_vec = gen->temps_map[reg->Register.Index][chan_index]; 520 if (dst_vec != src_vec) 521 ppc_vmove(gen->f, dst_vec, src_vec); 522 } 523 free_vec = FALSE; 524 } 525 else { 526 int offset = (reg->Register.Index * 4 + chan_index) * 16; 527 int offset_reg = emit_li_offset(gen, offset); 528 ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); 529 } 530 break; 531#if 0 532 case TGSI_FILE_ADDRESS: 533 emit_addrs( 534 func, 535 xmm, 536 reg->Register.Index, 537 chan_index ); 538 break; 539#endif 540 default: 541 
assert( 0 ); 542 } 543 544#if 0 545 switch( inst->Instruction.Saturate ) { 546 case TGSI_SAT_NONE: 547 break; 548 549 case TGSI_SAT_ZERO_ONE: 550 /* assert( 0 ); */ 551 break; 552 553 case TGSI_SAT_MINUS_PLUS_ONE: 554 assert( 0 ); 555 break; 556 } 557#endif 558 559 if (free_vec) 560 ppc_release_vec_register(gen->f, src_vec); 561} 562 563 564static void 565emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 566{ 567 int v0, v1; 568 uint chan_index; 569 570 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_X); 571 v1 = ppc_allocate_vec_register(gen->f); 572 573 switch (inst->Instruction.Opcode) { 574 case TGSI_OPCODE_RSQ: 575 /* v1 = 1.0 / sqrt(v0) */ 576 ppc_vrsqrtefp(gen->f, v1, v0); 577 break; 578 case TGSI_OPCODE_RCP: 579 /* v1 = 1.0 / v0 */ 580 ppc_vrefp(gen->f, v1, v0); 581 break; 582 default: 583 assert(0); 584 } 585 586 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { 587 emit_store(gen, v1, inst, chan_index, FALSE); 588 } 589 590 release_src_vecs(gen); 591 ppc_release_vec_register(gen->f, v1); 592} 593 594 595static void 596emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 597{ 598 uint chan_index; 599 600 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 601 int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ 602 int v1 = get_dst_vec(gen, inst, chan_index); 603 switch (inst->Instruction.Opcode) { 604 case TGSI_OPCODE_ABS: 605 /* turn off the most significant bit of each vector float word */ 606 { 607 int bit31_vec = gen_get_bit31_vec(gen); 608 ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */ 609 } 610 break; 611 case TGSI_OPCODE_FLR: 612 ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ 613 break; 614 case TGSI_OPCODE_FRC: 615 ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */ 616 ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */ 617 break; 618 case TGSI_OPCODE_EX2: 619 ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */ 620 break; 621 case TGSI_OPCODE_LG2: 622 /* XXX this may be broken! 
*/ 623 ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ 624 break; 625 case TGSI_OPCODE_MOV: 626 if (v0 != v1) 627 ppc_vmove(gen->f, v1, v0); 628 break; 629 default: 630 assert(0); 631 } 632 emit_store(gen, v1, inst, chan_index, TRUE); /* store v0 */ 633 } 634 635 release_src_vecs(gen); 636} 637 638 639static void 640emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) 641{ 642 int zero_vec = -1; 643 uint chan; 644 645 if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { 646 zero_vec = ppc_allocate_vec_register(gen->f); 647 ppc_vzero(gen->f, zero_vec); 648 } 649 650 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 651 /* fetch src operands */ 652 int v0 = get_src_vec(gen, inst, 0, chan); 653 int v1 = get_src_vec(gen, inst, 1, chan); 654 int v2 = get_dst_vec(gen, inst, chan); 655 656 /* emit binop */ 657 switch (inst->Instruction.Opcode) { 658 case TGSI_OPCODE_ADD: 659 ppc_vaddfp(gen->f, v2, v0, v1); 660 break; 661 case TGSI_OPCODE_SUB: 662 ppc_vsubfp(gen->f, v2, v0, v1); 663 break; 664 case TGSI_OPCODE_MUL: 665 ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec); 666 break; 667 case TGSI_OPCODE_MIN: 668 ppc_vminfp(gen->f, v2, v0, v1); 669 break; 670 case TGSI_OPCODE_MAX: 671 ppc_vmaxfp(gen->f, v2, v0, v1); 672 break; 673 default: 674 assert(0); 675 } 676 677 /* store v2 */ 678 emit_store(gen, v2, inst, chan, TRUE); 679 } 680 681 if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) 682 ppc_release_vec_register(gen->f, zero_vec); 683 684 release_src_vecs(gen); 685} 686 687 688static void 689emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) 690{ 691 uint chan; 692 693 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 694 /* fetch src operands */ 695 int v0 = get_src_vec(gen, inst, 0, chan); 696 int v1 = get_src_vec(gen, inst, 1, chan); 697 int v2 = get_src_vec(gen, inst, 2, chan); 698 int v3 = get_dst_vec(gen, inst, chan); 699 700 /* emit ALU */ 701 switch (inst->Instruction.Opcode) { 702 case TGSI_OPCODE_MAD: 703 ppc_vmaddfp(gen->f, v3, v0, v1, v2); 
/* v3 = v0 * v1 + v2 */ 704 break; 705 case TGSI_OPCODE_LRP: 706 ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ 707 ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ 708 break; 709 default: 710 assert(0); 711 } 712 713 /* store v3 */ 714 emit_store(gen, v3, inst, chan, TRUE); 715 } 716 717 release_src_vecs(gen); 718} 719 720 721/** 722 * Vector comparisons, resulting in 1.0 or 0.0 values. 723 */ 724static void 725emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) 726{ 727 uint chan; 728 int one_vec = gen_one_vec(gen); 729 730 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 731 /* fetch src operands */ 732 int v0 = get_src_vec(gen, inst, 0, chan); 733 int v1 = get_src_vec(gen, inst, 1, chan); 734 int v2 = get_dst_vec(gen, inst, chan); 735 boolean complement = FALSE; 736 737 switch (inst->Instruction.Opcode) { 738 case TGSI_OPCODE_SNE: 739 complement = TRUE; 740 /* fall-through */ 741 case TGSI_OPCODE_SEQ: 742 ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ 743 break; 744 745 case TGSI_OPCODE_SGE: 746 complement = TRUE; 747 /* fall-through */ 748 case TGSI_OPCODE_SLT: 749 ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ 750 break; 751 752 case TGSI_OPCODE_SLE: 753 complement = TRUE; 754 /* fall-through */ 755 case TGSI_OPCODE_SGT: 756 ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? 
~0 : 0 */ 757 break; 758 default: 759 assert(0); 760 } 761 762 /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ 763 764 if (complement) 765 ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ 766 else 767 ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ 768 769 /* store v2 */ 770 emit_store(gen, v2, inst, chan, TRUE); 771 } 772 773 release_src_vecs(gen); 774} 775 776 777static void 778emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) 779{ 780 int v0, v1, v2; 781 uint chan_index; 782 783 v2 = ppc_allocate_vec_register(gen->f); 784 785 ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */ 786 787 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_X); /* v0 = src0.XXXX */ 788 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_X); /* v1 = src1.XXXX */ 789 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 790 791 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_Y); /* v0 = src0.YYYY */ 792 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_Y); /* v1 = src1.YYYY */ 793 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 794 795 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_Z); /* v0 = src0.ZZZZ */ 796 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_Z); /* v1 = src1.ZZZZ */ 797 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 798 799 if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { 800 v0 = get_src_vec(gen, inst, 0, TGSI_CHAN_W); /* v0 = src0.WWWW */ 801 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_W); /* v1 = src1.WWWW */ 802 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 803 } 804 else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { 805 v1 = get_src_vec(gen, inst, 1, TGSI_CHAN_W); /* v1 = src1.WWWW */ 806 ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ 807 } 808 809 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 810 emit_store(gen, v2, inst, chan_index, FALSE); /* store v2, free v2 later */ 811 } 812 813 release_src_vecs(gen); 814 815 ppc_release_vec_register(gen->f, v2); 816} 817 818 819/** Approximation for vr = pow(va, vb) */ 
820static void 821ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) 822{ 823 /* pow(a,b) ~= exp2(log2(a) * b) */ 824 int t_vec = ppc_allocate_vec_register(f); 825 int zero_vec = ppc_allocate_vec_register(f); 826 827 ppc_vzero(f, zero_vec); 828 829 ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ 830 ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */ 831 ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ 832 833 ppc_release_vec_register(f, t_vec); 834 ppc_release_vec_register(f, zero_vec); 835} 836 837 838static void 839emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) 840{ 841 int one_vec = gen_one_vec(gen); 842 843 /* Compute X */ 844 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_X)) { 845 emit_store(gen, one_vec, inst, TGSI_CHAN_X, FALSE); 846 } 847 848 /* Compute Y, Z */ 849 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Y) || 850 IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Z)) { 851 int x_vec; 852 int zero_vec = ppc_allocate_vec_register(gen->f); 853 854 x_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); /* x_vec = src[0].x */ 855 856 ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ 857 ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ 858 859 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Y)) { 860 emit_store(gen, x_vec, inst, TGSI_CHAN_Y, FALSE); 861 } 862 863 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Z)) { 864 int y_vec, w_vec; 865 int z_vec = ppc_allocate_vec_register(gen->f); 866 int pow_vec = ppc_allocate_vec_register(gen->f); 867 int pos_vec = ppc_allocate_vec_register(gen->f); 868 int p128_vec = ppc_allocate_vec_register(gen->f); 869 int n128_vec = ppc_allocate_vec_register(gen->f); 870 871 y_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_Y); /* y_vec = src[0].y */ 872 ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ 873 874 w_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_W); /* w_vec = src[0].w */ 875 876 /* clamp W to [-128, 128] */ 877 load_constant_vec(gen, p128_vec, 128.0f); 878 
load_constant_vec(gen, n128_vec, -128.0f); 879 ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = max(w, -128) */ 880 ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */ 881 882 /* if temp.x > 0 883 * z = pow(tmp.y, tmp.w) 884 * else 885 * z = 0.0 886 */ 887 ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ 888 ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ 889 ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ 890 891 emit_store(gen, z_vec, inst, TGSI_CHAN_Z, FALSE); 892 893 ppc_release_vec_register(gen->f, z_vec); 894 ppc_release_vec_register(gen->f, pow_vec); 895 ppc_release_vec_register(gen->f, pos_vec); 896 ppc_release_vec_register(gen->f, p128_vec); 897 ppc_release_vec_register(gen->f, n128_vec); 898 } 899 900 ppc_release_vec_register(gen->f, zero_vec); 901 } 902 903 /* Compute W */ 904 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_W)) { 905 emit_store(gen, one_vec, inst, TGSI_CHAN_W, FALSE); 906 } 907 908 release_src_vecs(gen); 909} 910 911 912static void 913emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst) 914{ 915 const int one_vec = gen_one_vec(gen); 916 int src_vec; 917 918 /* get src arg */ 919 src_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); 920 921 /* Compute X = 2^floor(src) */ 922 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_X)) { 923 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_X); 924 int tmp_vec = ppc_allocate_vec_register(gen->f); 925 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ 926 ppc_vexptefp(gen->f, dst_vec, tmp_vec); /* dst = 2 ^ tmp */ 927 emit_store(gen, dst_vec, inst, TGSI_CHAN_X, TRUE); 928 ppc_release_vec_register(gen->f, tmp_vec); 929 } 930 931 /* Compute Y = src - floor(src) */ 932 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Y)) { 933 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_Y); 934 int tmp_vec = ppc_allocate_vec_register(gen->f); 935 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ 936 ppc_vsubfp(gen->f, dst_vec, 
src_vec, tmp_vec); /* dst = src - tmp */ 937 emit_store(gen, dst_vec, inst, TGSI_CHAN_Y, TRUE); 938 ppc_release_vec_register(gen->f, tmp_vec); 939 } 940 941 /* Compute Z = RoughApprox2ToX(src) */ 942 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Z)) { 943 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_Z); 944 ppc_vexptefp(gen->f, dst_vec, src_vec); /* dst = 2 ^ src */ 945 emit_store(gen, dst_vec, inst, TGSI_CHAN_Z, TRUE); 946 } 947 948 /* Compute W = 1.0 */ 949 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_W)) { 950 emit_store(gen, one_vec, inst, TGSI_CHAN_W, FALSE); 951 } 952 953 release_src_vecs(gen); 954} 955 956 957static void 958emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst) 959{ 960 const int bit31_vec = gen_get_bit31_vec(gen); 961 const int one_vec = gen_one_vec(gen); 962 int src_vec, abs_vec; 963 964 /* get src arg */ 965 src_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); 966 967 /* compute abs(src) */ 968 abs_vec = ppc_allocate_vec_register(gen->f); 969 ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec); /* abs = src & ~bit31 */ 970 971 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_X) && 972 IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Y)) { 973 974 /* compute tmp = floor(log2(abs)) */ 975 int tmp_vec = ppc_allocate_vec_register(gen->f); 976 ppc_vlogefp(gen->f, tmp_vec, abs_vec); /* tmp = log2(abs) */ 977 ppc_vrfim(gen->f, tmp_vec, tmp_vec); /* tmp = floor(tmp); */ 978 979 /* Compute X = tmp */ 980 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_X)) { 981 emit_store(gen, tmp_vec, inst, TGSI_CHAN_X, FALSE); 982 } 983 984 /* Compute Y = abs / 2^tmp */ 985 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Y)) { 986 const int zero_vec = ppc_allocate_vec_register(gen->f); 987 ppc_vzero(gen->f, zero_vec); 988 ppc_vexptefp(gen->f, tmp_vec, tmp_vec); /* tmp = 2 ^ tmp */ 989 ppc_vrefp(gen->f, tmp_vec, tmp_vec); /* tmp = 1 / tmp */ 990 /* tmp = abs * tmp + zero */ 991 ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec); 992 emit_store(gen, tmp_vec, inst, 
TGSI_CHAN_Y, FALSE); 993 ppc_release_vec_register(gen->f, zero_vec); 994 } 995 996 ppc_release_vec_register(gen->f, tmp_vec); 997 } 998 999 /* Compute Z = RoughApproxLog2(abs) */ 1000 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Z)) { 1001 int dst_vec = get_dst_vec(gen, inst, TGSI_CHAN_Z); 1002 ppc_vlogefp(gen->f, dst_vec, abs_vec); /* dst = log2(abs) */ 1003 emit_store(gen, dst_vec, inst, TGSI_CHAN_Z, TRUE); 1004 } 1005 1006 /* Compute W = 1.0 */ 1007 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_W)) { 1008 emit_store(gen, one_vec, inst, TGSI_CHAN_W, FALSE); 1009 } 1010 1011 ppc_release_vec_register(gen->f, abs_vec); 1012 release_src_vecs(gen); 1013} 1014 1015 1016static void 1017emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst) 1018{ 1019 int s0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); 1020 int s1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_X); 1021 int pow_vec = ppc_allocate_vec_register(gen->f); 1022 int chan; 1023 1024 ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec); 1025 1026 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 1027 emit_store(gen, pow_vec, inst, chan, FALSE); 1028 } 1029 1030 ppc_release_vec_register(gen->f, pow_vec); 1031 1032 release_src_vecs(gen); 1033} 1034 1035 1036static void 1037emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst) 1038{ 1039 int x0_vec, y0_vec, z0_vec; 1040 int x1_vec, y1_vec, z1_vec; 1041 int zero_vec, tmp_vec; 1042 int tmp2_vec; 1043 1044 zero_vec = ppc_allocate_vec_register(gen->f); 1045 ppc_vzero(gen->f, zero_vec); 1046 1047 tmp_vec = ppc_allocate_vec_register(gen->f); 1048 tmp2_vec = ppc_allocate_vec_register(gen->f); 1049 1050 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Y) || 1051 IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Z)) { 1052 x0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_X); 1053 x1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_X); 1054 } 1055 if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_X) || 1056 IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Z)) { 1057 y0_vec = get_src_vec(gen, inst, 
                   /* NOTE(review): chunk begins mid-statement — this is the
                    * tail of the y0/y1 source fetch in emit_xpd() (cross
                    * product); the statement's head is above this chunk. */
                   0, TGSI_CHAN_Y);
      y1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_Y);
   }
   /* Z components are needed to compute both the X and Y results */
   if (IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_X) ||
       IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Y)) {
      z0_vec = get_src_vec(gen, inst, 0, TGSI_CHAN_Z);
      z1_vec = get_src_vec(gen, inst, 1, TGSI_CHAN_Z);
   }

   IF_IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_X) {
      /* dst.x = y0 * z1 - z0 * y1 */
      /* tmp = y0 * z1 */
      ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec);
      /* tmp = tmp - z0 * y1 */
      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec);
      emit_store(gen, tmp_vec, inst, TGSI_CHAN_X, FALSE);
   }
   IF_IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Y) {
      /* dst.y = z0 * x1 - x0 * z1 */
      /* tmp = z0 * x1 */
      ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec);
      /* tmp = tmp - x0 * z1 */
      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec);
      emit_store(gen, tmp_vec, inst, TGSI_CHAN_Y, FALSE);
   }
   IF_IS_DST0_CHANNEL_ENABLED(*inst, TGSI_CHAN_Z) {
      /* dst.z = x0 * y1 - y0 * x1 */
      /* tmp = x0 * y1 */
      ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec);
      /* tmp = tmp - y0 * x1 */
      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec);
      emit_store(gen, tmp_vec, inst, TGSI_CHAN_Z, FALSE);
   }
   /* W is undefined */

   ppc_release_vec_register(gen->f, tmp_vec);
   ppc_release_vec_register(gen->f, zero_vec);
   release_src_vecs(gen);
}


/**
 * Translate one TGSI instruction into PPC vector code, dispatching on
 * the opcode to the per-category emitters above.
 *
 * \param gen  code-gen context (holds the ppc_function and register state)
 * \param inst  the parsed TGSI instruction to translate
 * \return 1 on success (including TGSI_OPCODE_END), 0 if the instruction
 *         cannot be translated and the caller must fall back to another path
 */
static int
emit_instruction(struct gen_context *gen,
                 struct tgsi_full_instruction *inst)
{

   /* we don't handle saturation/clamping yet */
   if (inst->Instruction.Saturate != TGSI_SAT_NONE)
      return 0;

   /* need to use extra temps to fix SOA dependencies : */
   if (tgsi_check_soa_dependencies(inst))
      /* NOTE(review): FALSE == 0, so behavior matches the 'return 0' above;
       * only the spelling is inconsistent. */
      return FALSE;

   switch (inst->Instruction.Opcode) {
   /* per-channel unary ops */
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_ABS:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_FRC:
   case TGSI_OPCODE_EX2:
   case TGSI_OPCODE_LG2:
      emit_unaryop(gen, inst);
      break;
   /* scalar unary ops (result broadcast from src.x) */
   case TGSI_OPCODE_RSQ:
   case TGSI_OPCODE_RCP:
      emit_scalar_unaryop(gen, inst);
      break;
   /* two-operand ops */
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_SUB:
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MAX:
      emit_binop(gen, inst);
      break;
   /* comparison / set-on ops */
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SGE:
      emit_inequality(gen, inst);
      break;
   /* three-operand ops */
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_LRP:
      emit_triop(gen, inst);
      break;
   /* dot products */
   case TGSI_OPCODE_DP3:
   case TGSI_OPCODE_DP4:
   case TGSI_OPCODE_DPH:
      emit_dotprod(gen, inst);
      break;
   case TGSI_OPCODE_LIT:
      emit_lit(gen, inst);
      break;
   case TGSI_OPCODE_LOG:
      emit_log(gen, inst);
      break;
   case TGSI_OPCODE_EXP:
      emit_exp(gen, inst);
      break;
   case TGSI_OPCODE_POW:
      emit_pow(gen, inst);
      break;
   case TGSI_OPCODE_XPD:
      emit_xpd(gen, inst);
      break;
   case TGSI_OPCODE_END:
      /* normal end */
      return 1;
   default:
      /* unsupported opcode */
      return 0;
   }
   return 1;
}


/**
 * Handle a TGSI declaration token.  Only fragment-shader inputs and
 * system values are examined; the interpolation code below is disabled
 * (#if 0), so this function currently emits nothing.
 */
static void
emit_declaration(
   struct ppc_function *func,
   struct tgsi_full_declaration *decl )
{
   if( decl->Declaration.File == TGSI_FILE_INPUT ||
       decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) {
#if 0
      unsigned first, last, mask;
      unsigned i, j;

      first = decl->Range.First;
      last = decl->Range.Last;
      mask = decl->Declaration.UsageMask;

      /* for each declared register and enabled channel, emit the
       * interpolation expression selected by the interpolate mode */
      for( i = first; i <= last; i++ ) {
         for( j = 0; j < NUM_CHANNELS; j++ ) {
            if( mask & (1 << j) ) {
               switch( decl->Declaration.Interpolate ) {
               case TGSI_INTERPOLATE_CONSTANT:
                  emit_coef_a0( func, 0, i, j );
                  emit_inputs( func, 0, i, j );
                  break;

               case TGSI_INTERPOLATE_LINEAR:
                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
                  emit_coef_dadx( func, 1, i, j );
                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
emit_coef_dady( func, 3, i, j ); 1198 emit_mul( func, 0, 1 ); /* x * dadx */ 1199 emit_coef_a0( func, 4, i, j ); 1200 emit_mul( func, 2, 3 ); /* y * dady */ 1201 emit_add( func, 0, 4 ); /* x * dadx + a0 */ 1202 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 1203 emit_inputs( func, 0, i, j ); 1204 break; 1205 1206 case TGSI_INTERPOLATE_PERSPECTIVE: 1207 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 1208 emit_coef_dadx( func, 1, i, j ); 1209 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 1210 emit_coef_dady( func, 3, i, j ); 1211 emit_mul( func, 0, 1 ); /* x * dadx */ 1212 emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); 1213 emit_coef_a0( func, 5, i, j ); 1214 emit_rcp( func, 4, 4 ); /* 1.0 / w */ 1215 emit_mul( func, 2, 3 ); /* y * dady */ 1216 emit_add( func, 0, 5 ); /* x * dadx + a0 */ 1217 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 1218 emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ 1219 emit_inputs( func, 0, i, j ); 1220 break; 1221 1222 default: 1223 assert( 0 ); 1224 break; 1225 } 1226 } 1227 } 1228 } 1229#endif 1230 } 1231} 1232 1233 1234 1235static void 1236emit_prologue(struct ppc_function *func) 1237{ 1238 /* XXX set up stack frame */ 1239} 1240 1241 1242static void 1243emit_epilogue(struct ppc_function *func) 1244{ 1245 ppc_comment(func, -4, "Epilogue:"); 1246 ppc_return(func); 1247 /* XXX restore prev stack frame */ 1248#if 0 1249 debug_printf("PPC: Emitted %u instructions\n", func->num_inst); 1250#endif 1251} 1252 1253 1254 1255/** 1256 * Translate a TGSI vertex/fragment shader to PPC code. 
 *
 * \param tokens  the TGSI input shader
 * \param func  the output PPC code/function
 * \param immediates  buffer to place immediates, later passed to PPC func
 * \param do_swizzles  presumably controls source swizzle handling — not
 *                     referenced in this function; TODO confirm against callers
 * \return TRUE for success, FALSE if translation failed
 */
boolean
tgsi_emit_ppc(const struct tgsi_token *tokens,
              struct ppc_function *func,
              float (*immediates)[4],
              boolean do_swizzles )
{
   static int use_ppc_asm = -1;     /* -1 = env var not checked yet */
   struct tgsi_parse_context parse;
   /*boolean instruction_phase = FALSE;*/
   unsigned ok = 1;                 /* stays 1 while translation succeeds */
   uint num_immediates = 0;
   struct gen_context gen;
   uint ic = 0;                     /* instruction counter for debug dumps */

   if (use_ppc_asm < 0) {
      /* If GALLIUM_NOPPC is set, don't use PPC codegen */
      use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE);
   }
   if (!use_ppc_asm)
      return FALSE;

   /* flip to 1 for debug: dump the incoming TGSI shader */
   if (0) {
      debug_printf("\n********* TGSI->PPC ********\n");
      tgsi_dump(tokens, 0);
   }

   util_init_math();

   init_gen_context(&gen, func);

   emit_prologue(func);

   tgsi_parse_init( &parse, tokens );

   /* walk the token stream; stop early on the first failed instruction */
   while (!tgsi_parse_end_of_tokens(&parse) && ok) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* declarations only matter for fragment shaders (interpolation) */
         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
            emit_declaration(func, &parse.FullToken.FullDeclaration );
         }
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         if (func->print) {
            _debug_printf("# ");
            ic++;
            tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
         }

         ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);

         if (!ok) {
            uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
            debug_printf("failed to translate tgsi opcode %d (%s) to PPC (%s)\n",
                         opcode,
                         tgsi_get_opcode_name(opcode),
                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
                         "vertex shader" : "fragment shader");
         }
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* splat each immediate component into a float[4] vector for SoA */
         {
            /* NrTokens counts the header token too, hence the -1 */
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            uint i;
            assert(size <= 4);
            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
            for (i = 0; i < size; i++) {
               immediates[num_immediates][i] =
                  parse.FullToken.FullImmediate.u[i].Float;
            }
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         /* properties are ignored by this backend */
         break;

      default:
         ok = 0;
         assert( 0 );
      }
   }

   emit_epilogue(func);

   tgsi_parse_free( &parse );

   if (ppc_num_instructions(func) == 0) {
      /* ran out of memory for instructions */
      ok = FALSE;
   }

   if (!ok)
      debug_printf("TGSI->PPC translation failed\n");

   return ok;
}

#else

/* Dummy symbol — presumably kept so this translation unit is non-empty
 * on non-PPC builds; TODO confirm that's the intent. */
void ppc_dummy_func(void);

void ppc_dummy_func(void)
{
}

#endif /* PIPE_ARCH_PPC */