tgsi_ppc.c revision d1f76b1d852863b6802cb1c2676c698881a6246a
1/************************************************************************** 2 * 3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI to PowerPC code generation. 
30 */ 31 32#include "pipe/p_config.h" 33 34#if defined(PIPE_ARCH_PPC) 35 36#include "util/u_debug.h" 37#include "pipe/p_shader_tokens.h" 38#include "util/u_math.h" 39#include "util/u_memory.h" 40#include "util/u_sse.h" 41#include "tgsi/tgsi_info.h" 42#include "tgsi/tgsi_parse.h" 43#include "tgsi/tgsi_util.h" 44#include "tgsi_dump.h" 45#include "tgsi_exec.h" 46#include "tgsi_ppc.h" 47#include "rtasm/rtasm_ppc.h" 48 49 50/** 51 * Since it's pretty much impossible to form PPC vector immediates, load 52 * them from memory here: 53 */ 54PIPE_ALIGN_VAR(16) const float 55ppc_builtin_constants[] = { 56 1.0f, -128.0f, 128.0, 0.0 57}; 58 59 60#define FOR_EACH_CHANNEL( CHAN )\ 61 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 62 63#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 64 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) 65 66#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 67 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 68 69#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 70 FOR_EACH_CHANNEL( CHAN )\ 71 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 72 73#define CHAN_X 0 74#define CHAN_Y 1 75#define CHAN_Z 2 76#define CHAN_W 3 77 78 79/** 80 * How many TGSI temps should be implemented with real PPC vector registers 81 * rather than memory. 82 */ 83#define MAX_PPC_TEMPS 3 84 85 86/** 87 * Context/state used during code gen. 
88 */ 89struct gen_context 90{ 91 struct ppc_function *f; 92 int inputs_reg; /**< GP register pointing to input params */ 93 int outputs_reg; /**< GP register pointing to output params */ 94 int temps_reg; /**< GP register pointing to temporary "registers" */ 95 int immed_reg; /**< GP register pointing to immediates buffer */ 96 int const_reg; /**< GP register pointing to constants buffer */ 97 int builtins_reg; /**< GP register pointint to built-in constants */ 98 99 int offset_reg; /**< used to reduce redundant li instructions */ 100 int offset_value; 101 102 int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ 103 int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ 104 105 /** 106 * Map TGSI temps to PPC vector temps. 107 * We have 32 PPC vector regs. Use 16 of them for storing 4 TGSI temps. 108 * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1]. 109 */ 110 int temps_map[MAX_PPC_TEMPS][4]; 111 112 /** 113 * Cache of src registers. 114 * This is used to avoid redundant load instructions. 115 */ 116 struct { 117 struct tgsi_full_src_register src; 118 uint chan; 119 uint vec; 120 } regs[12]; /* 3 src regs, 4 channels */ 121 uint num_regs; 122}; 123 124 125/** 126 * Initialize code generation context. 127 */ 128static void 129init_gen_context(struct gen_context *gen, struct ppc_function *func) 130{ 131 uint i; 132 133 memset(gen, 0, sizeof(*gen)); 134 gen->f = func; 135 gen->inputs_reg = ppc_reserve_register(func, 3); /* first function param */ 136 gen->outputs_reg = ppc_reserve_register(func, 4); /* second function param */ 137 gen->temps_reg = ppc_reserve_register(func, 5); /* ... 
*/ 138 gen->immed_reg = ppc_reserve_register(func, 6); 139 gen->const_reg = ppc_reserve_register(func, 7); 140 gen->builtins_reg = ppc_reserve_register(func, 8); 141 gen->one_vec = -1; 142 gen->bit31_vec = -1; 143 gen->offset_reg = -1; 144 gen->offset_value = -9999999; 145 for (i = 0; i < MAX_PPC_TEMPS; i++) { 146 gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f); 147 gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f); 148 gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f); 149 gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f); 150 } 151} 152 153 154/** 155 * Is the given TGSI register stored as a real PPC vector register? 156 */ 157static boolean 158is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) 159{ 160 return (reg->Register.File == TGSI_FILE_TEMPORARY && 161 reg->Register.Index < MAX_PPC_TEMPS); 162} 163 164 165/** 166 * Is the given TGSI register stored as a real PPC vector register? 167 */ 168static boolean 169is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) 170{ 171 return (reg->Register.File == TGSI_FILE_TEMPORARY && 172 reg->Register.Index < MAX_PPC_TEMPS); 173} 174 175 176 177/** 178 * All PPC vector load/store instructions form an effective address 179 * by adding the contents of two registers. For example: 180 * lvx v2,r8,r9 # v2 = memory[r8 + r9] 181 * stvx v2,r8,r9 # memory[r8 + r9] = v2; 182 * So our lvx/stvx instructions are typically preceded by an 'li' instruction 183 * to load r9 (above) with an immediate (an offset). 184 * This code emits that 'li' instruction, but only if the offset value is 185 * different than the previous 'li'. 186 * This optimization seems to save about 10% in the instruction count. 187 * Note that we need to unconditionally emit an 'li' inside basic blocks 188 * (such as inside loops). 
189 */ 190static int 191emit_li_offset(struct gen_context *gen, int offset) 192{ 193 if (gen->offset_reg <= 0) { 194 /* allocate a GP register for storing load/store offset */ 195 gen->offset_reg = ppc_allocate_register(gen->f); 196 } 197 198 /* emit new 'li' if offset is changing */ 199 if (gen->offset_value < 0 || gen->offset_value != offset) { 200 gen->offset_value = offset; 201 ppc_li(gen->f, gen->offset_reg, offset); 202 } 203 204 return gen->offset_reg; 205} 206 207 208/** 209 * Forces subsequent emit_li_offset() calls to emit an 'li'. 210 * To be called at the top of basic blocks. 211 */ 212static void 213reset_li_offset(struct gen_context *gen) 214{ 215 gen->offset_value = -9999999; 216} 217 218 219 220/** 221 * Load the given vector register with {value, value, value, value}. 222 * The value must be in the ppu_builtin_constants[] array. 223 * We wouldn't need this if there was a simple way to load PPC vector 224 * registers with immediate values! 225 */ 226static void 227load_constant_vec(struct gen_context *gen, int dst_vec, float value) 228{ 229 uint pos; 230 for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { 231 if (ppc_builtin_constants[pos] == value) { 232 int offset = pos * 4; 233 int offset_reg = emit_li_offset(gen, offset); 234 235 /* Load 4-byte word into vector register. 236 * The vector slot depends on the effective address we load from. 237 * We know that our builtins start at a 16-byte boundary so we 238 * know that 'swizzle' tells us which vector slot will have the 239 * loaded word. The other vector slots will be undefined. 240 */ 241 ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); 242 /* splat word[pos % 4] across the vector reg */ 243 ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); 244 return; 245 } 246 } 247 assert(0 && "Need to add new constant to ppc_builtin_constants array"); 248} 249 250 251/** 252 * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. 
253 */ 254static int 255gen_one_vec(struct gen_context *gen) 256{ 257 if (gen->one_vec < 0) { 258 gen->one_vec = ppc_allocate_vec_register(gen->f); 259 load_constant_vec(gen, gen->one_vec, 1.0f); 260 } 261 return gen->one_vec; 262} 263 264/** 265 * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. 266 */ 267static int 268gen_get_bit31_vec(struct gen_context *gen) 269{ 270 if (gen->bit31_vec < 0) { 271 gen->bit31_vec = ppc_allocate_vec_register(gen->f); 272 ppc_vspltisw(gen->f, gen->bit31_vec, -1); 273 ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); 274 } 275 return gen->bit31_vec; 276} 277 278 279/** 280 * Register fetch. Return PPC vector register with result. 281 */ 282static int 283emit_fetch(struct gen_context *gen, 284 const struct tgsi_full_src_register *reg, 285 const unsigned chan_index) 286{ 287 uint swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); 288 int dst_vec = -1; 289 290 switch (swizzle) { 291 case TGSI_SWIZZLE_X: 292 case TGSI_SWIZZLE_Y: 293 case TGSI_SWIZZLE_Z: 294 case TGSI_SWIZZLE_W: 295 switch (reg->Register.File) { 296 case TGSI_FILE_INPUT: 297 case TGSI_FILE_SYSTEM_VALUE: 298 { 299 int offset = (reg->Register.Index * 4 + swizzle) * 16; 300 int offset_reg = emit_li_offset(gen, offset); 301 dst_vec = ppc_allocate_vec_register(gen->f); 302 ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); 303 } 304 break; 305 case TGSI_FILE_TEMPORARY: 306 if (is_ppc_vec_temporary(reg)) { 307 /* use PPC vec register */ 308 dst_vec = gen->temps_map[reg->Register.Index][swizzle]; 309 } 310 else { 311 /* use memory-based temp register "file" */ 312 int offset = (reg->Register.Index * 4 + swizzle) * 16; 313 int offset_reg = emit_li_offset(gen, offset); 314 dst_vec = ppc_allocate_vec_register(gen->f); 315 ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); 316 } 317 break; 318 case TGSI_FILE_IMMEDIATE: 319 { 320 int offset = (reg->Register.Index * 4 + swizzle) * 4; 321 int offset_reg = 
emit_li_offset(gen, offset); 322 dst_vec = ppc_allocate_vec_register(gen->f); 323 /* Load 4-byte word into vector register. 324 * The vector slot depends on the effective address we load from. 325 * We know that our immediates start at a 16-byte boundary so we 326 * know that 'swizzle' tells us which vector slot will have the 327 * loaded word. The other vector slots will be undefined. 328 */ 329 ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg); 330 /* splat word[swizzle] across the vector reg */ 331 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 332 } 333 break; 334 case TGSI_FILE_CONSTANT: 335 { 336 int offset = (reg->Register.Index * 4 + swizzle) * 4; 337 int offset_reg = emit_li_offset(gen, offset); 338 dst_vec = ppc_allocate_vec_register(gen->f); 339 /* Load 4-byte word into vector register. 340 * The vector slot depends on the effective address we load from. 341 * We know that our constants start at a 16-byte boundary so we 342 * know that 'swizzle' tells us which vector slot will have the 343 * loaded word. The other vector slots will be undefined. 
344 */ 345 ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); 346 /* splat word[swizzle] across the vector reg */ 347 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 348 } 349 break; 350 default: 351 assert( 0 ); 352 } 353 break; 354 default: 355 assert( 0 ); 356 } 357 358 assert(dst_vec >= 0); 359 360 { 361 uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); 362 if (sign_op != TGSI_UTIL_SIGN_KEEP) { 363 int bit31_vec = gen_get_bit31_vec(gen); 364 int dst_vec2; 365 366 if (is_ppc_vec_temporary(reg)) { 367 /* need to use a new temp */ 368 dst_vec2 = ppc_allocate_vec_register(gen->f); 369 } 370 else { 371 dst_vec2 = dst_vec; 372 } 373 374 switch (sign_op) { 375 case TGSI_UTIL_SIGN_CLEAR: 376 /* vec = vec & ~bit31 */ 377 ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec); 378 break; 379 case TGSI_UTIL_SIGN_SET: 380 /* vec = vec | bit31 */ 381 ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec); 382 break; 383 case TGSI_UTIL_SIGN_TOGGLE: 384 /* vec = vec ^ bit31 */ 385 ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec); 386 break; 387 default: 388 assert(0); 389 } 390 return dst_vec2; 391 } 392 } 393 394 return dst_vec; 395} 396 397 398 399/** 400 * Test if two TGSI src registers refer to the same memory location. 401 * We use this to avoid redundant register loads. 
402 */ 403static boolean 404equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, 405 const struct tgsi_full_src_register *b, uint chan_b) 406{ 407 int swz_a, swz_b; 408 int sign_a, sign_b; 409 if (a->Register.File != b->Register.File) 410 return FALSE; 411 if (a->Register.Index != b->Register.Index) 412 return FALSE; 413 swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a); 414 swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b); 415 if (swz_a != swz_b) 416 return FALSE; 417 sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a); 418 sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b); 419 if (sign_a != sign_b) 420 return FALSE; 421 return TRUE; 422} 423 424 425/** 426 * Given a TGSI src register and channel index, return the PPC vector 427 * register containing the value. We use a cache to prevent re-loading 428 * the same register multiple times. 429 * \return index of PPC vector register with the desired src operand 430 */ 431static int 432get_src_vec(struct gen_context *gen, 433 struct tgsi_full_instruction *inst, int src_reg, uint chan) 434{ 435 const const struct tgsi_full_src_register *src = 436 &inst->Src[src_reg]; 437 int vec; 438 uint i; 439 440 /* check the cache */ 441 for (i = 0; i < gen->num_regs; i++) { 442 if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) { 443 /* cache hit */ 444 assert(gen->regs[i].vec >= 0); 445 return gen->regs[i].vec; 446 } 447 } 448 449 /* cache miss: allocate new vec reg and emit fetch/load code */ 450 vec = emit_fetch(gen, src, chan); 451 gen->regs[gen->num_regs].src = *src; 452 gen->regs[gen->num_regs].chan = chan; 453 gen->regs[gen->num_regs].vec = vec; 454 gen->num_regs++; 455 456 assert(gen->num_regs <= Elements(gen->regs)); 457 458 assert(vec >= 0); 459 460 return vec; 461} 462 463 464/** 465 * Clear the src operand cache. To be called at the end of each emit function. 
466 */ 467static void 468release_src_vecs(struct gen_context *gen) 469{ 470 uint i; 471 for (i = 0; i < gen->num_regs; i++) { 472 const const struct tgsi_full_src_register src = gen->regs[i].src; 473 if (!is_ppc_vec_temporary(&src)) { 474 ppc_release_vec_register(gen->f, gen->regs[i].vec); 475 } 476 } 477 gen->num_regs = 0; 478} 479 480 481 482static int 483get_dst_vec(struct gen_context *gen, 484 const struct tgsi_full_instruction *inst, 485 unsigned chan_index) 486{ 487 const struct tgsi_full_dst_register *reg = &inst->Dst[0]; 488 489 if (is_ppc_vec_temporary_dst(reg)) { 490 int vec = gen->temps_map[reg->Register.Index][chan_index]; 491 return vec; 492 } 493 else { 494 return ppc_allocate_vec_register(gen->f); 495 } 496} 497 498 499/** 500 * Register store. Store 'src_vec' at location indicated by 'reg'. 501 * \param free_vec Should the src_vec be released when done? 502 */ 503static void 504emit_store(struct gen_context *gen, 505 int src_vec, 506 const struct tgsi_full_instruction *inst, 507 unsigned chan_index, 508 boolean free_vec) 509{ 510 const struct tgsi_full_dst_register *reg = &inst->Dst[0]; 511 512 switch (reg->Register.File) { 513 case TGSI_FILE_OUTPUT: 514 { 515 int offset = (reg->Register.Index * 4 + chan_index) * 16; 516 int offset_reg = emit_li_offset(gen, offset); 517 ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); 518 } 519 break; 520 case TGSI_FILE_TEMPORARY: 521 if (is_ppc_vec_temporary_dst(reg)) { 522 if (!free_vec) { 523 int dst_vec = gen->temps_map[reg->Register.Index][chan_index]; 524 if (dst_vec != src_vec) 525 ppc_vmove(gen->f, dst_vec, src_vec); 526 } 527 free_vec = FALSE; 528 } 529 else { 530 int offset = (reg->Register.Index * 4 + chan_index) * 16; 531 int offset_reg = emit_li_offset(gen, offset); 532 ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); 533 } 534 break; 535#if 0 536 case TGSI_FILE_ADDRESS: 537 emit_addrs( 538 func, 539 xmm, 540 reg->Register.Index, 541 chan_index ); 542 break; 543#endif 544 default: 545 
assert( 0 ); 546 } 547 548#if 0 549 switch( inst->Instruction.Saturate ) { 550 case TGSI_SAT_NONE: 551 break; 552 553 case TGSI_SAT_ZERO_ONE: 554 /* assert( 0 ); */ 555 break; 556 557 case TGSI_SAT_MINUS_PLUS_ONE: 558 assert( 0 ); 559 break; 560 } 561#endif 562 563 if (free_vec) 564 ppc_release_vec_register(gen->f, src_vec); 565} 566 567 568static void 569emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 570{ 571 int v0, v1; 572 uint chan_index; 573 574 v0 = get_src_vec(gen, inst, 0, CHAN_X); 575 v1 = ppc_allocate_vec_register(gen->f); 576 577 switch (inst->Instruction.Opcode) { 578 case TGSI_OPCODE_RSQ: 579 /* v1 = 1.0 / sqrt(v0) */ 580 ppc_vrsqrtefp(gen->f, v1, v0); 581 break; 582 case TGSI_OPCODE_RCP: 583 /* v1 = 1.0 / v0 */ 584 ppc_vrefp(gen->f, v1, v0); 585 break; 586 default: 587 assert(0); 588 } 589 590 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { 591 emit_store(gen, v1, inst, chan_index, FALSE); 592 } 593 594 release_src_vecs(gen); 595 ppc_release_vec_register(gen->f, v1); 596} 597 598 599static void 600emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 601{ 602 uint chan_index; 603 604 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 605 int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ 606 int v1 = get_dst_vec(gen, inst, chan_index); 607 switch (inst->Instruction.Opcode) { 608 case TGSI_OPCODE_ABS: 609 /* turn off the most significant bit of each vector float word */ 610 { 611 int bit31_vec = gen_get_bit31_vec(gen); 612 ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */ 613 } 614 break; 615 case TGSI_OPCODE_FLR: 616 ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ 617 break; 618 case TGSI_OPCODE_FRC: 619 ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */ 620 ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */ 621 break; 622 case TGSI_OPCODE_EX2: 623 ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */ 624 break; 625 case TGSI_OPCODE_LG2: 626 /* XXX this may be broken! 
*/ 627 ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ 628 break; 629 case TGSI_OPCODE_MOV: 630 if (v0 != v1) 631 ppc_vmove(gen->f, v1, v0); 632 break; 633 default: 634 assert(0); 635 } 636 emit_store(gen, v1, inst, chan_index, TRUE); /* store v0 */ 637 } 638 639 release_src_vecs(gen); 640} 641 642 643static void 644emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) 645{ 646 int zero_vec = -1; 647 uint chan; 648 649 if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { 650 zero_vec = ppc_allocate_vec_register(gen->f); 651 ppc_vzero(gen->f, zero_vec); 652 } 653 654 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 655 /* fetch src operands */ 656 int v0 = get_src_vec(gen, inst, 0, chan); 657 int v1 = get_src_vec(gen, inst, 1, chan); 658 int v2 = get_dst_vec(gen, inst, chan); 659 660 /* emit binop */ 661 switch (inst->Instruction.Opcode) { 662 case TGSI_OPCODE_ADD: 663 ppc_vaddfp(gen->f, v2, v0, v1); 664 break; 665 case TGSI_OPCODE_SUB: 666 ppc_vsubfp(gen->f, v2, v0, v1); 667 break; 668 case TGSI_OPCODE_MUL: 669 ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec); 670 break; 671 case TGSI_OPCODE_MIN: 672 ppc_vminfp(gen->f, v2, v0, v1); 673 break; 674 case TGSI_OPCODE_MAX: 675 ppc_vmaxfp(gen->f, v2, v0, v1); 676 break; 677 default: 678 assert(0); 679 } 680 681 /* store v2 */ 682 emit_store(gen, v2, inst, chan, TRUE); 683 } 684 685 if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) 686 ppc_release_vec_register(gen->f, zero_vec); 687 688 release_src_vecs(gen); 689} 690 691 692static void 693emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) 694{ 695 uint chan; 696 697 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 698 /* fetch src operands */ 699 int v0 = get_src_vec(gen, inst, 0, chan); 700 int v1 = get_src_vec(gen, inst, 1, chan); 701 int v2 = get_src_vec(gen, inst, 2, chan); 702 int v3 = get_dst_vec(gen, inst, chan); 703 704 /* emit ALU */ 705 switch (inst->Instruction.Opcode) { 706 case TGSI_OPCODE_MAD: 707 ppc_vmaddfp(gen->f, v3, v0, v1, v2); 
/* v3 = v0 * v1 + v2 */ 708 break; 709 case TGSI_OPCODE_LRP: 710 ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ 711 ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ 712 break; 713 default: 714 assert(0); 715 } 716 717 /* store v3 */ 718 emit_store(gen, v3, inst, chan, TRUE); 719 } 720 721 release_src_vecs(gen); 722} 723 724 725/** 726 * Vector comparisons, resulting in 1.0 or 0.0 values. 727 */ 728static void 729emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) 730{ 731 uint chan; 732 int one_vec = gen_one_vec(gen); 733 734 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 735 /* fetch src operands */ 736 int v0 = get_src_vec(gen, inst, 0, chan); 737 int v1 = get_src_vec(gen, inst, 1, chan); 738 int v2 = get_dst_vec(gen, inst, chan); 739 boolean complement = FALSE; 740 741 switch (inst->Instruction.Opcode) { 742 case TGSI_OPCODE_SNE: 743 complement = TRUE; 744 /* fall-through */ 745 case TGSI_OPCODE_SEQ: 746 ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ 747 break; 748 749 case TGSI_OPCODE_SGE: 750 complement = TRUE; 751 /* fall-through */ 752 case TGSI_OPCODE_SLT: 753 ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ 754 break; 755 756 case TGSI_OPCODE_SLE: 757 complement = TRUE; 758 /* fall-through */ 759 case TGSI_OPCODE_SGT: 760 ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? 
~0 : 0 */ 761 break; 762 default: 763 assert(0); 764 } 765 766 /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ 767 768 if (complement) 769 ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ 770 else 771 ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ 772 773 /* store v2 */ 774 emit_store(gen, v2, inst, chan, TRUE); 775 } 776 777 release_src_vecs(gen); 778} 779 780 781static void 782emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) 783{ 784 int v0, v1, v2; 785 uint chan_index; 786 787 v2 = ppc_allocate_vec_register(gen->f); 788 789 ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */ 790 791 v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */ 792 v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */ 793 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 794 795 v0 = get_src_vec(gen, inst, 0, CHAN_Y); /* v0 = src0.YYYY */ 796 v1 = get_src_vec(gen, inst, 1, CHAN_Y); /* v1 = src1.YYYY */ 797 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 798 799 v0 = get_src_vec(gen, inst, 0, CHAN_Z); /* v0 = src0.ZZZZ */ 800 v1 = get_src_vec(gen, inst, 1, CHAN_Z); /* v1 = src1.ZZZZ */ 801 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 802 803 if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { 804 v0 = get_src_vec(gen, inst, 0, CHAN_W); /* v0 = src0.WWWW */ 805 v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ 806 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 807 } 808 else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { 809 v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ 810 ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ 811 } 812 813 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 814 emit_store(gen, v2, inst, chan_index, FALSE); /* store v2, free v2 later */ 815 } 816 817 release_src_vecs(gen); 818 819 ppc_release_vec_register(gen->f, v2); 820} 821 822 823/** Approximation for vr = pow(va, vb) */ 824static void 825ppc_vec_pow(struct ppc_function 
*f, int vr, int va, int vb) 826{ 827 /* pow(a,b) ~= exp2(log2(a) * b) */ 828 int t_vec = ppc_allocate_vec_register(f); 829 int zero_vec = ppc_allocate_vec_register(f); 830 831 ppc_vzero(f, zero_vec); 832 833 ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ 834 ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */ 835 ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ 836 837 ppc_release_vec_register(f, t_vec); 838 ppc_release_vec_register(f, zero_vec); 839} 840 841 842static void 843emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) 844{ 845 int one_vec = gen_one_vec(gen); 846 847 /* Compute X */ 848 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { 849 emit_store(gen, one_vec, inst, CHAN_X, FALSE); 850 } 851 852 /* Compute Y, Z */ 853 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || 854 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 855 int x_vec; 856 int zero_vec = ppc_allocate_vec_register(gen->f); 857 858 x_vec = get_src_vec(gen, inst, 0, CHAN_X); /* x_vec = src[0].x */ 859 860 ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ 861 ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ 862 863 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 864 emit_store(gen, x_vec, inst, CHAN_Y, FALSE); 865 } 866 867 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 868 int y_vec, w_vec; 869 int z_vec = ppc_allocate_vec_register(gen->f); 870 int pow_vec = ppc_allocate_vec_register(gen->f); 871 int pos_vec = ppc_allocate_vec_register(gen->f); 872 int p128_vec = ppc_allocate_vec_register(gen->f); 873 int n128_vec = ppc_allocate_vec_register(gen->f); 874 875 y_vec = get_src_vec(gen, inst, 0, CHAN_Y); /* y_vec = src[0].y */ 876 ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ 877 878 w_vec = get_src_vec(gen, inst, 0, CHAN_W); /* w_vec = src[0].w */ 879 880 /* clamp W to [-128, 128] */ 881 load_constant_vec(gen, p128_vec, 128.0f); 882 load_constant_vec(gen, n128_vec, -128.0f); 883 ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = 
max(w, -128) */ 884 ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */ 885 886 /* if temp.x > 0 887 * z = pow(tmp.y, tmp.w) 888 * else 889 * z = 0.0 890 */ 891 ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ 892 ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ 893 ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ 894 895 emit_store(gen, z_vec, inst, CHAN_Z, FALSE); 896 897 ppc_release_vec_register(gen->f, z_vec); 898 ppc_release_vec_register(gen->f, pow_vec); 899 ppc_release_vec_register(gen->f, pos_vec); 900 ppc_release_vec_register(gen->f, p128_vec); 901 ppc_release_vec_register(gen->f, n128_vec); 902 } 903 904 ppc_release_vec_register(gen->f, zero_vec); 905 } 906 907 /* Compute W */ 908 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { 909 emit_store(gen, one_vec, inst, CHAN_W, FALSE); 910 } 911 912 release_src_vecs(gen); 913} 914 915 916static void 917emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst) 918{ 919 const int one_vec = gen_one_vec(gen); 920 int src_vec; 921 922 /* get src arg */ 923 src_vec = get_src_vec(gen, inst, 0, CHAN_X); 924 925 /* Compute X = 2^floor(src) */ 926 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { 927 int dst_vec = get_dst_vec(gen, inst, CHAN_X); 928 int tmp_vec = ppc_allocate_vec_register(gen->f); 929 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ 930 ppc_vexptefp(gen->f, dst_vec, tmp_vec); /* dst = 2 ^ tmp */ 931 emit_store(gen, dst_vec, inst, CHAN_X, TRUE); 932 ppc_release_vec_register(gen->f, tmp_vec); 933 } 934 935 /* Compute Y = src - floor(src) */ 936 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 937 int dst_vec = get_dst_vec(gen, inst, CHAN_Y); 938 int tmp_vec = ppc_allocate_vec_register(gen->f); 939 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ 940 ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec); /* dst = src - tmp */ 941 emit_store(gen, dst_vec, inst, CHAN_Y, TRUE); 942 ppc_release_vec_register(gen->f, tmp_vec); 943 } 
944 945 /* Compute Z = RoughApprox2ToX(src) */ 946 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 947 int dst_vec = get_dst_vec(gen, inst, CHAN_Z); 948 ppc_vexptefp(gen->f, dst_vec, src_vec); /* dst = 2 ^ src */ 949 emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); 950 } 951 952 /* Compute W = 1.0 */ 953 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { 954 emit_store(gen, one_vec, inst, CHAN_W, FALSE); 955 } 956 957 release_src_vecs(gen); 958} 959 960 961static void 962emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst) 963{ 964 const int bit31_vec = gen_get_bit31_vec(gen); 965 const int one_vec = gen_one_vec(gen); 966 int src_vec, abs_vec; 967 968 /* get src arg */ 969 src_vec = get_src_vec(gen, inst, 0, CHAN_X); 970 971 /* compute abs(src) */ 972 abs_vec = ppc_allocate_vec_register(gen->f); 973 ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec); /* abs = src & ~bit31 */ 974 975 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && 976 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 977 978 /* compute tmp = floor(log2(abs)) */ 979 int tmp_vec = ppc_allocate_vec_register(gen->f); 980 ppc_vlogefp(gen->f, tmp_vec, abs_vec); /* tmp = log2(abs) */ 981 ppc_vrfim(gen->f, tmp_vec, tmp_vec); /* tmp = floor(tmp); */ 982 983 /* Compute X = tmp */ 984 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { 985 emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); 986 } 987 988 /* Compute Y = abs / 2^tmp */ 989 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 990 const int zero_vec = ppc_allocate_vec_register(gen->f); 991 ppc_vzero(gen->f, zero_vec); 992 ppc_vexptefp(gen->f, tmp_vec, tmp_vec); /* tmp = 2 ^ tmp */ 993 ppc_vrefp(gen->f, tmp_vec, tmp_vec); /* tmp = 1 / tmp */ 994 /* tmp = abs * tmp + zero */ 995 ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec); 996 emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); 997 ppc_release_vec_register(gen->f, zero_vec); 998 } 999 1000 ppc_release_vec_register(gen->f, tmp_vec); 1001 } 1002 1003 /* Compute Z = RoughApproxLog2(abs) */ 1004 if 
(IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 1005 int dst_vec = get_dst_vec(gen, inst, CHAN_Z); 1006 ppc_vlogefp(gen->f, dst_vec, abs_vec); /* dst = log2(abs) */ 1007 emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); 1008 } 1009 1010 /* Compute W = 1.0 */ 1011 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { 1012 emit_store(gen, one_vec, inst, CHAN_W, FALSE); 1013 } 1014 1015 ppc_release_vec_register(gen->f, abs_vec); 1016 release_src_vecs(gen); 1017} 1018 1019 1020static void 1021emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst) 1022{ 1023 int s0_vec = get_src_vec(gen, inst, 0, CHAN_X); 1024 int s1_vec = get_src_vec(gen, inst, 1, CHAN_X); 1025 int pow_vec = ppc_allocate_vec_register(gen->f); 1026 int chan; 1027 1028 ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec); 1029 1030 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 1031 emit_store(gen, pow_vec, inst, chan, FALSE); 1032 } 1033 1034 ppc_release_vec_register(gen->f, pow_vec); 1035 1036 release_src_vecs(gen); 1037} 1038 1039 1040static void 1041emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst) 1042{ 1043 int x0_vec, y0_vec, z0_vec; 1044 int x1_vec, y1_vec, z1_vec; 1045 int zero_vec, tmp_vec; 1046 int tmp2_vec; 1047 1048 zero_vec = ppc_allocate_vec_register(gen->f); 1049 ppc_vzero(gen->f, zero_vec); 1050 1051 tmp_vec = ppc_allocate_vec_register(gen->f); 1052 tmp2_vec = ppc_allocate_vec_register(gen->f); 1053 1054 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || 1055 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 1056 x0_vec = get_src_vec(gen, inst, 0, CHAN_X); 1057 x1_vec = get_src_vec(gen, inst, 1, CHAN_X); 1058 } 1059 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || 1060 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 1061 y0_vec = get_src_vec(gen, inst, 0, CHAN_Y); 1062 y1_vec = get_src_vec(gen, inst, 1, CHAN_Y); 1063 } 1064 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || 1065 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 1066 z0_vec = get_src_vec(gen, inst, 0, CHAN_Z); 1067 z1_vec = get_src_vec(gen, inst, 
1, CHAN_Z); 1068 } 1069 1070 IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) { 1071 /* tmp = y0 * z1 */ 1072 ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec); 1073 /* tmp = tmp - z0 * y1*/ 1074 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec); 1075 emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); 1076 } 1077 IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) { 1078 /* tmp = z0 * x1 */ 1079 ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec); 1080 /* tmp = tmp - x0 * z1 */ 1081 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec); 1082 emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); 1083 } 1084 IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) { 1085 /* tmp = x0 * y1 */ 1086 ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec); 1087 /* tmp = tmp - y0 * x1 */ 1088 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec); 1089 emit_store(gen, tmp_vec, inst, CHAN_Z, FALSE); 1090 } 1091 /* W is undefined */ 1092 1093 ppc_release_vec_register(gen->f, tmp_vec); 1094 ppc_release_vec_register(gen->f, zero_vec); 1095 release_src_vecs(gen); 1096} 1097 1098static int 1099emit_instruction(struct gen_context *gen, 1100 struct tgsi_full_instruction *inst) 1101{ 1102 1103 /* we don't handle saturation/clamping yet */ 1104 if (inst->Instruction.Saturate != TGSI_SAT_NONE) 1105 return 0; 1106 1107 /* need to use extra temps to fix SOA dependencies : */ 1108 if (tgsi_check_soa_dependencies(inst)) 1109 return FALSE; 1110 1111 switch (inst->Instruction.Opcode) { 1112 case TGSI_OPCODE_MOV: 1113 case TGSI_OPCODE_ABS: 1114 case TGSI_OPCODE_FLR: 1115 case TGSI_OPCODE_FRC: 1116 case TGSI_OPCODE_EX2: 1117 case TGSI_OPCODE_LG2: 1118 emit_unaryop(gen, inst); 1119 break; 1120 case TGSI_OPCODE_RSQ: 1121 case TGSI_OPCODE_RCP: 1122 emit_scalar_unaryop(gen, inst); 1123 break; 1124 case TGSI_OPCODE_ADD: 1125 case TGSI_OPCODE_SUB: 1126 case TGSI_OPCODE_MUL: 1127 case TGSI_OPCODE_MIN: 1128 case TGSI_OPCODE_MAX: 1129 emit_binop(gen, inst); 1130 break; 1131 case TGSI_OPCODE_SEQ: 1132 case TGSI_OPCODE_SNE: 1133 
case TGSI_OPCODE_SLT: 1134 case TGSI_OPCODE_SGT: 1135 case TGSI_OPCODE_SLE: 1136 case TGSI_OPCODE_SGE: 1137 emit_inequality(gen, inst); 1138 break; 1139 case TGSI_OPCODE_MAD: 1140 case TGSI_OPCODE_LRP: 1141 emit_triop(gen, inst); 1142 break; 1143 case TGSI_OPCODE_DP3: 1144 case TGSI_OPCODE_DP4: 1145 case TGSI_OPCODE_DPH: 1146 emit_dotprod(gen, inst); 1147 break; 1148 case TGSI_OPCODE_LIT: 1149 emit_lit(gen, inst); 1150 break; 1151 case TGSI_OPCODE_LOG: 1152 emit_log(gen, inst); 1153 break; 1154 case TGSI_OPCODE_EXP: 1155 emit_exp(gen, inst); 1156 break; 1157 case TGSI_OPCODE_POW: 1158 emit_pow(gen, inst); 1159 break; 1160 case TGSI_OPCODE_XPD: 1161 emit_xpd(gen, inst); 1162 break; 1163 case TGSI_OPCODE_END: 1164 /* normal end */ 1165 return 1; 1166 default: 1167 return 0; 1168 } 1169 return 1; 1170} 1171 1172 1173static void 1174emit_declaration( 1175 struct ppc_function *func, 1176 struct tgsi_full_declaration *decl ) 1177{ 1178 if( decl->Declaration.File == TGSI_FILE_INPUT || 1179 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) { 1180#if 0 1181 unsigned first, last, mask; 1182 unsigned i, j; 1183 1184 first = decl->Range.First; 1185 last = decl->Range.Last; 1186 mask = decl->Declaration.UsageMask; 1187 1188 for( i = first; i <= last; i++ ) { 1189 for( j = 0; j < NUM_CHANNELS; j++ ) { 1190 if( mask & (1 << j) ) { 1191 switch( decl->Declaration.Interpolate ) { 1192 case TGSI_INTERPOLATE_CONSTANT: 1193 emit_coef_a0( func, 0, i, j ); 1194 emit_inputs( func, 0, i, j ); 1195 break; 1196 1197 case TGSI_INTERPOLATE_LINEAR: 1198 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 1199 emit_coef_dadx( func, 1, i, j ); 1200 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 1201 emit_coef_dady( func, 3, i, j ); 1202 emit_mul( func, 0, 1 ); /* x * dadx */ 1203 emit_coef_a0( func, 4, i, j ); 1204 emit_mul( func, 2, 3 ); /* y * dady */ 1205 emit_add( func, 0, 4 ); /* x * dadx + a0 */ 1206 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 1207 emit_inputs( func, 0, i, j ); 1208 break; 
1209 1210 case TGSI_INTERPOLATE_PERSPECTIVE: 1211 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 1212 emit_coef_dadx( func, 1, i, j ); 1213 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 1214 emit_coef_dady( func, 3, i, j ); 1215 emit_mul( func, 0, 1 ); /* x * dadx */ 1216 emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); 1217 emit_coef_a0( func, 5, i, j ); 1218 emit_rcp( func, 4, 4 ); /* 1.0 / w */ 1219 emit_mul( func, 2, 3 ); /* y * dady */ 1220 emit_add( func, 0, 5 ); /* x * dadx + a0 */ 1221 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 1222 emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ 1223 emit_inputs( func, 0, i, j ); 1224 break; 1225 1226 default: 1227 assert( 0 ); 1228 break; 1229 } 1230 } 1231 } 1232 } 1233#endif 1234 } 1235} 1236 1237 1238 1239static void 1240emit_prologue(struct ppc_function *func) 1241{ 1242 /* XXX set up stack frame */ 1243} 1244 1245 1246static void 1247emit_epilogue(struct ppc_function *func) 1248{ 1249 ppc_comment(func, -4, "Epilogue:"); 1250 ppc_return(func); 1251 /* XXX restore prev stack frame */ 1252#if 0 1253 debug_printf("PPC: Emitted %u instructions\n", func->num_inst); 1254#endif 1255} 1256 1257 1258 1259/** 1260 * Translate a TGSI vertex/fragment shader to PPC code. 
 *
 * \param tokens the TGSI input shader
 * \param func the output PPC code/function
 * \param immediates buffer to place immediates, later passed to PPC func
 * \param do_swizzles  (appears unused in this translation path — TODO confirm)
 * \return TRUE for success, FALSE if translation failed
 */
boolean
tgsi_emit_ppc(const struct tgsi_token *tokens,
              struct ppc_function *func,
              float (*immediates)[4],
              boolean do_swizzles )
{
   static int use_ppc_asm = -1;
   struct tgsi_parse_context parse;
   /*boolean instruction_phase = FALSE;*/
   unsigned ok = 1;
   uint num_immediates = 0;
   struct gen_context gen;
   uint ic = 0;   /* instruction counter, used only for debug dumps */

   if (use_ppc_asm < 0) {
      /* If GALLIUM_NOPPC is set, don't use PPC codegen */
      use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE);
   }
   if (!use_ppc_asm)
      return FALSE;

   if (0) {
      debug_printf("\n********* TGSI->PPC ********\n");
      tgsi_dump(tokens, 0);
   }

   util_init_math();

   init_gen_context(&gen, func);

   emit_prologue(func);

   tgsi_parse_init( &parse, tokens );

   /* walk the token stream; stop early if any instruction fails */
   while (!tgsi_parse_end_of_tokens(&parse) && ok) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* declarations matter only for fragment shaders here */
         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
            emit_declaration(func, &parse.FullToken.FullDeclaration );
         }
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         if (func->print) {
            _debug_printf("# ");
            ic++;
            tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
         }

         ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);

         if (!ok) {
            uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
            debug_printf("failed to translate tgsi opcode %d (%s) to PPC (%s)\n",
                         opcode,
                         tgsi_get_opcode_name(opcode),
                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
                         "vertex shader" : "fragment shader");
         }
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* splat each immediate component into a float[4] vector for SoA */
         {
            /* NrTokens includes the immediate header token itself */
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            uint i;
            assert(size <= 4);
            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
            for (i = 0; i < size; i++) {
               immediates[num_immediates][i] =
                  parse.FullToken.FullImmediate.u[i].Float;
            }
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         /* properties are ignored by this backend */
         break;

      default:
         ok = 0;
         assert( 0 );
      }
   }

   emit_epilogue(func);

   tgsi_parse_free( &parse );

   if (ppc_num_instructions(func) == 0) {
      /* ran out of memory for instructions */
      ok = FALSE;
   }

   if (!ok)
      debug_printf("TGSI->PPC translation failed\n");

   return ok;
}

#else

/* Dummy symbol so this translation unit isn't empty on non-PPC builds. */
void ppc_dummy_func(void);

void ppc_dummy_func(void)
{
}

#endif /* PIPE_ARCH_PPC */