tgsi_ppc.c revision 5b0824dfe5eaf59fa87134e7482b3d147b262901
1/************************************************************************** 2 * 3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI to PowerPC code generation. 30 */ 31 32#include "pipe/p_config.h" 33 34#if defined(PIPE_ARCH_PPC) 35 36#include "util/u_debug.h" 37#include "pipe/p_shader_tokens.h" 38#include "util/u_math.h" 39#include "util/u_memory.h" 40#include "util/u_sse.h" 41#include "tgsi/tgsi_info.h" 42#include "tgsi/tgsi_parse.h" 43#include "tgsi/tgsi_util.h" 44#include "tgsi_dump.h" 45#include "tgsi_exec.h" 46#include "tgsi_ppc.h" 47#include "rtasm/rtasm_ppc.h" 48 49 50/** 51 * Since it's pretty much impossible to form PPC vector immediates, load 52 * them from memory here: 53 */ 54const float ppc_builtin_constants[] ALIGN16_ATTRIB = { 55 1.0f, -128.0f, 128.0, 0.0 56}; 57 58 59#define FOR_EACH_CHANNEL( CHAN )\ 60 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 61 62#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 63 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) 64 65#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 66 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 67 68#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 69 FOR_EACH_CHANNEL( CHAN )\ 70 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 71 72#define CHAN_X 0 73#define CHAN_Y 1 74#define CHAN_Z 2 75#define CHAN_W 3 76 77 78/** 79 * How many TGSI temps should be implemented with real PPC vector registers 80 * rather than memory. 81 */ 82#define MAX_PPC_TEMPS 3 83 84 85/** 86 * Context/state used during code gen. 87 */ 88struct gen_context 89{ 90 struct ppc_function *f; 91 int inputs_reg; /**< GP register pointing to input params */ 92 int outputs_reg; /**< GP register pointing to output params */ 93 int temps_reg; /**< GP register pointing to temporary "registers" */ 94 int immed_reg; /**< GP register pointing to immediates buffer */ 95 int const_reg; /**< GP register pointing to constants buffer */ 96 int builtins_reg; /**< GP register pointint to built-in constants */ 97 98 int offset_reg; /**< used to reduce redundant li instructions */ 99 int offset_value; 100 101 int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ 102 int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ 103 104 /** 105 * Map TGSI temps to PPC vector temps. 106 * We have 32 PPC vector regs. Use 16 of them for storing 4 TGSI temps. 107 * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1]. 108 */ 109 int temps_map[MAX_PPC_TEMPS][4]; 110 111 /** 112 * Cache of src registers. 113 * This is used to avoid redundant load instructions. 114 */ 115 struct { 116 struct tgsi_full_src_register src; 117 uint chan; 118 uint vec; 119 } regs[12]; /* 3 src regs, 4 channels */ 120 uint num_regs; 121}; 122 123 124/** 125 * Initialize code generation context. 126 */ 127static void 128init_gen_context(struct gen_context *gen, struct ppc_function *func) 129{ 130 uint i; 131 132 memset(gen, 0, sizeof(*gen)); 133 gen->f = func; 134 gen->inputs_reg = ppc_reserve_register(func, 3); /* first function param */ 135 gen->outputs_reg = ppc_reserve_register(func, 4); /* second function param */ 136 gen->temps_reg = ppc_reserve_register(func, 5); /* ... */ 137 gen->immed_reg = ppc_reserve_register(func, 6); 138 gen->const_reg = ppc_reserve_register(func, 7); 139 gen->builtins_reg = ppc_reserve_register(func, 8); 140 gen->one_vec = -1; 141 gen->bit31_vec = -1; 142 gen->offset_reg = -1; 143 gen->offset_value = -9999999; 144 for (i = 0; i < MAX_PPC_TEMPS; i++) { 145 gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f); 146 gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f); 147 gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f); 148 gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f); 149 } 150} 151 152 153/** 154 * Is the given TGSI register stored as a real PPC vector register? 155 */ 156static boolean 157is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) 158{ 159 return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && 160 reg->SrcRegister.Index < MAX_PPC_TEMPS); 161} 162 163 164/** 165 * Is the given TGSI register stored as a real PPC vector register? 166 */ 167static boolean 168is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) 169{ 170 return (reg->Register.File == TGSI_FILE_TEMPORARY && 171 reg->Register.Index < MAX_PPC_TEMPS); 172} 173 174 175 176/** 177 * All PPC vector load/store instructions form an effective address 178 * by adding the contents of two registers. For example: 179 * lvx v2,r8,r9 # v2 = memory[r8 + r9] 180 * stvx v2,r8,r9 # memory[r8 + r9] = v2; 181 * So our lvx/stvx instructions are typically preceded by an 'li' instruction 182 * to load r9 (above) with an immediate (an offset). 183 * This code emits that 'li' instruction, but only if the offset value is 184 * different than the previous 'li'. 185 * This optimization seems to save about 10% in the instruction count. 186 * Note that we need to unconditionally emit an 'li' inside basic blocks 187 * (such as inside loops). 188 */ 189static int 190emit_li_offset(struct gen_context *gen, int offset) 191{ 192 if (gen->offset_reg <= 0) { 193 /* allocate a GP register for storing load/store offset */ 194 gen->offset_reg = ppc_allocate_register(gen->f); 195 } 196 197 /* emit new 'li' if offset is changing */ 198 if (gen->offset_value < 0 || gen->offset_value != offset) { 199 gen->offset_value = offset; 200 ppc_li(gen->f, gen->offset_reg, offset); 201 } 202 203 return gen->offset_reg; 204} 205 206 207/** 208 * Forces subsequent emit_li_offset() calls to emit an 'li'. 209 * To be called at the top of basic blocks. 210 */ 211static void 212reset_li_offset(struct gen_context *gen) 213{ 214 gen->offset_value = -9999999; 215} 216 217 218 219/** 220 * Load the given vector register with {value, value, value, value}. 221 * The value must be in the ppu_builtin_constants[] array. 222 * We wouldn't need this if there was a simple way to load PPC vector 223 * registers with immediate values! 224 */ 225static void 226load_constant_vec(struct gen_context *gen, int dst_vec, float value) 227{ 228 uint pos; 229 for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { 230 if (ppc_builtin_constants[pos] == value) { 231 int offset = pos * 4; 232 int offset_reg = emit_li_offset(gen, offset); 233 234 /* Load 4-byte word into vector register. 235 * The vector slot depends on the effective address we load from. 236 * We know that our builtins start at a 16-byte boundary so we 237 * know that 'swizzle' tells us which vector slot will have the 238 * loaded word. The other vector slots will be undefined. 239 */ 240 ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); 241 /* splat word[pos % 4] across the vector reg */ 242 ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); 243 return; 244 } 245 } 246 assert(0 && "Need to add new constant to ppc_builtin_constants array"); 247} 248 249 250/** 251 * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. 252 */ 253static int 254gen_one_vec(struct gen_context *gen) 255{ 256 if (gen->one_vec < 0) { 257 gen->one_vec = ppc_allocate_vec_register(gen->f); 258 load_constant_vec(gen, gen->one_vec, 1.0f); 259 } 260 return gen->one_vec; 261} 262 263/** 264 * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. 265 */ 266static int 267gen_get_bit31_vec(struct gen_context *gen) 268{ 269 if (gen->bit31_vec < 0) { 270 gen->bit31_vec = ppc_allocate_vec_register(gen->f); 271 ppc_vspltisw(gen->f, gen->bit31_vec, -1); 272 ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); 273 } 274 return gen->bit31_vec; 275} 276 277 278/** 279 * Register fetch. Return PPC vector register with result. 280 */ 281static int 282emit_fetch(struct gen_context *gen, 283 const struct tgsi_full_src_register *reg, 284 const unsigned chan_index) 285{ 286 uint swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index); 287 int dst_vec = -1; 288 289 switch (swizzle) { 290 case TGSI_SWIZZLE_X: 291 case TGSI_SWIZZLE_Y: 292 case TGSI_SWIZZLE_Z: 293 case TGSI_SWIZZLE_W: 294 switch (reg->SrcRegister.File) { 295 case TGSI_FILE_INPUT: 296 { 297 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; 298 int offset_reg = emit_li_offset(gen, offset); 299 dst_vec = ppc_allocate_vec_register(gen->f); 300 ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); 301 } 302 break; 303 case TGSI_FILE_TEMPORARY: 304 if (is_ppc_vec_temporary(reg)) { 305 /* use PPC vec register */ 306 dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle]; 307 } 308 else { 309 /* use memory-based temp register "file" */ 310 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; 311 int offset_reg = emit_li_offset(gen, offset); 312 dst_vec = ppc_allocate_vec_register(gen->f); 313 ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); 314 } 315 break; 316 case TGSI_FILE_IMMEDIATE: 317 { 318 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; 319 int offset_reg = emit_li_offset(gen, offset); 320 dst_vec = ppc_allocate_vec_register(gen->f); 321 /* Load 4-byte word into vector register. 322 * The vector slot depends on the effective address we load from. 323 * We know that our immediates start at a 16-byte boundary so we 324 * know that 'swizzle' tells us which vector slot will have the 325 * loaded word. The other vector slots will be undefined. 326 */ 327 ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg); 328 /* splat word[swizzle] across the vector reg */ 329 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 330 } 331 break; 332 case TGSI_FILE_CONSTANT: 333 { 334 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; 335 int offset_reg = emit_li_offset(gen, offset); 336 dst_vec = ppc_allocate_vec_register(gen->f); 337 /* Load 4-byte word into vector register. 338 * The vector slot depends on the effective address we load from. 339 * We know that our constants start at a 16-byte boundary so we 340 * know that 'swizzle' tells us which vector slot will have the 341 * loaded word. The other vector slots will be undefined. 342 */ 343 ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); 344 /* splat word[swizzle] across the vector reg */ 345 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 346 } 347 break; 348 default: 349 assert( 0 ); 350 } 351 break; 352 default: 353 assert( 0 ); 354 } 355 356 assert(dst_vec >= 0); 357 358 { 359 uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); 360 if (sign_op != TGSI_UTIL_SIGN_KEEP) { 361 int bit31_vec = gen_get_bit31_vec(gen); 362 int dst_vec2; 363 364 if (is_ppc_vec_temporary(reg)) { 365 /* need to use a new temp */ 366 dst_vec2 = ppc_allocate_vec_register(gen->f); 367 } 368 else { 369 dst_vec2 = dst_vec; 370 } 371 372 switch (sign_op) { 373 case TGSI_UTIL_SIGN_CLEAR: 374 /* vec = vec & ~bit31 */ 375 ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec); 376 break; 377 case TGSI_UTIL_SIGN_SET: 378 /* vec = vec | bit31 */ 379 ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec); 380 break; 381 case TGSI_UTIL_SIGN_TOGGLE: 382 /* vec = vec ^ bit31 */ 383 ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec); 384 break; 385 default: 386 assert(0); 387 } 388 return dst_vec2; 389 } 390 } 391 392 return dst_vec; 393} 394 395 396 397/** 398 * Test if two TGSI src registers refer to the same memory location. 399 * We use this to avoid redundant register loads. 400 */ 401static boolean 402equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, 403 const struct tgsi_full_src_register *b, uint chan_b) 404{ 405 int swz_a, swz_b; 406 int sign_a, sign_b; 407 if (a->SrcRegister.File != b->SrcRegister.File) 408 return FALSE; 409 if (a->SrcRegister.Index != b->SrcRegister.Index) 410 return FALSE; 411 swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a); 412 swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b); 413 if (swz_a != swz_b) 414 return FALSE; 415 sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a); 416 sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b); 417 if (sign_a != sign_b) 418 return FALSE; 419 return TRUE; 420} 421 422 423/** 424 * Given a TGSI src register and channel index, return the PPC vector 425 * register containing the value. We use a cache to prevent re-loading 426 * the same register multiple times. 427 * \return index of PPC vector register with the desired src operand 428 */ 429static int 430get_src_vec(struct gen_context *gen, 431 struct tgsi_full_instruction *inst, int src_reg, uint chan) 432{ 433 const const struct tgsi_full_src_register *src = 434 &inst->Src[src_reg]; 435 int vec; 436 uint i; 437 438 /* check the cache */ 439 for (i = 0; i < gen->num_regs; i++) { 440 if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) { 441 /* cache hit */ 442 assert(gen->regs[i].vec >= 0); 443 return gen->regs[i].vec; 444 } 445 } 446 447 /* cache miss: allocate new vec reg and emit fetch/load code */ 448 vec = emit_fetch(gen, src, chan); 449 gen->regs[gen->num_regs].src = *src; 450 gen->regs[gen->num_regs].chan = chan; 451 gen->regs[gen->num_regs].vec = vec; 452 gen->num_regs++; 453 454 assert(gen->num_regs <= Elements(gen->regs)); 455 456 assert(vec >= 0); 457 458 return vec; 459} 460 461 462/** 463 * Clear the src operand cache. To be called at the end of each emit function. 464 */ 465static void 466release_src_vecs(struct gen_context *gen) 467{ 468 uint i; 469 for (i = 0; i < gen->num_regs; i++) { 470 const const struct tgsi_full_src_register src = gen->regs[i].src; 471 if (!is_ppc_vec_temporary(&src)) { 472 ppc_release_vec_register(gen->f, gen->regs[i].vec); 473 } 474 } 475 gen->num_regs = 0; 476} 477 478 479 480static int 481get_dst_vec(struct gen_context *gen, 482 const struct tgsi_full_instruction *inst, 483 unsigned chan_index) 484{ 485 const struct tgsi_full_dst_register *reg = &inst->Dst[0]; 486 487 if (is_ppc_vec_temporary_dst(reg)) { 488 int vec = gen->temps_map[reg->Register.Index][chan_index]; 489 return vec; 490 } 491 else { 492 return ppc_allocate_vec_register(gen->f); 493 } 494} 495 496 497/** 498 * Register store. Store 'src_vec' at location indicated by 'reg'. 499 * \param free_vec Should the src_vec be released when done? 500 */ 501static void 502emit_store(struct gen_context *gen, 503 int src_vec, 504 const struct tgsi_full_instruction *inst, 505 unsigned chan_index, 506 boolean free_vec) 507{ 508 const struct tgsi_full_dst_register *reg = &inst->Dst[0]; 509 510 switch (reg->Register.File) { 511 case TGSI_FILE_OUTPUT: 512 { 513 int offset = (reg->Register.Index * 4 + chan_index) * 16; 514 int offset_reg = emit_li_offset(gen, offset); 515 ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); 516 } 517 break; 518 case TGSI_FILE_TEMPORARY: 519 if (is_ppc_vec_temporary_dst(reg)) { 520 if (!free_vec) { 521 int dst_vec = gen->temps_map[reg->Register.Index][chan_index]; 522 if (dst_vec != src_vec) 523 ppc_vmove(gen->f, dst_vec, src_vec); 524 } 525 free_vec = FALSE; 526 } 527 else { 528 int offset = (reg->Register.Index * 4 + chan_index) * 16; 529 int offset_reg = emit_li_offset(gen, offset); 530 ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); 531 } 532 break; 533#if 0 534 case TGSI_FILE_ADDRESS: 535 emit_addrs( 536 func, 537 xmm, 538 reg->Register.Index, 539 chan_index ); 540 break; 541#endif 542 default: 543 assert( 0 ); 544 } 545 546#if 0 547 switch( inst->Instruction.Saturate ) { 548 case TGSI_SAT_NONE: 549 break; 550 551 case TGSI_SAT_ZERO_ONE: 552 /* assert( 0 ); */ 553 break; 554 555 case TGSI_SAT_MINUS_PLUS_ONE: 556 assert( 0 ); 557 break; 558 } 559#endif 560 561 if (free_vec) 562 ppc_release_vec_register(gen->f, src_vec); 563} 564 565 566static void 567emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 568{ 569 int v0, v1; 570 uint chan_index; 571 572 v0 = get_src_vec(gen, inst, 0, CHAN_X); 573 v1 = ppc_allocate_vec_register(gen->f); 574 575 switch (inst->Instruction.Opcode) { 576 case TGSI_OPCODE_RSQ: 577 /* v1 = 1.0 / sqrt(v0) */ 578 ppc_vrsqrtefp(gen->f, v1, v0); 579 break; 580 case TGSI_OPCODE_RCP: 581 /* v1 = 1.0 / v0 */ 582 ppc_vrefp(gen->f, v1, v0); 583 break; 584 default: 585 assert(0); 586 } 587 588 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { 589 emit_store(gen, v1, inst, chan_index, FALSE); 590 } 591 592 release_src_vecs(gen); 593 ppc_release_vec_register(gen->f, v1); 594} 595 596 597static void 598emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 599{ 600 uint chan_index; 601 602 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 603 int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ 604 int v1 = get_dst_vec(gen, inst, chan_index); 605 switch (inst->Instruction.Opcode) { 606 case TGSI_OPCODE_ABS: 607 /* turn off the most significant bit of each vector float word */ 608 { 609 int bit31_vec = gen_get_bit31_vec(gen); 610 ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */ 611 } 612 break; 613 case TGSI_OPCODE_FLR: 614 ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ 615 break; 616 case TGSI_OPCODE_FRC: 617 ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */ 618 ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */ 619 break; 620 case TGSI_OPCODE_EX2: 621 ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */ 622 break; 623 case TGSI_OPCODE_LG2: 624 /* XXX this may be broken! */ 625 ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ 626 break; 627 case TGSI_OPCODE_MOV: 628 if (v0 != v1) 629 ppc_vmove(gen->f, v1, v0); 630 break; 631 default: 632 assert(0); 633 } 634 emit_store(gen, v1, inst, chan_index, TRUE); /* store v0 */ 635 } 636 637 release_src_vecs(gen); 638} 639 640 641static void 642emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) 643{ 644 int zero_vec = -1; 645 uint chan; 646 647 if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { 648 zero_vec = ppc_allocate_vec_register(gen->f); 649 ppc_vzero(gen->f, zero_vec); 650 } 651 652 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 653 /* fetch src operands */ 654 int v0 = get_src_vec(gen, inst, 0, chan); 655 int v1 = get_src_vec(gen, inst, 1, chan); 656 int v2 = get_dst_vec(gen, inst, chan); 657 658 /* emit binop */ 659 switch (inst->Instruction.Opcode) { 660 case TGSI_OPCODE_ADD: 661 ppc_vaddfp(gen->f, v2, v0, v1); 662 break; 663 case TGSI_OPCODE_SUB: 664 ppc_vsubfp(gen->f, v2, v0, v1); 665 break; 666 case TGSI_OPCODE_MUL: 667 ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec); 668 break; 669 case TGSI_OPCODE_MIN: 670 ppc_vminfp(gen->f, v2, v0, v1); 671 break; 672 case TGSI_OPCODE_MAX: 673 ppc_vmaxfp(gen->f, v2, v0, v1); 674 break; 675 default: 676 assert(0); 677 } 678 679 /* store v2 */ 680 emit_store(gen, v2, inst, chan, TRUE); 681 } 682 683 if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) 684 ppc_release_vec_register(gen->f, zero_vec); 685 686 release_src_vecs(gen); 687} 688 689 690static void 691emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) 692{ 693 uint chan; 694 695 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 696 /* fetch src operands */ 697 int v0 = get_src_vec(gen, inst, 0, chan); 698 int v1 = get_src_vec(gen, inst, 1, chan); 699 int v2 = get_src_vec(gen, inst, 2, chan); 700 int v3 = get_dst_vec(gen, inst, chan); 701 702 /* emit ALU */ 703 switch (inst->Instruction.Opcode) { 704 case TGSI_OPCODE_MAD: 705 ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ 706 break; 707 case TGSI_OPCODE_LRP: 708 ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ 709 ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ 710 break; 711 default: 712 assert(0); 713 } 714 715 /* store v3 */ 716 emit_store(gen, v3, inst, chan, TRUE); 717 } 718 719 release_src_vecs(gen); 720} 721 722 723/** 724 * Vector comparisons, resulting in 1.0 or 0.0 values. 725 */ 726static void 727emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) 728{ 729 uint chan; 730 int one_vec = gen_one_vec(gen); 731 732 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 733 /* fetch src operands */ 734 int v0 = get_src_vec(gen, inst, 0, chan); 735 int v1 = get_src_vec(gen, inst, 1, chan); 736 int v2 = get_dst_vec(gen, inst, chan); 737 boolean complement = FALSE; 738 739 switch (inst->Instruction.Opcode) { 740 case TGSI_OPCODE_SNE: 741 complement = TRUE; 742 /* fall-through */ 743 case TGSI_OPCODE_SEQ: 744 ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ 745 break; 746 747 case TGSI_OPCODE_SGE: 748 complement = TRUE; 749 /* fall-through */ 750 case TGSI_OPCODE_SLT: 751 ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ 752 break; 753 754 case TGSI_OPCODE_SLE: 755 complement = TRUE; 756 /* fall-through */ 757 case TGSI_OPCODE_SGT: 758 ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */ 759 break; 760 default: 761 assert(0); 762 } 763 764 /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ 765 766 if (complement) 767 ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ 768 else 769 ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ 770 771 /* store v2 */ 772 emit_store(gen, v2, inst, chan, TRUE); 773 } 774 775 release_src_vecs(gen); 776} 777 778 779static void 780emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) 781{ 782 int v0, v1, v2; 783 uint chan_index; 784 785 v2 = ppc_allocate_vec_register(gen->f); 786 787 ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */ 788 789 v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */ 790 v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */ 791 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 792 793 v0 = get_src_vec(gen, inst, 0, CHAN_Y); /* v0 = src0.YYYY */ 794 v1 = get_src_vec(gen, inst, 1, CHAN_Y); /* v1 = src1.YYYY */ 795 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 796 797 v0 = get_src_vec(gen, inst, 0, CHAN_Z); /* v0 = src0.ZZZZ */ 798 v1 = get_src_vec(gen, inst, 1, CHAN_Z); /* v1 = src1.ZZZZ */ 799 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 800 801 if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { 802 v0 = get_src_vec(gen, inst, 0, CHAN_W); /* v0 = src0.WWWW */ 803 v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ 804 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 805 } 806 else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { 807 v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ 808 ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ 809 } 810 811 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 812 emit_store(gen, v2, inst, chan_index, FALSE); /* store v2, free v2 later */ 813 } 814 815 release_src_vecs(gen); 816 817 ppc_release_vec_register(gen->f, v2); 818} 819 820 821/** Approximation for vr = pow(va, vb) */ 822static void 823ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) 824{ 825 /* pow(a,b) ~= exp2(log2(a) * b) */ 826 int t_vec = ppc_allocate_vec_register(f); 827 int zero_vec = ppc_allocate_vec_register(f); 828 829 ppc_vzero(f, zero_vec); 830 831 ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ 832 ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */ 833 ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ 834 835 ppc_release_vec_register(f, t_vec); 836 ppc_release_vec_register(f, zero_vec); 837} 838 839 840static void 841emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) 842{ 843 int one_vec = gen_one_vec(gen); 844 845 /* Compute X */ 846 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { 847 emit_store(gen, one_vec, inst, CHAN_X, FALSE); 848 } 849 850 /* Compute Y, Z */ 851 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || 852 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 853 int x_vec; 854 int zero_vec = ppc_allocate_vec_register(gen->f); 855 856 x_vec = get_src_vec(gen, inst, 0, CHAN_X); /* x_vec = src[0].x */ 857 858 ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ 859 ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ 860 861 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 862 emit_store(gen, x_vec, inst, CHAN_Y, FALSE); 863 } 864 865 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 866 int y_vec, w_vec; 867 int z_vec = ppc_allocate_vec_register(gen->f); 868 int pow_vec = ppc_allocate_vec_register(gen->f); 869 int pos_vec = ppc_allocate_vec_register(gen->f); 870 int p128_vec = ppc_allocate_vec_register(gen->f); 871 int n128_vec = ppc_allocate_vec_register(gen->f); 872 873 y_vec = get_src_vec(gen, inst, 0, CHAN_Y); /* y_vec = src[0].y */ 874 ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ 875 876 w_vec = get_src_vec(gen, inst, 0, CHAN_W); /* w_vec = src[0].w */ 877 878 /* clamp W to [-128, 128] */ 879 load_constant_vec(gen, p128_vec, 128.0f); 880 load_constant_vec(gen, n128_vec, -128.0f); 881 ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = max(w, -128) */ 882 ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */ 883 884 /* if temp.x > 0 885 * z = pow(tmp.y, tmp.w) 886 * else 887 * z = 0.0 888 */ 889 ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ 890 ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ 891 ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ 892 893 emit_store(gen, z_vec, inst, CHAN_Z, FALSE); 894 895 ppc_release_vec_register(gen->f, z_vec); 896 ppc_release_vec_register(gen->f, pow_vec); 897 ppc_release_vec_register(gen->f, pos_vec); 898 ppc_release_vec_register(gen->f, p128_vec); 899 ppc_release_vec_register(gen->f, n128_vec); 900 } 901 902 ppc_release_vec_register(gen->f, zero_vec); 903 } 904 905 /* Compute W */ 906 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { 907 emit_store(gen, one_vec, inst, CHAN_W, FALSE); 908 } 909 910 release_src_vecs(gen); 911} 912 913 914static void 915emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst) 916{ 917 const int one_vec = gen_one_vec(gen); 918 int src_vec; 919 920 /* get src arg */ 921 src_vec = get_src_vec(gen, inst, 0, CHAN_X); 922 923 /* Compute X = 2^floor(src) */ 924 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { 925 int dst_vec = get_dst_vec(gen, inst, CHAN_X); 926 int tmp_vec = ppc_allocate_vec_register(gen->f); 927 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ 928 ppc_vexptefp(gen->f, dst_vec, tmp_vec); /* dst = 2 ^ tmp */ 929 emit_store(gen, dst_vec, inst, CHAN_X, TRUE); 930 ppc_release_vec_register(gen->f, tmp_vec); 931 } 932 933 /* Compute Y = src - floor(src) */ 934 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 935 int dst_vec = get_dst_vec(gen, inst, CHAN_Y); 936 int tmp_vec = ppc_allocate_vec_register(gen->f); 937 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ 938 ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec); /* dst = src - tmp */ 939 emit_store(gen, dst_vec, inst, CHAN_Y, TRUE); 940 ppc_release_vec_register(gen->f, tmp_vec); 941 } 942 943 /* Compute Z = RoughApprox2ToX(src) */ 944 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 945 int dst_vec = get_dst_vec(gen, inst, CHAN_Z); 946 ppc_vexptefp(gen->f, dst_vec, src_vec); /* dst = 2 ^ src */ 947 emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); 948 } 949 950 /* Compute W = 1.0 */ 951 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { 952 emit_store(gen, one_vec, inst, CHAN_W, FALSE); 953 } 954 955 release_src_vecs(gen); 956} 957 958 959static void 960emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst) 961{ 962 const int bit31_vec = gen_get_bit31_vec(gen); 963 const int one_vec = gen_one_vec(gen); 964 int src_vec, abs_vec; 965 966 /* get src arg */ 967 src_vec = get_src_vec(gen, inst, 0, CHAN_X); 968 969 /* compute abs(src) */ 970 abs_vec = ppc_allocate_vec_register(gen->f); 971 ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec); /* abs = src & ~bit31 */ 972 973 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && 974 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 975 976 /* compute tmp = floor(log2(abs)) */ 977 int tmp_vec = ppc_allocate_vec_register(gen->f); 978 ppc_vlogefp(gen->f, tmp_vec, abs_vec); /* tmp = log2(abs) */ 979 ppc_vrfim(gen->f, tmp_vec, tmp_vec); /* tmp = floor(tmp); */ 980 981 /* Compute X = tmp */ 982 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { 983 emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); 984 } 985 986 /* Compute Y = abs / 2^tmp */ 987 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 988 const int zero_vec = ppc_allocate_vec_register(gen->f); 989 ppc_vzero(gen->f, zero_vec); 990 ppc_vexptefp(gen->f, tmp_vec, tmp_vec); /* tmp = 2 ^ tmp */ 991 ppc_vrefp(gen->f, tmp_vec, tmp_vec); /* tmp = 1 / tmp */ 992 /* tmp = abs * tmp + zero */ 993 ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec); 994 emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); 995 ppc_release_vec_register(gen->f, zero_vec); 996 } 997 998 ppc_release_vec_register(gen->f, tmp_vec); 999 } 1000 1001 /* Compute Z = RoughApproxLog2(abs) */ 1002 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 1003 int dst_vec = get_dst_vec(gen, inst, CHAN_Z); 1004 ppc_vlogefp(gen->f, dst_vec, abs_vec); /* dst = log2(abs) */ 1005 emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); 1006 } 1007 1008 /* Compute W = 1.0 */ 1009 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { 1010 emit_store(gen, one_vec, inst, CHAN_W, FALSE); 1011 } 1012 1013 ppc_release_vec_register(gen->f, abs_vec); 1014 release_src_vecs(gen); 1015} 1016 1017 1018static void 1019emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst) 1020{ 1021 int s0_vec = get_src_vec(gen, inst, 0, CHAN_X); 1022 int s1_vec = get_src_vec(gen, inst, 1, CHAN_X); 1023 int pow_vec = ppc_allocate_vec_register(gen->f); 1024 int chan; 1025 1026 ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec); 1027 1028 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 1029 emit_store(gen, pow_vec, inst, chan, FALSE); 1030 } 1031 1032 ppc_release_vec_register(gen->f, pow_vec); 1033 1034 release_src_vecs(gen); 1035} 1036 1037 1038static void 1039emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst) 1040{ 1041 int x0_vec, y0_vec, z0_vec; 1042 int x1_vec, y1_vec, z1_vec; 1043 int zero_vec, tmp_vec; 1044 int tmp2_vec; 1045 1046 zero_vec = ppc_allocate_vec_register(gen->f); 1047 ppc_vzero(gen->f, zero_vec); 1048 1049 tmp_vec = ppc_allocate_vec_register(gen->f); 1050 tmp2_vec = ppc_allocate_vec_register(gen->f); 1051 1052 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || 1053 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 1054 x0_vec = get_src_vec(gen, inst, 0, CHAN_X); 1055 x1_vec = get_src_vec(gen, inst, 1, CHAN_X); 1056 } 1057 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || 1058 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 1059 y0_vec = get_src_vec(gen, inst, 0, CHAN_Y); 1060 y1_vec = get_src_vec(gen, inst, 1, CHAN_Y); 1061 } 1062 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || 1063 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 1064 z0_vec = get_src_vec(gen, inst, 0, CHAN_Z); 1065 z1_vec = get_src_vec(gen, inst, 1, CHAN_Z); 1066 } 1067 1068 IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) { 1069 /* tmp = y0 * z1 */ 1070 ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec); 1071 /* tmp = tmp - z0 * y1*/ 1072 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec); 1073 emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); 1074 } 1075 IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) { 1076 /* tmp = z0 * x1 */ 1077 ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec); 1078 /* tmp = tmp - x0 * z1 */ 1079 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec); 1080 emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); 1081 } 1082 IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) { 1083 /* tmp = x0 * y1 */ 1084 ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec); 1085 /* tmp = tmp - y0 * x1 */ 1086 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec); 1087 emit_store(gen, tmp_vec, inst, CHAN_Z, FALSE); 1088 } 1089 /* W is undefined */ 1090 1091 ppc_release_vec_register(gen->f, tmp_vec); 1092 ppc_release_vec_register(gen->f, zero_vec); 1093 release_src_vecs(gen); 1094} 1095 1096static int 1097emit_instruction(struct gen_context *gen, 1098 struct tgsi_full_instruction *inst) 1099{ 1100 1101 /* we don't handle saturation/clamping yet */ 1102 if (inst->Instruction.Saturate != TGSI_SAT_NONE) 1103 return 0; 1104 1105 /* need to use extra temps to fix SOA dependencies : */ 1106 if (tgsi_check_soa_dependencies(inst)) 1107 return FALSE; 1108 1109 switch (inst->Instruction.Opcode) { 1110 case TGSI_OPCODE_MOV: 1111 case TGSI_OPCODE_ABS: 1112 case TGSI_OPCODE_FLR: 1113 case TGSI_OPCODE_FRC: 1114 case TGSI_OPCODE_EX2: 1115 case TGSI_OPCODE_LG2: 1116 emit_unaryop(gen, inst); 1117 break; 1118 case TGSI_OPCODE_RSQ: 1119 case TGSI_OPCODE_RCP: 1120 emit_scalar_unaryop(gen, inst); 1121 break; 1122 case TGSI_OPCODE_ADD: 1123 case TGSI_OPCODE_SUB: 1124 case TGSI_OPCODE_MUL: 1125 case TGSI_OPCODE_MIN: 1126 case TGSI_OPCODE_MAX: 1127 emit_binop(gen, inst); 1128 break; 1129 case TGSI_OPCODE_SEQ: 1130 case TGSI_OPCODE_SNE: 1131 case TGSI_OPCODE_SLT: 1132 case TGSI_OPCODE_SGT: 1133 case TGSI_OPCODE_SLE: 1134 case TGSI_OPCODE_SGE: 1135 emit_inequality(gen, inst); 1136 break; 1137 case TGSI_OPCODE_MAD: 1138 case TGSI_OPCODE_LRP: 1139 emit_triop(gen, inst); 1140 break; 1141 case TGSI_OPCODE_DP3: 1142 case TGSI_OPCODE_DP4: 1143 case TGSI_OPCODE_DPH: 1144 emit_dotprod(gen, inst); 1145 break; 1146 case TGSI_OPCODE_LIT: 1147 emit_lit(gen, inst); 1148 break; 1149 case TGSI_OPCODE_LOG: 1150 emit_log(gen, inst); 1151 break; 1152 case TGSI_OPCODE_EXP: 1153 emit_exp(gen, inst); 1154 break; 1155 case TGSI_OPCODE_POW: 1156 emit_pow(gen, inst); 1157 break; 1158 case TGSI_OPCODE_XPD: 1159 emit_xpd(gen, inst); 1160 break; 1161 case TGSI_OPCODE_END: 1162 /* normal end */ 1163 return 1; 1164 default: 1165 return 0; 1166 } 1167 return 1; 1168} 1169 1170 1171static void 1172emit_declaration( 1173 struct ppc_function *func, 1174 struct tgsi_full_declaration *decl ) 1175{ 1176 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1177#if 0 1178 unsigned first, last, mask; 1179 unsigned i, j; 1180 1181 first = decl->Range.First; 1182 last = decl->Range.Last; 1183 mask = decl->Declaration.UsageMask; 1184 1185 for( i = first; i <= last; i++ ) { 1186 for( j = 0; j < NUM_CHANNELS; j++ ) { 1187 if( mask & (1 << j) ) { 1188 switch( decl->Declaration.Interpolate ) { 1189 case TGSI_INTERPOLATE_CONSTANT: 1190 emit_coef_a0( func, 0, i, j ); 1191 emit_inputs( func, 0, i, j ); 1192 break; 1193 1194 case TGSI_INTERPOLATE_LINEAR: 1195 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 1196 emit_coef_dadx( func, 1, i, j ); 1197 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 1198 emit_coef_dady( func, 3, i, j ); 1199 emit_mul( func, 0, 1 ); /* x * dadx */ 1200 emit_coef_a0( func, 4, i, j ); 1201 emit_mul( func, 2, 3 ); /* y * dady */ 1202 emit_add( func, 0, 4 ); /* x * dadx + a0 */ 1203 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 1204 emit_inputs( func, 0, i, j ); 1205 break; 1206 1207 case TGSI_INTERPOLATE_PERSPECTIVE: 1208 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 1209 emit_coef_dadx( func, 1, i, j ); 1210 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 1211 emit_coef_dady( func, 3, i, j ); 1212 emit_mul( func, 0, 1 ); /* x * dadx */ 1213 emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); 1214 emit_coef_a0( func, 5, i, j ); 1215 emit_rcp( func, 4, 4 ); /* 1.0 / w */ 1216 emit_mul( func, 2, 3 ); /* y * dady */ 1217 emit_add( func, 0, 5 ); /* x * dadx + a0 */ 1218 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 1219 emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ 1220 emit_inputs( func, 0, i, j ); 1221 break; 1222 1223 default: 1224 assert( 0 ); 1225 break; 1226 } 1227 } 1228 } 1229 } 1230#endif 1231 } 1232} 1233 1234 1235 1236static void 1237emit_prologue(struct ppc_function *func) 1238{ 1239 /* XXX set up stack frame */ 1240} 1241 1242 1243static void 1244emit_epilogue(struct ppc_function *func) 1245{ 1246 ppc_comment(func, -4, "Epilogue:"); 1247 ppc_return(func); 1248 /* XXX restore prev stack frame */ 1249#if 0 1250 debug_printf("PPC: Emitted %u instructions\n", func->num_inst); 1251#endif 1252} 1253 1254 1255 1256/** 1257 * Translate a TGSI vertex/fragment shader to PPC code. 1258 * 1259 * \param tokens the TGSI input shader 1260 * \param func the output PPC code/function 1261 * \param immediates buffer to place immediates, later passed to PPC func 1262 * \return TRUE for success, FALSE if translation failed 1263 */ 1264boolean 1265tgsi_emit_ppc(const struct tgsi_token *tokens, 1266 struct ppc_function *func, 1267 float (*immediates)[4], 1268 boolean do_swizzles ) 1269{ 1270 static int use_ppc_asm = -1; 1271 struct tgsi_parse_context parse; 1272 /*boolean instruction_phase = FALSE;*/ 1273 unsigned ok = 1; 1274 uint num_immediates = 0; 1275 struct gen_context gen; 1276 uint ic = 0; 1277 1278 if (use_ppc_asm < 0) { 1279 /* If GALLIUM_NOPPC is set, don't use PPC codegen */ 1280 use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE); 1281 } 1282 if (!use_ppc_asm) 1283 return FALSE; 1284 1285 if (0) { 1286 debug_printf("\n********* TGSI->PPC ********\n"); 1287 tgsi_dump(tokens, 0); 1288 } 1289 1290 util_init_math(); 1291 1292 init_gen_context(&gen, func); 1293 1294 emit_prologue(func); 1295 1296 tgsi_parse_init( &parse, tokens ); 1297 1298 while (!tgsi_parse_end_of_tokens(&parse) && ok) { 1299 tgsi_parse_token(&parse); 1300 1301 switch (parse.FullToken.Token.Type) { 1302 case TGSI_TOKEN_TYPE_DECLARATION: 1303 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { 1304 emit_declaration(func, &parse.FullToken.FullDeclaration ); 1305 } 1306 break; 1307 1308 case TGSI_TOKEN_TYPE_INSTRUCTION: 1309 if (func->print) { 1310 _debug_printf("# "); 1311 ic++; 1312 tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); 1313 } 1314 1315 ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); 1316 1317 if (!ok) { 1318 uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; 1319 debug_printf("failed to translate tgsi opcode %d (%s) to PPC (%s)\n", 1320 opcode, 1321 tgsi_get_opcode_name(opcode), 1322 parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? 1323 "vertex shader" : "fragment shader"); 1324 } 1325 break; 1326 1327 case TGSI_TOKEN_TYPE_IMMEDIATE: 1328 /* splat each immediate component into a float[4] vector for SoA */ 1329 { 1330 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1331 uint i; 1332 assert(size <= 4); 1333 assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); 1334 for (i = 0; i < size; i++) { 1335 immediates[num_immediates][i] = 1336 parse.FullToken.FullImmediate.u[i].Float; 1337 } 1338 num_immediates++; 1339 } 1340 break; 1341 1342 default: 1343 ok = 0; 1344 assert( 0 ); 1345 } 1346 } 1347 1348 emit_epilogue(func); 1349 1350 tgsi_parse_free( &parse ); 1351 1352 if (ppc_num_instructions(func) == 0) { 1353 /* ran out of memory for instructions */ 1354 ok = FALSE; 1355 } 1356 1357 if (!ok) 1358 debug_printf("TGSI->PPC translation failed\n"); 1359 1360 return ok; 1361} 1362 1363#endif /* PIPE_ARCH_PPC */ 1364