tgsi_ppc.c revision 86bfe974b880dc2cbf40b91ba0fde34e8a9c756e
/**************************************************************************
 *
 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * TGSI to PowerPC code generation.
 */

#include "pipe/p_config.h"

#if defined(PIPE_ARCH_PPC)

#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_sse.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_dump.h"
#include "tgsi_exec.h"
#include "tgsi_ppc.h"
#include "rtasm/rtasm_ppc.h"


/**
 * Since it's pretty much impossible to form PPC vector immediates, load
 * them from memory here:
 */
PIPE_ALIGN_VAR(16, const float ppc_builtin_constants[]) = {
   1.0f, -128.0f, 128.0f, 0.0f
};


#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3


/**
 * How many TGSI temps should be implemented with real PPC vector registers
 * rather than memory.
 */
#define MAX_PPC_TEMPS 3

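/*
 * Note on the expected memory layout (inferred from the offsets used below,
 * not guaranteed by this file): code is generated in SoA form, i.e. each
 * channel of a shader register is a 4-float vector (one element per
 * vertex/fragment in the group being processed).  Inputs, outputs and
 * memory-backed temps are laid out with 16 bytes per channel and 64 bytes
 * per register, while constants and immediates are plain float[4] per
 * register and get splatted into a vector at load time.
 */
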
/**
 * Context/state used during code gen.
 */
struct gen_context
{
   struct ppc_function *f;
   int inputs_reg;    /**< GP register pointing to input params */
   int outputs_reg;   /**< GP register pointing to output params */
   int temps_reg;     /**< GP register pointing to temporary "registers" */
   int immed_reg;     /**< GP register pointing to immediates buffer */
   int const_reg;     /**< GP register pointing to constants buffer */
   int builtins_reg;  /**< GP register pointing to built-in constants */

   int offset_reg;    /**< used to reduce redundant li instructions */
   int offset_value;

   int one_vec;       /**< vector register with {1.0, 1.0, 1.0, 1.0} */
   int bit31_vec;     /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */

   /**
    * Map TGSI temps to PPC vector temps.
    * We have 32 PPC vector regs; four of them are used per TGSI temp,
    * but only for the first MAX_PPC_TEMPS temps [0..MAX_PPC_TEMPS-1].
    */
   int temps_map[MAX_PPC_TEMPS][4];

   /**
    * Cache of src registers.
    * This is used to avoid redundant load instructions.
    */
   struct {
      struct tgsi_full_src_register src;
      uint chan;
      uint vec;
   } regs[12];  /* 3 src regs, 4 channels */
   uint num_regs;
};


/**
 * Initialize code generation context.
 */
static void
init_gen_context(struct gen_context *gen, struct ppc_function *func)
{
   uint i;

   memset(gen, 0, sizeof(*gen));
   gen->f = func;
   gen->inputs_reg = ppc_reserve_register(func, 3);   /* first function param */
   gen->outputs_reg = ppc_reserve_register(func, 4);  /* second function param */
   gen->temps_reg = ppc_reserve_register(func, 5);    /* ... */
   gen->immed_reg = ppc_reserve_register(func, 6);
   gen->const_reg = ppc_reserve_register(func, 7);
   gen->builtins_reg = ppc_reserve_register(func, 8);
   gen->one_vec = -1;
   gen->bit31_vec = -1;
   gen->offset_reg = -1;
   gen->offset_value = -9999999;
   for (i = 0; i < MAX_PPC_TEMPS; i++) {
      gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f);
      gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f);
      gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f);
      gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f);
   }
}


/**
 * Is the given TGSI src register stored as a real PPC vector register?
 */
static boolean
is_ppc_vec_temporary(const struct tgsi_full_src_register *reg)
{
   return (reg->Register.File == TGSI_FILE_TEMPORARY &&
           reg->Register.Index < MAX_PPC_TEMPS);
}


/**
 * Is the given TGSI dst register stored as a real PPC vector register?
 */
static boolean
is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg)
{
   return (reg->Register.File == TGSI_FILE_TEMPORARY &&
           reg->Register.Index < MAX_PPC_TEMPS);
}



/**
 * All PPC vector load/store instructions form an effective address
 * by adding the contents of two registers.  For example:
 *    lvx v2,r8,r9   # v2 = memory[r8 + r9]
 *    stvx v2,r8,r9  # memory[r8 + r9] = v2;
 * So our lvx/stvx instructions are typically preceded by an 'li' instruction
 * to load r9 (above) with an immediate (an offset).
 * This code emits that 'li' instruction, but only if the offset value is
 * different than the previous 'li'.
 * This optimization seems to save about 10% in the instruction count.
 * Note that we need to unconditionally emit an 'li' at the top of basic
 * blocks (such as inside loops).
 */
static int
emit_li_offset(struct gen_context *gen, int offset)
{
   if (gen->offset_reg <= 0) {
      /* allocate a GP register for storing load/store offset */
      gen->offset_reg = ppc_allocate_register(gen->f);
   }

   /* emit new 'li' if offset is changing */
   if (gen->offset_value < 0 || gen->offset_value != offset) {
      gen->offset_value = offset;
      ppc_li(gen->f, gen->offset_reg, offset);
   }

   return gen->offset_reg;
}


/**
 * Forces subsequent emit_li_offset() calls to emit an 'li'.
 * To be called at the top of basic blocks.
 */
static void
reset_li_offset(struct gen_context *gen)
{
   gen->offset_value = -9999999;
}

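/*
 * For illustration only (actual register numbers depend on allocation):
 * storing two channels of OUT[0] might generate
 *    li    r9, 0        # offset of OUT[0].x
 *    stvx  v2, r4, r9
 *    li    r9, 16       # offset changed, so a new 'li' is emitted
 *    stvx  v3, r4, r9
 * A later access to the same offset reuses r9 with no additional 'li'.
 */
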
/**
 * Load the given vector register with {value, value, value, value}.
 * The value must be in the ppc_builtin_constants[] array.
 * We wouldn't need this if there was a simple way to load PPC vector
 * registers with immediate values!
 */
static void
load_constant_vec(struct gen_context *gen, int dst_vec, float value)
{
   uint pos;
   for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) {
      if (ppc_builtin_constants[pos] == value) {
         int offset = pos * 4;
         int offset_reg = emit_li_offset(gen, offset);

         /* Load 4-byte word into vector register.
          * The vector slot depends on the effective address we load from.
          * We know that our builtins start at a 16-byte boundary so we
          * know that 'pos % 4' tells us which vector slot will have the
          * loaded word.  The other vector slots will be undefined.
          */
         ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg);
         /* splat word[pos % 4] across the vector reg */
         ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4);
         return;
      }
   }
   assert(0 && "Need to add new constant to ppc_builtin_constants array");
}


/**
 * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}.
 */
static int
gen_one_vec(struct gen_context *gen)
{
   if (gen->one_vec < 0) {
      gen->one_vec = ppc_allocate_vec_register(gen->f);
      load_constant_vec(gen, gen->one_vec, 1.0f);
   }
   return gen->one_vec;
}


/**
 * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}.
 */
static int
gen_get_bit31_vec(struct gen_context *gen)
{
   if (gen->bit31_vec < 0) {
      gen->bit31_vec = ppc_allocate_vec_register(gen->f);
      /* vspltisw splats -1 (0xffffffff) into each word; vslw then shifts
       * each word left by the low five bits of itself (31), leaving
       * 0x80000000 in every slot.
       */
      ppc_vspltisw(gen->f, gen->bit31_vec, -1);
      ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec);
   }
   return gen->bit31_vec;
}

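/*
 * A note on the sign tricks used below (standard IEEE-754 behaviour, not
 * specific to this file): bit 31 of a single-precision float is its sign
 * bit, so with m = 0x80000000,
 *    x & ~m  ==  fabsf(x)
 *    x |  m  ==  -fabsf(x)
 *    x ^  m  ==  -x
 * which is how the TGSI_UTIL_SIGN_CLEAR/SET/TOGGLE source modes are handled
 * in emit_fetch() below.
 */
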
/**
 * Register fetch.  Return PPC vector register with result.
 */
static int
emit_fetch(struct gen_context *gen,
           const struct tgsi_full_src_register *reg,
           const unsigned chan_index)
{
   uint swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   int dst_vec = -1;

   switch (swizzle) {
   case TGSI_SWIZZLE_X:
   case TGSI_SWIZZLE_Y:
   case TGSI_SWIZZLE_Z:
   case TGSI_SWIZZLE_W:
      switch (reg->Register.File) {
      case TGSI_FILE_INPUT:
      case TGSI_FILE_SYSTEM_VALUE:
         {
            int offset = (reg->Register.Index * 4 + swizzle) * 16;
            int offset_reg = emit_li_offset(gen, offset);
            dst_vec = ppc_allocate_vec_register(gen->f);
            ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
         }
         break;
      case TGSI_FILE_TEMPORARY:
         if (is_ppc_vec_temporary(reg)) {
            /* use PPC vec register */
            dst_vec = gen->temps_map[reg->Register.Index][swizzle];
         }
         else {
            /* use memory-based temp register "file" */
            int offset = (reg->Register.Index * 4 + swizzle) * 16;
            int offset_reg = emit_li_offset(gen, offset);
            dst_vec = ppc_allocate_vec_register(gen->f);
            ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
         }
         break;
      case TGSI_FILE_IMMEDIATE:
         {
            int offset = (reg->Register.Index * 4 + swizzle) * 4;
            int offset_reg = emit_li_offset(gen, offset);
            dst_vec = ppc_allocate_vec_register(gen->f);
            /* Load 4-byte word into vector register.
             * The vector slot depends on the effective address we load from.
             * We know that our immediates start at a 16-byte boundary so we
             * know that 'swizzle' tells us which vector slot will have the
             * loaded word.  The other vector slots will be undefined.
             */
            ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg);
            /* splat word[swizzle] across the vector reg */
            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
         }
         break;
      case TGSI_FILE_CONSTANT:
         {
            int offset = (reg->Register.Index * 4 + swizzle) * 4;
            int offset_reg = emit_li_offset(gen, offset);
            dst_vec = ppc_allocate_vec_register(gen->f);
            /* Load 4-byte word into vector register.
             * The vector slot depends on the effective address we load from.
             * We know that our constants start at a 16-byte boundary so we
             * know that 'swizzle' tells us which vector slot will have the
             * loaded word.  The other vector slots will be undefined.
             */
            ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg);
            /* splat word[swizzle] across the vector reg */
            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
         }
         break;
      default:
         assert( 0 );
      }
      break;
   default:
      assert( 0 );
   }

   assert(dst_vec >= 0);

   {
      uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
      if (sign_op != TGSI_UTIL_SIGN_KEEP) {
         int bit31_vec = gen_get_bit31_vec(gen);
         int dst_vec2;

         if (is_ppc_vec_temporary(reg)) {
            /* need to use a new temp */
            dst_vec2 = ppc_allocate_vec_register(gen->f);
         }
         else {
            dst_vec2 = dst_vec;
         }

         switch (sign_op) {
         case TGSI_UTIL_SIGN_CLEAR:
            /* vec = vec & ~bit31 */
            ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec);
            break;
         case TGSI_UTIL_SIGN_SET:
            /* vec = vec | bit31 */
            ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec);
            break;
         case TGSI_UTIL_SIGN_TOGGLE:
            /* vec = vec ^ bit31 */
            ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec);
            break;
         default:
            assert(0);
         }
         return dst_vec2;
      }
   }

   return dst_vec;
}



/**
 * Test if two TGSI src registers refer to the same memory location.
 * We use this to avoid redundant register loads.
 */
static boolean
equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a,
               const struct tgsi_full_src_register *b, uint chan_b)
{
   int swz_a, swz_b;
   int sign_a, sign_b;
   if (a->Register.File != b->Register.File)
      return FALSE;
   if (a->Register.Index != b->Register.Index)
      return FALSE;
   swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a);
   swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b);
   if (swz_a != swz_b)
      return FALSE;
   sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a);
   sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b);
   if (sign_a != sign_b)
      return FALSE;
   return TRUE;
}

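/*
 * Example of the effect of the source cache below (illustrative): for
 *    MUL TEMP[4].x, IN[0].xxxx, IN[0].xxxx
 * the second fetch of IN[0].x hits the cache, so only one lvx is emitted
 * and both operands end up in the same vector register.
 */
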
/**
 * Given a TGSI src register and channel index, return the PPC vector
 * register containing the value.  We use a cache to prevent re-loading
 * the same register multiple times.
 * \return index of PPC vector register with the desired src operand
 */
static int
get_src_vec(struct gen_context *gen,
            struct tgsi_full_instruction *inst, int src_reg, uint chan)
{
   const struct tgsi_full_src_register *src = &inst->Src[src_reg];
   int vec;
   uint i;

   /* check the cache */
   for (i = 0; i < gen->num_regs; i++) {
      if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) {
         /* cache hit */
         assert(gen->regs[i].vec >= 0);
         return gen->regs[i].vec;
      }
   }

   /* cache miss: allocate new vec reg and emit fetch/load code */
   vec = emit_fetch(gen, src, chan);
   gen->regs[gen->num_regs].src = *src;
   gen->regs[gen->num_regs].chan = chan;
   gen->regs[gen->num_regs].vec = vec;
   gen->num_regs++;

   assert(gen->num_regs <= Elements(gen->regs));

   assert(vec >= 0);

   return vec;
}


/**
 * Clear the src operand cache.  To be called at the end of each emit function.
 */
static void
release_src_vecs(struct gen_context *gen)
{
   uint i;
   for (i = 0; i < gen->num_regs; i++) {
      const struct tgsi_full_src_register src = gen->regs[i].src;
      if (!is_ppc_vec_temporary(&src)) {
         ppc_release_vec_register(gen->f, gen->regs[i].vec);
      }
   }
   gen->num_regs = 0;
}



static int
get_dst_vec(struct gen_context *gen,
            const struct tgsi_full_instruction *inst,
            unsigned chan_index)
{
   const struct tgsi_full_dst_register *reg = &inst->Dst[0];

   if (is_ppc_vec_temporary_dst(reg)) {
      int vec = gen->temps_map[reg->Register.Index][chan_index];
      return vec;
   }
   else {
      return ppc_allocate_vec_register(gen->f);
   }
}

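/*
 * Register lifetime summary (descriptive note): vectors fetched for source
 * operands are remembered in gen->regs[] and freed by release_src_vecs(),
 * except those that alias PPC-resident temporaries.  Destination vectors
 * obtained from get_dst_vec() for memory-backed registers are freed by
 * emit_store() when it is called with free_vec = TRUE.
 */
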
/**
 * Register store.  Store 'src_vec' at location indicated by 'reg'.
 * \param free_vec  Should the src_vec be released when done?
 */
static void
emit_store(struct gen_context *gen,
           int src_vec,
           const struct tgsi_full_instruction *inst,
           unsigned chan_index,
           boolean free_vec)
{
   const struct tgsi_full_dst_register *reg = &inst->Dst[0];

   switch (reg->Register.File) {
   case TGSI_FILE_OUTPUT:
      {
         int offset = (reg->Register.Index * 4 + chan_index) * 16;
         int offset_reg = emit_li_offset(gen, offset);
         ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg);
      }
      break;
   case TGSI_FILE_TEMPORARY:
      if (is_ppc_vec_temporary_dst(reg)) {
         if (!free_vec) {
            int dst_vec = gen->temps_map[reg->Register.Index][chan_index];
            if (dst_vec != src_vec)
               ppc_vmove(gen->f, dst_vec, src_vec);
         }
         free_vec = FALSE;
      }
      else {
         int offset = (reg->Register.Index * 4 + chan_index) * 16;
         int offset_reg = emit_li_offset(gen, offset);
         ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg);
      }
      break;
#if 0
   case TGSI_FILE_ADDRESS:
      emit_addrs(
         func,
         xmm,
         reg->Register.Index,
         chan_index );
      break;
#endif
   default:
      assert( 0 );
   }

#if 0
   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      /* assert( 0 ); */
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      assert( 0 );
      break;
   }
#endif

   if (free_vec)
      ppc_release_vec_register(gen->f, src_vec);
}


static void
emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   int v0, v1;
   uint chan_index;

   v0 = get_src_vec(gen, inst, 0, CHAN_X);
   v1 = ppc_allocate_vec_register(gen->f);

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_RSQ:
      /* v1 = 1.0 / sqrt(v0) */
      ppc_vrsqrtefp(gen->f, v1, v0);
      break;
   case TGSI_OPCODE_RCP:
      /* v1 = 1.0 / v0 */
      ppc_vrefp(gen->f, v1, v0);
      break;
   default:
      assert(0);
   }

   FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
      emit_store(gen, v1, inst, chan_index, FALSE);
   }

   release_src_vecs(gen);
   ppc_release_vec_register(gen->f, v1);
}

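/*
 * Note: vrsqrtefp and vrefp (used above) are AltiVec *estimate*
 * instructions, accurate to roughly 1 part in 4096 per the PowerPC spec.
 * No Newton-Raphson refinement step is applied here, so RCP/RSQ results
 * are approximate.
 */
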
static void
emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   uint chan_index;

   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
      int v0 = get_src_vec(gen, inst, 0, chan_index);   /* v0 = srcreg[0] */
      int v1 = get_dst_vec(gen, inst, chan_index);
      switch (inst->Instruction.Opcode) {
      case TGSI_OPCODE_ABS:
         /* turn off the most significant bit of each vector float word */
         {
            int bit31_vec = gen_get_bit31_vec(gen);
            ppc_vandc(gen->f, v1, v0, bit31_vec);   /* v1 = v0 & ~bit31 */
         }
         break;
      case TGSI_OPCODE_FLR:
         ppc_vrfim(gen->f, v1, v0);         /* v1 = floor(v0) */
         break;
      case TGSI_OPCODE_FRC:
         ppc_vrfim(gen->f, v1, v0);         /* tmp = floor(v0) */
         ppc_vsubfp(gen->f, v1, v0, v1);    /* v1 = v0 - v1 */
         break;
      case TGSI_OPCODE_EX2:
         ppc_vexptefp(gen->f, v1, v0);      /* v1 = 2^v0 */
         break;
      case TGSI_OPCODE_LG2:
         /* XXX this may be broken! */
         ppc_vlogefp(gen->f, v1, v0);       /* v1 = log2(v0) */
         break;
      case TGSI_OPCODE_MOV:
         if (v0 != v1)
            ppc_vmove(gen->f, v1, v0);
         break;
      default:
         assert(0);
      }
      emit_store(gen, v1, inst, chan_index, TRUE);   /* store v1 */
   }

   release_src_vecs(gen);
}


static void
emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   int zero_vec = -1;
   uint chan;

   if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) {
      /* AltiVec has no plain vector float multiply, so MUL is emitted as a
       * multiply-add with a zero addend: v2 = v0 * v1 + 0.
       */
      zero_vec = ppc_allocate_vec_register(gen->f);
      ppc_vzero(gen->f, zero_vec);
   }

   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
      /* fetch src operands */
      int v0 = get_src_vec(gen, inst, 0, chan);
      int v1 = get_src_vec(gen, inst, 1, chan);
      int v2 = get_dst_vec(gen, inst, chan);

      /* emit binop */
      switch (inst->Instruction.Opcode) {
      case TGSI_OPCODE_ADD:
         ppc_vaddfp(gen->f, v2, v0, v1);
         break;
      case TGSI_OPCODE_SUB:
         ppc_vsubfp(gen->f, v2, v0, v1);
         break;
      case TGSI_OPCODE_MUL:
         ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec);
         break;
      case TGSI_OPCODE_MIN:
         ppc_vminfp(gen->f, v2, v0, v1);
         break;
      case TGSI_OPCODE_MAX:
         ppc_vmaxfp(gen->f, v2, v0, v1);
         break;
      default:
         assert(0);
      }

      /* store v2 */
      emit_store(gen, v2, inst, chan, TRUE);
   }

   if (inst->Instruction.Opcode == TGSI_OPCODE_MUL)
      ppc_release_vec_register(gen->f, zero_vec);

   release_src_vecs(gen);
}

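/*
 * Note on LRP in emit_triop() below: TGSI defines
 *    LRP dst = src0 * src1 + (1 - src0) * src2
 * which is algebraically equal to src0 * (src1 - src2) + src2, so it can be
 * emitted as one subtract plus one multiply-add.
 */
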
static void
emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   uint chan;

   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
      /* fetch src operands */
      int v0 = get_src_vec(gen, inst, 0, chan);
      int v1 = get_src_vec(gen, inst, 1, chan);
      int v2 = get_src_vec(gen, inst, 2, chan);
      int v3 = get_dst_vec(gen, inst, chan);

      /* emit ALU */
      switch (inst->Instruction.Opcode) {
      case TGSI_OPCODE_MAD:
         ppc_vmaddfp(gen->f, v3, v0, v1, v2);   /* v3 = v0 * v1 + v2 */
         break;
      case TGSI_OPCODE_LRP:
         ppc_vsubfp(gen->f, v3, v1, v2);        /* v3 = v1 - v2 */
         ppc_vmaddfp(gen->f, v3, v0, v3, v2);   /* v3 = v0 * v3 + v2 */
         break;
      default:
         assert(0);
      }

      /* store v3 */
      emit_store(gen, v3, inst, chan, TRUE);
   }

   release_src_vecs(gen);
}


/**
 * Vector comparisons, resulting in 1.0 or 0.0 values.
 */
static void
emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   uint chan;
   int one_vec = gen_one_vec(gen);

   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
      /* fetch src operands */
      int v0 = get_src_vec(gen, inst, 0, chan);
      int v1 = get_src_vec(gen, inst, 1, chan);
      int v2 = get_dst_vec(gen, inst, chan);
      boolean complement = FALSE;

      switch (inst->Instruction.Opcode) {
      case TGSI_OPCODE_SNE:
         complement = TRUE;
         /* fall-through */
      case TGSI_OPCODE_SEQ:
         ppc_vcmpeqfpx(gen->f, v2, v0, v1);   /* v2 = v0 == v1 ? ~0 : 0 */
         break;

      case TGSI_OPCODE_SGE:
         complement = TRUE;
         /* fall-through */
      case TGSI_OPCODE_SLT:
         ppc_vcmpgtfpx(gen->f, v2, v1, v0);   /* v2 = v1 > v0 ? ~0 : 0 */
         break;

      case TGSI_OPCODE_SLE:
         complement = TRUE;
         /* fall-through */
      case TGSI_OPCODE_SGT:
         ppc_vcmpgtfpx(gen->f, v2, v0, v1);   /* v2 = v0 > v1 ? ~0 : 0 */
         break;
      default:
         assert(0);
      }

      /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */

      if (complement)
         ppc_vandc(gen->f, v2, one_vec, v2);   /* v2 = one_vec & ~v2 */
      else
         ppc_vand(gen->f, v2, one_vec, v2);    /* v2 = one_vec & v2 */

      /* store v2 */
      emit_store(gen, v2, inst, chan, TRUE);
   }

   release_src_vecs(gen);
}


static void
emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   int v0, v1, v2;
   uint chan_index;

   v2 = ppc_allocate_vec_register(gen->f);

   ppc_vzero(gen->f, v2);                    /* v2 = {0, 0, 0, 0} */

   v0 = get_src_vec(gen, inst, 0, CHAN_X);   /* v0 = src0.XXXX */
   v1 = get_src_vec(gen, inst, 1, CHAN_X);   /* v1 = src1.XXXX */
   ppc_vmaddfp(gen->f, v2, v0, v1, v2);      /* v2 = v0 * v1 + v2 */

   v0 = get_src_vec(gen, inst, 0, CHAN_Y);   /* v0 = src0.YYYY */
   v1 = get_src_vec(gen, inst, 1, CHAN_Y);   /* v1 = src1.YYYY */
   ppc_vmaddfp(gen->f, v2, v0, v1, v2);      /* v2 = v0 * v1 + v2 */

   v0 = get_src_vec(gen, inst, 0, CHAN_Z);   /* v0 = src0.ZZZZ */
   v1 = get_src_vec(gen, inst, 1, CHAN_Z);   /* v1 = src1.ZZZZ */
   ppc_vmaddfp(gen->f, v2, v0, v1, v2);      /* v2 = v0 * v1 + v2 */

   if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) {
      v0 = get_src_vec(gen, inst, 0, CHAN_W);   /* v0 = src0.WWWW */
      v1 = get_src_vec(gen, inst, 1, CHAN_W);   /* v1 = src1.WWWW */
      ppc_vmaddfp(gen->f, v2, v0, v1, v2);      /* v2 = v0 * v1 + v2 */
   }
   else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) {
      v1 = get_src_vec(gen, inst, 1, CHAN_W);   /* v1 = src1.WWWW */
      ppc_vaddfp(gen->f, v2, v2, v1);           /* v2 = v2 + v1 */
   }

   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) {
      emit_store(gen, v2, inst, chan_index, FALSE);   /* store v2, free v2 later */
   }

   release_src_vecs(gen);

   ppc_release_vec_register(gen->f, v2);
}


/** Approximation for vr = pow(va, vb) */
static void
ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb)
{
   /* pow(a,b) ~= exp2(log2(a) * b) */
   int t_vec = ppc_allocate_vec_register(f);
   int zero_vec = ppc_allocate_vec_register(f);

   ppc_vzero(f, zero_vec);

   ppc_vlogefp(f, t_vec, va);                    /* t = log2(va) */
   ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec);   /* t = t * vb + zero */
   ppc_vexptefp(f, vr, t_vec);                   /* vr = 2^t */

   ppc_release_vec_register(f, t_vec);
   ppc_release_vec_register(f, zero_vec);
}

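/*
 * Caveat (general math, not specific to this file): the exp2(log2(a) * b)
 * identity used above only holds for a > 0, and vlogefp/vexptefp are
 * estimate instructions, so POW and the LIT exponent below are only
 * approximations.
 */
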
static void
emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   int one_vec = gen_one_vec(gen);

   /* Compute X */
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
      emit_store(gen, one_vec, inst, CHAN_X, FALSE);
   }

   /* Compute Y, Z */
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
      int x_vec;
      int zero_vec = ppc_allocate_vec_register(gen->f);

      x_vec = get_src_vec(gen, inst, 0, CHAN_X);    /* x_vec = src[0].x */

      ppc_vzero(gen->f, zero_vec);                  /* zero = {0,0,0,0} */
      ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec);   /* x_vec = max(x_vec, 0) */

      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
         emit_store(gen, x_vec, inst, CHAN_Y, FALSE);
      }

      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
         int y_vec, w_vec;
         int z_vec = ppc_allocate_vec_register(gen->f);
         int pow_vec = ppc_allocate_vec_register(gen->f);
         int pos_vec = ppc_allocate_vec_register(gen->f);
         int p128_vec = ppc_allocate_vec_register(gen->f);
         int n128_vec = ppc_allocate_vec_register(gen->f);

         y_vec = get_src_vec(gen, inst, 0, CHAN_Y);    /* y_vec = src[0].y */
         ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec);   /* y_vec = max(y_vec, 0) */

         w_vec = get_src_vec(gen, inst, 0, CHAN_W);    /* w_vec = src[0].w */

         /* clamp W to [-128, 128] */
         load_constant_vec(gen, p128_vec, 128.0f);
         load_constant_vec(gen, n128_vec, -128.0f);
         ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec);   /* w = max(w, -128) */
         ppc_vminfp(gen->f, w_vec, w_vec, p128_vec);   /* w = min(w, 128) */

         /* if tmp.x > 0
          *    z = pow(tmp.y, tmp.w)
          * else
          *    z = 0.0
          */
         ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec);        /* pow = pow(y, w) */
         ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec);   /* pos = x > 0 */
         ppc_vand(gen->f, z_vec, pow_vec, pos_vec);         /* z = pow & pos */

         emit_store(gen, z_vec, inst, CHAN_Z, FALSE);

         ppc_release_vec_register(gen->f, z_vec);
         ppc_release_vec_register(gen->f, pow_vec);
         ppc_release_vec_register(gen->f, pos_vec);
         ppc_release_vec_register(gen->f, p128_vec);
         ppc_release_vec_register(gen->f, n128_vec);
      }

      ppc_release_vec_register(gen->f, zero_vec);
   }

   /* Compute W */
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
      emit_store(gen, one_vec, inst, CHAN_W, FALSE);
   }

   release_src_vecs(gen);
}


static void
emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   const int one_vec = gen_one_vec(gen);
   int src_vec;

   /* get src arg */
   src_vec = get_src_vec(gen, inst, 0, CHAN_X);

   /* Compute X = 2^floor(src) */
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
      int dst_vec = get_dst_vec(gen, inst, CHAN_X);
      int tmp_vec = ppc_allocate_vec_register(gen->f);
      ppc_vrfim(gen->f, tmp_vec, src_vec);      /* tmp = floor(src); */
      ppc_vexptefp(gen->f, dst_vec, tmp_vec);   /* dst = 2 ^ tmp */
      emit_store(gen, dst_vec, inst, CHAN_X, TRUE);
      ppc_release_vec_register(gen->f, tmp_vec);
   }

   /* Compute Y = src - floor(src) */
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
      int dst_vec = get_dst_vec(gen, inst, CHAN_Y);
      int tmp_vec = ppc_allocate_vec_register(gen->f);
      ppc_vrfim(gen->f, tmp_vec, src_vec);             /* tmp = floor(src); */
      ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec);   /* dst = src - tmp */
      emit_store(gen, dst_vec, inst, CHAN_Y, TRUE);
      ppc_release_vec_register(gen->f, tmp_vec);
   }

   /* Compute Z = RoughApprox2ToX(src) */
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
      int dst_vec = get_dst_vec(gen, inst, CHAN_Z);
      ppc_vexptefp(gen->f, dst_vec, src_vec);   /* dst = 2 ^ src */
      emit_store(gen, dst_vec, inst, CHAN_Z, TRUE);
   }

   /* Compute W = 1.0 */
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
      emit_store(gen, one_vec, inst, CHAN_W, FALSE);
   }

   release_src_vecs(gen);
}

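/*
 * TGSI LOG semantics as implemented below (see the per-channel code):
 *    dst.x = floor(log2(|src.x|))
 *    dst.y = |src.x| / 2^floor(log2(|src.x|))
 *    dst.z = log2(|src.x|)
 *    dst.w = 1.0
 */
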
static void
emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   const int bit31_vec = gen_get_bit31_vec(gen);
   const int one_vec = gen_one_vec(gen);
   int src_vec, abs_vec;

   /* get src arg */
   src_vec = get_src_vec(gen, inst, 0, CHAN_X);

   /* compute abs(src) */
   abs_vec = ppc_allocate_vec_register(gen->f);
   ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec);   /* abs = src & ~bit31 */

   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {

      /* compute tmp = floor(log2(abs)) */
      int tmp_vec = ppc_allocate_vec_register(gen->f);
      ppc_vlogefp(gen->f, tmp_vec, abs_vec);   /* tmp = log2(abs) */
      ppc_vrfim(gen->f, tmp_vec, tmp_vec);     /* tmp = floor(tmp); */

      /* Compute X = tmp */
      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
         emit_store(gen, tmp_vec, inst, CHAN_X, FALSE);
      }

      /* Compute Y = abs / 2^tmp */
      if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
         const int zero_vec = ppc_allocate_vec_register(gen->f);
         ppc_vzero(gen->f, zero_vec);
         ppc_vexptefp(gen->f, tmp_vec, tmp_vec);   /* tmp = 2 ^ tmp */
         ppc_vrefp(gen->f, tmp_vec, tmp_vec);      /* tmp = 1 / tmp */
         /* tmp = abs * tmp + zero */
         ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec);
         emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE);
         ppc_release_vec_register(gen->f, zero_vec);
      }

      ppc_release_vec_register(gen->f, tmp_vec);
   }

   /* Compute Z = RoughApproxLog2(abs) */
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
      int dst_vec = get_dst_vec(gen, inst, CHAN_Z);
      ppc_vlogefp(gen->f, dst_vec, abs_vec);   /* dst = log2(abs) */
      emit_store(gen, dst_vec, inst, CHAN_Z, TRUE);
   }

   /* Compute W = 1.0 */
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
      emit_store(gen, one_vec, inst, CHAN_W, FALSE);
   }

   ppc_release_vec_register(gen->f, abs_vec);
   release_src_vecs(gen);
}


static void
emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   int s0_vec = get_src_vec(gen, inst, 0, CHAN_X);
   int s1_vec = get_src_vec(gen, inst, 1, CHAN_X);
   int pow_vec = ppc_allocate_vec_register(gen->f);
   int chan;

   ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec);

   FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) {
      emit_store(gen, pow_vec, inst, chan, FALSE);
   }

   ppc_release_vec_register(gen->f, pow_vec);

   release_src_vecs(gen);
}

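/*
 * Cross product (XPD), as implemented below:
 *    dst.x = src0.y * src1.z - src0.z * src1.y
 *    dst.y = src0.z * src1.x - src0.x * src1.z
 *    dst.z = src0.x * src1.y - src0.y * src1.x
 * dst.w is left undefined here.
 */
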
static void
emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst)
{
   int x0_vec, y0_vec, z0_vec;
   int x1_vec, y1_vec, z1_vec;
   int zero_vec, tmp_vec;

   zero_vec = ppc_allocate_vec_register(gen->f);
   ppc_vzero(gen->f, zero_vec);

   tmp_vec = ppc_allocate_vec_register(gen->f);

   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
      x0_vec = get_src_vec(gen, inst, 0, CHAN_X);
      x1_vec = get_src_vec(gen, inst, 1, CHAN_X);
   }
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
      y0_vec = get_src_vec(gen, inst, 0, CHAN_Y);
      y1_vec = get_src_vec(gen, inst, 1, CHAN_Y);
   }
   if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
       IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
      z0_vec = get_src_vec(gen, inst, 0, CHAN_Z);
      z1_vec = get_src_vec(gen, inst, 1, CHAN_Z);
   }

   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) {
      /* tmp = y0 * z1 */
      ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec);
      /* tmp = tmp - z0 * y1 */
      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec);
      emit_store(gen, tmp_vec, inst, CHAN_X, FALSE);
   }
   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) {
      /* tmp = z0 * x1 */
      ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec);
      /* tmp = tmp - x0 * z1 */
      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec);
      emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE);
   }
   IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) {
      /* tmp = x0 * y1 */
      ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec);
      /* tmp = tmp - y0 * x1 */
      ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec);
      emit_store(gen, tmp_vec, inst, CHAN_Z, FALSE);
   }
   /* W is undefined */

   ppc_release_vec_register(gen->f, tmp_vec);
   ppc_release_vec_register(gen->f, zero_vec);
   release_src_vecs(gen);
}


static int
emit_instruction(struct gen_context *gen,
                 struct tgsi_full_instruction *inst)
{
   /* we don't handle saturation/clamping yet */
   if (inst->Instruction.Saturate != TGSI_SAT_NONE)
      return 0;

   /* need to use extra temps to fix SOA dependencies */
   if (tgsi_check_soa_dependencies(inst))
      return 0;

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_ABS:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_FRC:
   case TGSI_OPCODE_EX2:
   case TGSI_OPCODE_LG2:
      emit_unaryop(gen, inst);
      break;
   case TGSI_OPCODE_RSQ:
   case TGSI_OPCODE_RCP:
      emit_scalar_unaryop(gen, inst);
      break;
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_SUB:
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MAX:
      emit_binop(gen, inst);
      break;
   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_SGT:
   case TGSI_OPCODE_SLE:
   case TGSI_OPCODE_SGE:
      emit_inequality(gen, inst);
      break;
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_LRP:
      emit_triop(gen, inst);
      break;
   case TGSI_OPCODE_DP3:
   case TGSI_OPCODE_DP4:
   case TGSI_OPCODE_DPH:
      emit_dotprod(gen, inst);
      break;
   case TGSI_OPCODE_LIT:
      emit_lit(gen, inst);
      break;
   case TGSI_OPCODE_LOG:
      emit_log(gen, inst);
      break;
   case TGSI_OPCODE_EXP:
      emit_exp(gen, inst);
      break;
   case TGSI_OPCODE_POW:
      emit_pow(gen, inst);
      break;
   case TGSI_OPCODE_XPD:
      emit_xpd(gen, inst);
      break;
   case TGSI_OPCODE_END:
      /* normal end */
      return 1;
   default:
      return 0;
   }
   return 1;
}

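/*
 * Opcodes not handled above (e.g. COS/SIN, TEX*, flow control) make
 * emit_instruction() return 0; tgsi_emit_ppc() then reports failure, and
 * the caller is expected to fall back to the TGSI interpreter.
 */
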
static void
emit_declaration(
   struct ppc_function *func,
   struct tgsi_full_declaration *decl )
{
   if( decl->Declaration.File == TGSI_FILE_INPUT ||
       decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE ) {
#if 0
      unsigned first, last, mask;
      unsigned i, j;

      first = decl->Range.First;
      last = decl->Range.Last;
      mask = decl->Declaration.UsageMask;

      for( i = first; i <= last; i++ ) {
         for( j = 0; j < NUM_CHANNELS; j++ ) {
            if( mask & (1 << j) ) {
               switch( decl->Declaration.Interpolate ) {
               case TGSI_INTERPOLATE_CONSTANT:
                  emit_coef_a0( func, 0, i, j );
                  emit_inputs( func, 0, i, j );
                  break;

               case TGSI_INTERPOLATE_LINEAR:
                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
                  emit_coef_dadx( func, 1, i, j );
                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
                  emit_coef_dady( func, 3, i, j );
                  emit_mul( func, 0, 1 );    /* x * dadx */
                  emit_coef_a0( func, 4, i, j );
                  emit_mul( func, 2, 3 );    /* y * dady */
                  emit_add( func, 0, 4 );    /* x * dadx + a0 */
                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
                  emit_inputs( func, 0, i, j );
                  break;

               case TGSI_INTERPOLATE_PERSPECTIVE:
                  emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
                  emit_coef_dadx( func, 1, i, j );
                  emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
                  emit_coef_dady( func, 3, i, j );
                  emit_mul( func, 0, 1 );    /* x * dadx */
                  emit_tempf( func, 4, 0, TGSI_SWIZZLE_W );
                  emit_coef_a0( func, 5, i, j );
                  emit_rcp( func, 4, 4 );    /* 1.0 / w */
                  emit_mul( func, 2, 3 );    /* y * dady */
                  emit_add( func, 0, 5 );    /* x * dadx + a0 */
                  emit_add( func, 0, 2 );    /* x * dadx + y * dady + a0 */
                  emit_mul( func, 0, 4 );    /* (x * dadx + y * dady + a0) / w */
                  emit_inputs( func, 0, i, j );
                  break;

               default:
                  assert( 0 );
                  break;
               }
            }
         }
      }
#endif
   }
}



static void
emit_prologue(struct ppc_function *func)
{
   /* XXX set up stack frame */
}


static void
emit_epilogue(struct ppc_function *func)
{
   ppc_comment(func, -4, "Epilogue:");
   ppc_return(func);
   /* XXX restore prev stack frame */
#if 0
   debug_printf("PPC: Emitted %u instructions\n", func->num_inst);
#endif
}



/**
 * Translate a TGSI vertex/fragment shader to PPC code.
 *
 * \param tokens  the TGSI input shader
 * \param func  the output PPC code/function
 * \param immediates  buffer to place immediates, later passed to PPC func
 * \param do_swizzles  currently unused
 * \return TRUE for success, FALSE if translation failed
 */
boolean
tgsi_emit_ppc(const struct tgsi_token *tokens,
              struct ppc_function *func,
              float (*immediates)[4],
              boolean do_swizzles )
{
   static int use_ppc_asm = -1;
   struct tgsi_parse_context parse;
   /*boolean instruction_phase = FALSE;*/
   unsigned ok = 1;
   uint num_immediates = 0;
   struct gen_context gen;
   uint ic = 0;

   if (use_ppc_asm < 0) {
      /* If GALLIUM_NOPPC is set, don't use PPC codegen */
      use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE);
   }
   if (!use_ppc_asm)
      return FALSE;

   if (0) {
      debug_printf("\n********* TGSI->PPC ********\n");
      tgsi_dump(tokens, 0);
   }

   util_init_math();

   init_gen_context(&gen, func);

   emit_prologue(func);

   tgsi_parse_init( &parse, tokens );

   while (!tgsi_parse_end_of_tokens(&parse) && ok) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
            emit_declaration(func, &parse.FullToken.FullDeclaration );
         }
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         if (func->print) {
            _debug_printf("# ");
            ic++;
            tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
         }

         ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);

         if (!ok) {
            uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
            debug_printf("failed to translate tgsi opcode %d (%s) to PPC (%s)\n",
                         opcode,
                         tgsi_get_opcode_name(opcode),
                         parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
                            "vertex shader" : "fragment shader");
         }
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* splat each immediate component into a float[4] vector for SoA */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            uint i;
            assert(size <= 4);
            assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
            for (i = 0; i < size; i++) {
               immediates[num_immediates][i] =
                  parse.FullToken.FullImmediate.u[i].Float;
            }
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         ok = 0;
         assert( 0 );
      }
   }

   emit_epilogue(func);

   tgsi_parse_free( &parse );

   if (ppc_num_instructions(func) == 0) {
      /* ran out of memory for instructions */
      ok = FALSE;
   }

   if (!ok)
      debug_printf("TGSI->PPC translation failed\n");

   return ok;
}

#endif /* PIPE_ARCH_PPC */