tgsi_ppc.c revision 2acf07983f8bb134d639c9e652e7e0e3307e20f3
1/************************************************************************** 2 * 3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI to PowerPC code generation. 30 */ 31 32#include "pipe/p_config.h" 33 34#if defined(PIPE_ARCH_PPC) 35 36#include "pipe/p_debug.h" 37#include "pipe/p_shader_tokens.h" 38#include "util/u_math.h" 39#include "util/u_memory.h" 40#include "util/u_sse.h" 41#include "tgsi/tgsi_parse.h" 42#include "tgsi/tgsi_util.h" 43#include "tgsi_dump.h" 44#include "tgsi_exec.h" 45#include "tgsi_ppc.h" 46#include "rtasm/rtasm_ppc.h" 47 48 49/** 50 * Since it's pretty much impossible to form PPC vector immediates, load 51 * them from memory here: 52 */ 53const float ppc_builtin_constants[] ALIGN16_ATTRIB = { 54 1.0f, -128.0f, 128.0, 0.0 55}; 56 57 58#define FOR_EACH_CHANNEL( CHAN )\ 59 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 60 61#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 62 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 63 64#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 65 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 66 67#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 68 FOR_EACH_CHANNEL( CHAN )\ 69 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 70 71#define CHAN_X 0 72#define CHAN_Y 1 73#define CHAN_Z 2 74#define CHAN_W 3 75 76 77/** 78 * How many TGSI temps should be implemented with real PPC vector registers 79 * rather than memory. 80 */ 81#define MAX_PPC_TEMPS 3 82 83 84/** 85 * Context/state used during code gen. 86 */ 87struct gen_context 88{ 89 struct ppc_function *f; 90 int inputs_reg; /**< GP register pointing to input params */ 91 int outputs_reg; /**< GP register pointing to output params */ 92 int temps_reg; /**< GP register pointing to temporary "registers" */ 93 int immed_reg; /**< GP register pointing to immediates buffer */ 94 int const_reg; /**< GP register pointing to constants buffer */ 95 int builtins_reg; /**< GP register pointint to built-in constants */ 96 97 int offset_reg; /**< used to reduce redundant li instructions */ 98 int offset_value; 99 100 int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ 101 int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ 102 103 /** 104 * Map TGSI temps to PPC vector temps. 105 * We have 32 PPC vector regs. Use 16 of them for storing 4 TGSI temps. 106 * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1]. 107 */ 108 int temps_map[MAX_PPC_TEMPS][4]; 109 110 /** 111 * Cache of src registers. 112 * This is used to avoid redundant load instructions. 113 */ 114 struct { 115 struct tgsi_full_src_register src; 116 uint chan; 117 uint vec; 118 } regs[12]; /* 3 src regs, 4 channels */ 119 uint num_regs; 120}; 121 122 123/** 124 * Initialize code generation context. 125 */ 126static void 127init_gen_context(struct gen_context *gen, struct ppc_function *func) 128{ 129 uint i; 130 131 memset(gen, 0, sizeof(*gen)); 132 gen->f = func; 133 gen->inputs_reg = ppc_reserve_register(func, 3); /* first function param */ 134 gen->outputs_reg = ppc_reserve_register(func, 4); /* second function param */ 135 gen->temps_reg = ppc_reserve_register(func, 5); /* ... */ 136 gen->immed_reg = ppc_reserve_register(func, 6); 137 gen->const_reg = ppc_reserve_register(func, 7); 138 gen->builtins_reg = ppc_reserve_register(func, 8); 139 gen->one_vec = -1; 140 gen->bit31_vec = -1; 141 gen->offset_reg = -1; 142 gen->offset_value = -9999999; 143 for (i = 0; i < MAX_PPC_TEMPS; i++) { 144 gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f); 145 gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f); 146 gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f); 147 gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f); 148 } 149} 150 151 152/** 153 * Is the given TGSI register stored as a real PPC vector register? 154 */ 155static boolean 156is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) 157{ 158 return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && 159 reg->SrcRegister.Index < MAX_PPC_TEMPS); 160} 161 162 163/** 164 * Is the given TGSI register stored as a real PPC vector register? 165 */ 166static boolean 167is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) 168{ 169 return (reg->DstRegister.File == TGSI_FILE_TEMPORARY && 170 reg->DstRegister.Index < MAX_PPC_TEMPS); 171} 172 173 174 175/** 176 * All PPC vector load/store instructions form an effective address 177 * by adding the contents of two registers. For example: 178 * lvx v2,r8,r9 # v2 = memory[r8 + r9] 179 * stvx v2,r8,r9 # memory[r8 + r9] = v2; 180 * So our lvx/stvx instructions are typically preceded by an 'li' instruction 181 * to load r9 (above) with an immediate (an offset). 182 * This code emits that 'li' instruction, but only if the offset value is 183 * different than the previous 'li'. 184 * This optimization seems to save about 10% in the instruction count. 185 * Note that we need to unconditionally emit an 'li' inside basic blocks 186 * (such as inside loops). 187 */ 188static int 189emit_li_offset(struct gen_context *gen, int offset) 190{ 191 if (gen->offset_reg <= 0) { 192 /* allocate a GP register for storing load/store offset */ 193 gen->offset_reg = ppc_allocate_register(gen->f); 194 } 195 196 /* emit new 'li' if offset is changing */ 197 if (gen->offset_value < 0 || gen->offset_value != offset) { 198 gen->offset_value = offset; 199 ppc_li(gen->f, gen->offset_reg, offset); 200 } 201 202 return gen->offset_reg; 203} 204 205 206/** 207 * Forces subsequent emit_li_offset() calls to emit an 'li'. 208 * To be called at the top of basic blocks. 209 */ 210static void 211reset_li_offset(struct gen_context *gen) 212{ 213 gen->offset_value = -9999999; 214} 215 216 217 218/** 219 * Load the given vector register with {value, value, value, value}. 220 * The value must be in the ppu_builtin_constants[] array. 221 * We wouldn't need this if there was a simple way to load PPC vector 222 * registers with immediate values! 223 */ 224static void 225load_constant_vec(struct gen_context *gen, int dst_vec, float value) 226{ 227 uint pos; 228 for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { 229 if (ppc_builtin_constants[pos] == value) { 230 int offset = pos * 4; 231 int offset_reg = emit_li_offset(gen, offset); 232 233 /* Load 4-byte word into vector register. 234 * The vector slot depends on the effective address we load from. 235 * We know that our builtins start at a 16-byte boundary so we 236 * know that 'swizzle' tells us which vector slot will have the 237 * loaded word. The other vector slots will be undefined. 238 */ 239 ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); 240 /* splat word[pos % 4] across the vector reg */ 241 ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); 242 return; 243 } 244 } 245 assert(0 && "Need to add new constant to ppc_builtin_constants array"); 246} 247 248 249/** 250 * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. 251 */ 252static int 253gen_one_vec(struct gen_context *gen) 254{ 255 if (gen->one_vec < 0) { 256 gen->one_vec = ppc_allocate_vec_register(gen->f); 257 load_constant_vec(gen, gen->one_vec, 1.0f); 258 } 259 return gen->one_vec; 260} 261 262/** 263 * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. 264 */ 265static int 266gen_get_bit31_vec(struct gen_context *gen) 267{ 268 if (gen->bit31_vec < 0) { 269 gen->bit31_vec = ppc_allocate_vec_register(gen->f); 270 ppc_vspltisw(gen->f, gen->bit31_vec, -1); 271 ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); 272 } 273 return gen->bit31_vec; 274} 275 276 277/** 278 * Register fetch. Return PPC vector register with result. 279 */ 280static int 281emit_fetch(struct gen_context *gen, 282 const struct tgsi_full_src_register *reg, 283 const unsigned chan_index) 284{ 285 uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); 286 int dst_vec = -1; 287 288 switch (swizzle) { 289 case TGSI_EXTSWIZZLE_X: 290 case TGSI_EXTSWIZZLE_Y: 291 case TGSI_EXTSWIZZLE_Z: 292 case TGSI_EXTSWIZZLE_W: 293 switch (reg->SrcRegister.File) { 294 case TGSI_FILE_INPUT: 295 { 296 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; 297 int offset_reg = emit_li_offset(gen, offset); 298 dst_vec = ppc_allocate_vec_register(gen->f); 299 ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); 300 } 301 break; 302 case TGSI_FILE_TEMPORARY: 303 if (is_ppc_vec_temporary(reg)) { 304 /* use PPC vec register */ 305 dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle]; 306 } 307 else { 308 /* use memory-based temp register "file" */ 309 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; 310 int offset_reg = emit_li_offset(gen, offset); 311 dst_vec = ppc_allocate_vec_register(gen->f); 312 ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); 313 } 314 break; 315 case TGSI_FILE_IMMEDIATE: 316 { 317 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; 318 int offset_reg = emit_li_offset(gen, offset); 319 dst_vec = ppc_allocate_vec_register(gen->f); 320 /* Load 4-byte word into vector register. 321 * The vector slot depends on the effective address we load from. 322 * We know that our immediates start at a 16-byte boundary so we 323 * know that 'swizzle' tells us which vector slot will have the 324 * loaded word. The other vector slots will be undefined. 325 */ 326 ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg); 327 /* splat word[swizzle] across the vector reg */ 328 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 329 } 330 break; 331 case TGSI_FILE_CONSTANT: 332 { 333 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; 334 int offset_reg = emit_li_offset(gen, offset); 335 dst_vec = ppc_allocate_vec_register(gen->f); 336 /* Load 4-byte word into vector register. 337 * The vector slot depends on the effective address we load from. 338 * We know that our constants start at a 16-byte boundary so we 339 * know that 'swizzle' tells us which vector slot will have the 340 * loaded word. The other vector slots will be undefined. 341 */ 342 ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); 343 /* splat word[swizzle] across the vector reg */ 344 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 345 } 346 break; 347 default: 348 assert( 0 ); 349 } 350 break; 351 case TGSI_EXTSWIZZLE_ZERO: 352 ppc_vzero(gen->f, dst_vec); 353 break; 354 case TGSI_EXTSWIZZLE_ONE: 355 { 356 int one_vec = gen_one_vec(gen); 357 dst_vec = ppc_allocate_vec_register(gen->f); 358 ppc_vmove(gen->f, dst_vec, one_vec); 359 } 360 break; 361 default: 362 assert( 0 ); 363 } 364 365 assert(dst_vec >= 0); 366 367 { 368 uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); 369 if (sign_op != TGSI_UTIL_SIGN_KEEP) { 370 int bit31_vec = gen_get_bit31_vec(gen); 371 int dst_vec2; 372 373 if (is_ppc_vec_temporary(reg)) { 374 /* need to use a new temp */ 375 dst_vec2 = ppc_allocate_vec_register(gen->f); 376 } 377 else { 378 dst_vec2 = dst_vec; 379 } 380 381 switch (sign_op) { 382 case TGSI_UTIL_SIGN_CLEAR: 383 /* vec = vec & ~bit31 */ 384 ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec); 385 break; 386 case TGSI_UTIL_SIGN_SET: 387 /* vec = vec | bit31 */ 388 ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec); 389 break; 390 case TGSI_UTIL_SIGN_TOGGLE: 391 /* vec = vec ^ bit31 */ 392 ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec); 393 break; 394 default: 395 assert(0); 396 } 397 return dst_vec2; 398 } 399 } 400 401 return dst_vec; 402} 403 404 405 406/** 407 * Test if two TGSI src registers refer to the same memory location. 408 * We use this to avoid redundant register loads. 409 */ 410static boolean 411equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, 412 const struct tgsi_full_src_register *b, uint chan_b) 413{ 414 int swz_a, swz_b; 415 int sign_a, sign_b; 416 if (a->SrcRegister.File != b->SrcRegister.File) 417 return FALSE; 418 if (a->SrcRegister.Index != b->SrcRegister.Index) 419 return FALSE; 420 swz_a = tgsi_util_get_full_src_register_extswizzle(a, chan_a); 421 swz_b = tgsi_util_get_full_src_register_extswizzle(b, chan_b); 422 if (swz_a != swz_b) 423 return FALSE; 424 sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a); 425 sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b); 426 if (sign_a != sign_b) 427 return FALSE; 428 return TRUE; 429} 430 431 432/** 433 * Given a TGSI src register and channel index, return the PPC vector 434 * register containing the value. We use a cache to prevent re-loading 435 * the same register multiple times. 436 * \return index of PPC vector register with the desired src operand 437 */ 438static int 439get_src_vec(struct gen_context *gen, 440 struct tgsi_full_instruction *inst, int src_reg, uint chan) 441{ 442 const const struct tgsi_full_src_register *src = 443 &inst->FullSrcRegisters[src_reg]; 444 int vec; 445 uint i; 446 447 /* check the cache */ 448 for (i = 0; i < gen->num_regs; i++) { 449 if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) { 450 /* cache hit */ 451 assert(gen->regs[i].vec >= 0); 452 return gen->regs[i].vec; 453 } 454 } 455 456 /* cache miss: allocate new vec reg and emit fetch/load code */ 457 vec = emit_fetch(gen, src, chan); 458 gen->regs[gen->num_regs].src = *src; 459 gen->regs[gen->num_regs].chan = chan; 460 gen->regs[gen->num_regs].vec = vec; 461 gen->num_regs++; 462 463 assert(gen->num_regs <= Elements(gen->regs)); 464 465 assert(vec >= 0); 466 467 return vec; 468} 469 470 471/** 472 * Clear the src operand cache. To be called at the end of each emit function. 473 */ 474static void 475release_src_vecs(struct gen_context *gen) 476{ 477 uint i; 478 for (i = 0; i < gen->num_regs; i++) { 479 const const struct tgsi_full_src_register src = gen->regs[i].src; 480 if (!is_ppc_vec_temporary(&src)) { 481 ppc_release_vec_register(gen->f, gen->regs[i].vec); 482 } 483 } 484 gen->num_regs = 0; 485} 486 487 488 489static int 490get_dst_vec(struct gen_context *gen, 491 const struct tgsi_full_instruction *inst, 492 unsigned chan_index) 493{ 494 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; 495 496 if (is_ppc_vec_temporary_dst(reg)) { 497 int vec = gen->temps_map[reg->DstRegister.Index][chan_index]; 498 return vec; 499 } 500 else { 501 return ppc_allocate_vec_register(gen->f); 502 } 503} 504 505 506/** 507 * Register store. Store 'src_vec' at location indicated by 'reg'. 508 * \param free_vec Should the src_vec be released when done? 509 */ 510static void 511emit_store(struct gen_context *gen, 512 int src_vec, 513 const struct tgsi_full_instruction *inst, 514 unsigned chan_index, 515 boolean free_vec) 516{ 517 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; 518 519 switch (reg->DstRegister.File) { 520 case TGSI_FILE_OUTPUT: 521 { 522 int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; 523 int offset_reg = emit_li_offset(gen, offset); 524 ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); 525 } 526 break; 527 case TGSI_FILE_TEMPORARY: 528 if (is_ppc_vec_temporary_dst(reg)) { 529 if (!free_vec) { 530 int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index]; 531 if (dst_vec != src_vec) 532 ppc_vmove(gen->f, dst_vec, src_vec); 533 } 534 free_vec = FALSE; 535 } 536 else { 537 int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; 538 int offset_reg = emit_li_offset(gen, offset); 539 ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); 540 } 541 break; 542#if 0 543 case TGSI_FILE_ADDRESS: 544 emit_addrs( 545 func, 546 xmm, 547 reg->DstRegister.Index, 548 chan_index ); 549 break; 550#endif 551 default: 552 assert( 0 ); 553 } 554 555#if 0 556 switch( inst->Instruction.Saturate ) { 557 case TGSI_SAT_NONE: 558 break; 559 560 case TGSI_SAT_ZERO_ONE: 561 /* assert( 0 ); */ 562 break; 563 564 case TGSI_SAT_MINUS_PLUS_ONE: 565 assert( 0 ); 566 break; 567 } 568#endif 569 570 if (free_vec) 571 ppc_release_vec_register(gen->f, src_vec); 572} 573 574 575static void 576emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 577{ 578 int v0, v1; 579 uint chan_index; 580 581 v0 = get_src_vec(gen, inst, 0, CHAN_X); 582 v1 = ppc_allocate_vec_register(gen->f); 583 584 switch (inst->Instruction.Opcode) { 585 case TGSI_OPCODE_RSQ: 586 /* v1 = 1.0 / sqrt(v0) */ 587 ppc_vrsqrtefp(gen->f, v1, v0); 588 break; 589 case TGSI_OPCODE_RCP: 590 /* v1 = 1.0 / v0 */ 591 ppc_vrefp(gen->f, v1, v0); 592 break; 593 default: 594 assert(0); 595 } 596 597 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { 598 emit_store(gen, v1, inst, chan_index, FALSE); 599 } 600 601 release_src_vecs(gen); 602 ppc_release_vec_register(gen->f, v1); 603} 604 605 606static void 607emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 608{ 609 uint chan_index; 610 611 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 612 int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ 613 int v1 = get_dst_vec(gen, inst, chan_index); 614 switch (inst->Instruction.Opcode) { 615 case TGSI_OPCODE_ABS: 616 /* turn off the most significant bit of each vector float word */ 617 { 618 int bit31_vec = gen_get_bit31_vec(gen); 619 ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */ 620 } 621 break; 622 case TGSI_OPCODE_FLOOR: 623 ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ 624 break; 625 case TGSI_OPCODE_FRAC: 626 ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */ 627 ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */ 628 break; 629 case TGSI_OPCODE_EXPBASE2: 630 ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */ 631 break; 632 case TGSI_OPCODE_LOGBASE2: 633 /* XXX this may be broken! */ 634 ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ 635 break; 636 case TGSI_OPCODE_MOV: 637 case TGSI_OPCODE_SWZ: 638 if (v0 != v1) 639 ppc_vmove(gen->f, v1, v0); 640 break; 641 default: 642 assert(0); 643 } 644 emit_store(gen, v1, inst, chan_index, TRUE); /* store v0 */ 645 } 646 647 release_src_vecs(gen); 648} 649 650 651static void 652emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) 653{ 654 int zero_vec = -1; 655 uint chan; 656 657 if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { 658 zero_vec = ppc_allocate_vec_register(gen->f); 659 ppc_vzero(gen->f, zero_vec); 660 } 661 662 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 663 /* fetch src operands */ 664 int v0 = get_src_vec(gen, inst, 0, chan); 665 int v1 = get_src_vec(gen, inst, 1, chan); 666 int v2 = get_dst_vec(gen, inst, chan); 667 668 /* emit binop */ 669 switch (inst->Instruction.Opcode) { 670 case TGSI_OPCODE_ADD: 671 ppc_vaddfp(gen->f, v2, v0, v1); 672 break; 673 case TGSI_OPCODE_SUB: 674 ppc_vsubfp(gen->f, v2, v0, v1); 675 break; 676 case TGSI_OPCODE_MUL: 677 ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec); 678 break; 679 case TGSI_OPCODE_MIN: 680 ppc_vminfp(gen->f, v2, v0, v1); 681 break; 682 case TGSI_OPCODE_MAX: 683 ppc_vmaxfp(gen->f, v2, v0, v1); 684 break; 685 default: 686 assert(0); 687 } 688 689 /* store v2 */ 690 emit_store(gen, v2, inst, chan, TRUE); 691 } 692 693 if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) 694 ppc_release_vec_register(gen->f, zero_vec); 695 696 release_src_vecs(gen); 697} 698 699 700static void 701emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) 702{ 703 uint chan; 704 705 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 706 /* fetch src operands */ 707 int v0 = get_src_vec(gen, inst, 0, chan); 708 int v1 = get_src_vec(gen, inst, 1, chan); 709 int v2 = get_src_vec(gen, inst, 2, chan); 710 int v3 = get_dst_vec(gen, inst, chan); 711 712 /* emit ALU */ 713 switch (inst->Instruction.Opcode) { 714 case TGSI_OPCODE_MAD: 715 ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ 716 break; 717 case TGSI_OPCODE_LRP: 718 ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ 719 ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ 720 break; 721 default: 722 assert(0); 723 } 724 725 /* store v3 */ 726 emit_store(gen, v3, inst, chan, TRUE); 727 } 728 729 release_src_vecs(gen); 730} 731 732 733/** 734 * Vector comparisons, resulting in 1.0 or 0.0 values. 735 */ 736static void 737emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) 738{ 739 uint chan; 740 int one_vec = gen_one_vec(gen); 741 742 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 743 /* fetch src operands */ 744 int v0 = get_src_vec(gen, inst, 0, chan); 745 int v1 = get_src_vec(gen, inst, 1, chan); 746 int v2 = get_dst_vec(gen, inst, chan); 747 boolean complement = FALSE; 748 749 switch (inst->Instruction.Opcode) { 750 case TGSI_OPCODE_SNE: 751 complement = TRUE; 752 /* fall-through */ 753 case TGSI_OPCODE_SEQ: 754 ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ 755 break; 756 757 case TGSI_OPCODE_SGE: 758 complement = TRUE; 759 /* fall-through */ 760 case TGSI_OPCODE_SLT: 761 ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ 762 break; 763 764 case TGSI_OPCODE_SLE: 765 complement = TRUE; 766 /* fall-through */ 767 case TGSI_OPCODE_SGT: 768 ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */ 769 break; 770 default: 771 assert(0); 772 } 773 774 /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ 775 776 if (complement) 777 ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ 778 else 779 ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ 780 781 /* store v2 */ 782 emit_store(gen, v2, inst, chan, TRUE); 783 } 784 785 release_src_vecs(gen); 786} 787 788 789static void 790emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) 791{ 792 int v0, v1, v2; 793 uint chan_index; 794 795 v2 = ppc_allocate_vec_register(gen->f); 796 797 ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */ 798 799 v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */ 800 v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */ 801 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 802 803 v0 = get_src_vec(gen, inst, 0, CHAN_Y); /* v0 = src0.YYYY */ 804 v1 = get_src_vec(gen, inst, 1, CHAN_Y); /* v1 = src1.YYYY */ 805 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 806 807 v0 = get_src_vec(gen, inst, 0, CHAN_Z); /* v0 = src0.ZZZZ */ 808 v1 = get_src_vec(gen, inst, 1, CHAN_Z); /* v1 = src1.ZZZZ */ 809 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 810 811 if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { 812 v0 = get_src_vec(gen, inst, 0, CHAN_W); /* v0 = src0.WWWW */ 813 v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ 814 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 815 } 816 else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { 817 v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ 818 ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ 819 } 820 821 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 822 emit_store(gen, v2, inst, chan_index, FALSE); /* store v2, free v2 later */ 823 } 824 825 release_src_vecs(gen); 826 827 ppc_release_vec_register(gen->f, v2); 828} 829 830 831/** Approximation for vr = pow(va, vb) */ 832static void 833ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) 834{ 835 /* pow(a,b) ~= exp2(log2(a) * b) */ 836 int t_vec = ppc_allocate_vec_register(f); 837 int zero_vec = ppc_allocate_vec_register(f); 838 839 ppc_vzero(f, zero_vec); 840 841 ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ 842 ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */ 843 ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ 844 845 ppc_release_vec_register(f, t_vec); 846 ppc_release_vec_register(f, zero_vec); 847} 848 849 850static void 851emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) 852{ 853 int one_vec = gen_one_vec(gen); 854 855 /* Compute X */ 856 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { 857 emit_store(gen, one_vec, inst, CHAN_X, FALSE); 858 } 859 860 /* Compute Y, Z */ 861 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || 862 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 863 int x_vec; 864 int zero_vec = ppc_allocate_vec_register(gen->f); 865 866 x_vec = get_src_vec(gen, inst, 0, CHAN_X); /* x_vec = src[0].x */ 867 868 ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ 869 ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ 870 871 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 872 emit_store(gen, x_vec, inst, CHAN_Y, FALSE); 873 } 874 875 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 876 int y_vec, w_vec; 877 int z_vec = ppc_allocate_vec_register(gen->f); 878 int pow_vec = ppc_allocate_vec_register(gen->f); 879 int pos_vec = ppc_allocate_vec_register(gen->f); 880 int p128_vec = ppc_allocate_vec_register(gen->f); 881 int n128_vec = ppc_allocate_vec_register(gen->f); 882 883 y_vec = get_src_vec(gen, inst, 0, CHAN_Y); /* y_vec = src[0].y */ 884 ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ 885 886 w_vec = get_src_vec(gen, inst, 0, CHAN_W); /* w_vec = src[0].w */ 887 888 /* clamp W to [-128, 128] */ 889 load_constant_vec(gen, p128_vec, 128.0f); 890 load_constant_vec(gen, n128_vec, -128.0f); 891 ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = max(w, -128) */ 892 ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */ 893 894 /* if temp.x > 0 895 * z = pow(tmp.y, tmp.w) 896 * else 897 * z = 0.0 898 */ 899 ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ 900 ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ 901 ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ 902 903 emit_store(gen, z_vec, inst, CHAN_Z, FALSE); 904 905 ppc_release_vec_register(gen->f, z_vec); 906 ppc_release_vec_register(gen->f, pow_vec); 907 ppc_release_vec_register(gen->f, pos_vec); 908 ppc_release_vec_register(gen->f, p128_vec); 909 ppc_release_vec_register(gen->f, n128_vec); 910 } 911 912 ppc_release_vec_register(gen->f, zero_vec); 913 } 914 915 /* Compute W */ 916 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { 917 emit_store(gen, one_vec, inst, CHAN_W, FALSE); 918 } 919 920 release_src_vecs(gen); 921} 922 923 924static void 925emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst) 926{ 927 const int one_vec = gen_one_vec(gen); 928 int src_vec; 929 930 /* get src arg */ 931 src_vec = get_src_vec(gen, inst, 0, CHAN_X); 932 933 /* Compute X = 2^floor(src) */ 934 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { 935 int dst_vec = get_dst_vec(gen, inst, CHAN_X); 936 int tmp_vec = ppc_allocate_vec_register(gen->f); 937 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ 938 ppc_vexptefp(gen->f, dst_vec, tmp_vec); /* dst = 2 ^ tmp */ 939 emit_store(gen, dst_vec, inst, CHAN_X, TRUE); 940 ppc_release_vec_register(gen->f, tmp_vec); 941 } 942 943 /* Compute Y = src - floor(src) */ 944 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 945 int dst_vec = get_dst_vec(gen, inst, CHAN_Y); 946 int tmp_vec = ppc_allocate_vec_register(gen->f); 947 ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ 948 ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec); /* dst = src - tmp */ 949 emit_store(gen, dst_vec, inst, CHAN_Y, TRUE); 950 ppc_release_vec_register(gen->f, tmp_vec); 951 } 952 953 /* Compute Z = RoughApprox2ToX(src) */ 954 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 955 int dst_vec = get_dst_vec(gen, inst, CHAN_Z); 956 ppc_vexptefp(gen->f, dst_vec, src_vec); /* dst = 2 ^ src */ 957 emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); 958 } 959 960 /* Compute W = 1.0 */ 961 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { 962 emit_store(gen, one_vec, inst, CHAN_W, FALSE); 963 } 964 965 release_src_vecs(gen); 966} 967 968 969static void 970emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst) 971{ 972 const int bit31_vec = gen_get_bit31_vec(gen); 973 const int one_vec = gen_one_vec(gen); 974 int src_vec, abs_vec; 975 976 /* get src arg */ 977 src_vec = get_src_vec(gen, inst, 0, CHAN_X); 978 979 /* compute abs(src) */ 980 abs_vec = ppc_allocate_vec_register(gen->f); 981 ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec); /* abs = src & ~bit31 */ 982 983 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && 984 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 985 986 /* compute tmp = floor(log2(abs)) */ 987 int tmp_vec = ppc_allocate_vec_register(gen->f); 988 ppc_vlogefp(gen->f, tmp_vec, abs_vec); /* tmp = log2(abs) */ 989 ppc_vrfim(gen->f, tmp_vec, tmp_vec); /* tmp = floor(tmp); */ 990 991 /* Compute X = tmp */ 992 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { 993 emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); 994 } 995 996 /* Compute Y = abs / 2^tmp */ 997 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 998 const int zero_vec = ppc_allocate_vec_register(gen->f); 999 ppc_vzero(gen->f, zero_vec); 1000 ppc_vexptefp(gen->f, tmp_vec, tmp_vec); /* tmp = 2 ^ tmp */ 1001 ppc_vrefp(gen->f, tmp_vec, tmp_vec); /* tmp = 1 / tmp */ 1002 /* tmp = abs * tmp + zero */ 1003 ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec); 1004 emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); 1005 ppc_release_vec_register(gen->f, zero_vec); 1006 } 1007 1008 ppc_release_vec_register(gen->f, tmp_vec); 1009 } 1010 1011 /* Compute Z = RoughApproxLog2(abs) */ 1012 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 1013 int dst_vec = get_dst_vec(gen, inst, CHAN_Z); 1014 ppc_vlogefp(gen->f, dst_vec, abs_vec); /* dst = log2(abs) */ 1015 emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); 1016 } 1017 1018 /* Compute W = 1.0 */ 1019 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { 1020 emit_store(gen, one_vec, inst, CHAN_W, FALSE); 1021 } 1022 1023 ppc_release_vec_register(gen->f, abs_vec); 1024 release_src_vecs(gen); 1025} 1026 1027 1028static void 1029emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst) 1030{ 1031 int s0_vec = get_src_vec(gen, inst, 0, CHAN_X); 1032 int s1_vec = get_src_vec(gen, inst, 1, CHAN_X); 1033 int pow_vec = ppc_allocate_vec_register(gen->f); 1034 int chan; 1035 1036 ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec); 1037 1038 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { 1039 emit_store(gen, pow_vec, inst, chan, FALSE); 1040 } 1041 1042 ppc_release_vec_register(gen->f, pow_vec); 1043 1044 release_src_vecs(gen); 1045} 1046 1047 1048static void 1049emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst) 1050{ 1051 int x0_vec, y0_vec, z0_vec; 1052 int x1_vec, y1_vec, z1_vec; 1053 int zero_vec, tmp_vec; 1054 int tmp2_vec; 1055 1056 zero_vec = ppc_allocate_vec_register(gen->f); 1057 ppc_vzero(gen->f, zero_vec); 1058 1059 tmp_vec = ppc_allocate_vec_register(gen->f); 1060 tmp2_vec = ppc_allocate_vec_register(gen->f); 1061 1062 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || 1063 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 1064 x0_vec = get_src_vec(gen, inst, 0, CHAN_X); 1065 x1_vec = get_src_vec(gen, inst, 1, CHAN_X); 1066 } 1067 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || 1068 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 1069 y0_vec = get_src_vec(gen, inst, 0, CHAN_Y); 1070 y1_vec = get_src_vec(gen, inst, 1, CHAN_Y); 1071 } 1072 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || 1073 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 1074 z0_vec = get_src_vec(gen, inst, 0, CHAN_Z); 1075 z1_vec = get_src_vec(gen, inst, 1, CHAN_Z); 1076 } 1077 1078 IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) { 1079 /* tmp = y0 * z1 */ 1080 ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec); 1081 /* tmp = tmp - z0 * y1*/ 1082 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec); 1083 emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); 1084 } 1085 IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) { 1086 /* tmp = z0 * x1 */ 1087 ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec); 1088 /* tmp = tmp - x0 * z1 */ 1089 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec); 1090 emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); 1091 } 1092 IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) { 1093 /* tmp = x0 * y1 */ 1094 ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec); 1095 /* tmp = tmp - y0 * x1 */ 1096 ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec); 1097 emit_store(gen, tmp_vec, inst, CHAN_Z, FALSE); 1098 } 1099 /* W is undefined */ 1100 1101 ppc_release_vec_register(gen->f, tmp_vec); 1102 ppc_release_vec_register(gen->f, zero_vec); 1103 release_src_vecs(gen); 1104} 1105 1106static int 1107emit_instruction(struct gen_context *gen, 1108 struct tgsi_full_instruction *inst) 1109{ 1110 switch (inst->Instruction.Opcode) { 1111 case TGSI_OPCODE_MOV: 1112 case TGSI_OPCODE_SWZ: 1113 case TGSI_OPCODE_ABS: 1114 case TGSI_OPCODE_FLOOR: 1115 case TGSI_OPCODE_FRAC: 1116 case TGSI_OPCODE_EXPBASE2: 1117 case TGSI_OPCODE_LOGBASE2: 1118 emit_unaryop(gen, inst); 1119 break; 1120 case TGSI_OPCODE_RSQ: 1121 case TGSI_OPCODE_RCP: 1122 emit_scalar_unaryop(gen, inst); 1123 break; 1124 case TGSI_OPCODE_ADD: 1125 case TGSI_OPCODE_SUB: 1126 case TGSI_OPCODE_MUL: 1127 case TGSI_OPCODE_MIN: 1128 case TGSI_OPCODE_MAX: 1129 emit_binop(gen, inst); 1130 break; 1131 case TGSI_OPCODE_SEQ: 1132 case TGSI_OPCODE_SNE: 1133 case TGSI_OPCODE_SLT: 1134 case TGSI_OPCODE_SGT: 1135 case TGSI_OPCODE_SLE: 1136 case TGSI_OPCODE_SGE: 1137 emit_inequality(gen, inst); 1138 break; 1139 case TGSI_OPCODE_MAD: 1140 case TGSI_OPCODE_LRP: 1141 emit_triop(gen, inst); 1142 break; 1143 case TGSI_OPCODE_DP3: 1144 case TGSI_OPCODE_DP4: 1145 case TGSI_OPCODE_DPH: 1146 emit_dotprod(gen, inst); 1147 break; 1148 case TGSI_OPCODE_LIT: 1149 emit_lit(gen, inst); 1150 break; 1151 case TGSI_OPCODE_LOG: 1152 emit_log(gen, inst); 1153 break; 1154 case TGSI_OPCODE_EXP: 1155 emit_exp(gen, inst); 1156 break; 1157 case TGSI_OPCODE_POW: 1158 emit_pow(gen, inst); 1159 break; 1160 case TGSI_OPCODE_XPD: 1161 emit_xpd(gen, inst); 1162 break; 1163 case TGSI_OPCODE_END: 1164 /* normal end */ 1165 return 1; 1166 default: 1167 return 0; 1168 } 1169 return 1; 1170} 1171 1172 1173static void 1174emit_declaration( 1175 struct ppc_function *func, 1176 struct tgsi_full_declaration *decl ) 1177{ 1178 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1179#if 0 1180 unsigned first, last, mask; 1181 unsigned i, j; 1182 1183 first = decl->DeclarationRange.First; 1184 last = decl->DeclarationRange.Last; 1185 mask = decl->Declaration.UsageMask; 1186 1187 for( i = first; i <= last; i++ ) { 1188 for( j = 0; j < NUM_CHANNELS; j++ ) { 1189 if( mask & (1 << j) ) { 1190 switch( decl->Declaration.Interpolate ) { 1191 case TGSI_INTERPOLATE_CONSTANT: 1192 emit_coef_a0( func, 0, i, j ); 1193 emit_inputs( func, 0, i, j ); 1194 break; 1195 1196 case TGSI_INTERPOLATE_LINEAR: 1197 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 1198 emit_coef_dadx( func, 1, i, j ); 1199 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 1200 emit_coef_dady( func, 3, i, j ); 1201 emit_mul( func, 0, 1 ); /* x * dadx */ 1202 emit_coef_a0( func, 4, i, j ); 1203 emit_mul( func, 2, 3 ); /* y * dady */ 1204 emit_add( func, 0, 4 ); /* x * dadx + a0 */ 1205 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 1206 emit_inputs( func, 0, i, j ); 1207 break; 1208 1209 case TGSI_INTERPOLATE_PERSPECTIVE: 1210 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 1211 emit_coef_dadx( func, 1, i, j ); 1212 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 1213 emit_coef_dady( func, 3, i, j ); 1214 emit_mul( func, 0, 1 ); /* x * dadx */ 1215 emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); 1216 emit_coef_a0( func, 5, i, j ); 1217 emit_rcp( func, 4, 4 ); /* 1.0 / w */ 1218 emit_mul( func, 2, 3 ); /* y * dady */ 1219 emit_add( func, 0, 5 ); /* x * dadx + a0 */ 1220 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 1221 emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ 1222 emit_inputs( func, 0, i, j ); 1223 break; 1224 1225 default: 1226 assert( 0 ); 1227 break; 1228 } 1229 } 1230 } 1231 } 1232#endif 1233 } 1234} 1235 1236 1237 1238static void 1239emit_prologue(struct ppc_function *func) 1240{ 1241 /* XXX set up stack frame */ 1242} 1243 1244 1245static void 1246emit_epilogue(struct ppc_function *func) 1247{ 1248 ppc_comment(func, -4, "Epilogue:"); 1249 ppc_return(func); 1250 /* XXX restore prev stack frame */ 1251#if 0 1252 debug_printf("PPC: Emitted %u instructions\n", func->num_inst); 1253#endif 1254} 1255 1256 1257 1258/** 1259 * Translate a TGSI vertex/fragment shader to PPC code. 1260 * 1261 * \param tokens the TGSI input shader 1262 * \param func the output PPC code/function 1263 * \param immediates buffer to place immediates, later passed to PPC func 1264 * \return TRUE for success, FALSE if translation failed 1265 */ 1266boolean 1267tgsi_emit_ppc(const struct tgsi_token *tokens, 1268 struct ppc_function *func, 1269 float (*immediates)[4], 1270 boolean do_swizzles ) 1271{ 1272 static int use_ppc_asm = -1; 1273 struct tgsi_parse_context parse; 1274 /*boolean instruction_phase = FALSE;*/ 1275 unsigned ok = 1; 1276 uint num_immediates = 0; 1277 struct gen_context gen; 1278 uint ic = 0; 1279 1280 if (use_ppc_asm < 0) { 1281 /* If GALLIUM_NOPPC is set, don't use PPC codegen */ 1282 use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE); 1283 } 1284 if (!use_ppc_asm) 1285 return FALSE; 1286 1287 if (0) { 1288 debug_printf("\n********* TGSI->PPC ********\n"); 1289 tgsi_dump(tokens, 0); 1290 } 1291 1292 util_init_math(); 1293 1294 init_gen_context(&gen, func); 1295 1296 emit_prologue(func); 1297 1298 tgsi_parse_init( &parse, tokens ); 1299 1300 while (!tgsi_parse_end_of_tokens(&parse) && ok) { 1301 tgsi_parse_token(&parse); 1302 1303 switch (parse.FullToken.Token.Type) { 1304 case TGSI_TOKEN_TYPE_DECLARATION: 1305 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { 1306 emit_declaration(func, &parse.FullToken.FullDeclaration ); 1307 } 1308 break; 1309 1310 case TGSI_TOKEN_TYPE_INSTRUCTION: 1311 if (func->print) { 1312 _debug_printf("# "); 1313 ic++; 1314 tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); 1315 } 1316 1317 ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); 1318 1319 if (!ok) { 1320 debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n", 1321 parse.FullToken.FullInstruction.Instruction.Opcode, 1322 parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? 1323 "vertex shader" : "fragment shader"); 1324 } 1325 break; 1326 1327 case TGSI_TOKEN_TYPE_IMMEDIATE: 1328 /* splat each immediate component into a float[4] vector for SoA */ 1329 { 1330 const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; 1331 uint i; 1332 assert(size <= 4); 1333 assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); 1334 for (i = 0; i < size; i++) { 1335 immediates[num_immediates][i] = 1336 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; 1337 } 1338 num_immediates++; 1339 } 1340 break; 1341 1342 default: 1343 ok = 0; 1344 assert( 0 ); 1345 } 1346 } 1347 1348 emit_epilogue(func); 1349 1350 tgsi_parse_free( &parse ); 1351 1352 if (ppc_num_instructions(func) == 0) { 1353 /* ran out of memory for instructions */ 1354 ok = FALSE; 1355 } 1356 1357 if (!ok) 1358 debug_printf("TGSI->PPC translation failed\n"); 1359 1360 return ok; 1361} 1362 1363#endif /* PIPE_ARCH_PPC */ 1364