tgsi_ppc.c revision f8ab4feb75f4a592e23859813c093dcdbd4b8988
1/************************************************************************** 2 * 3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI to PowerPC code generation. 30 */ 31 32#include "pipe/p_config.h" 33 34#if defined(PIPE_ARCH_PPC) 35 36#include "pipe/p_debug.h" 37#include "pipe/p_shader_tokens.h" 38#include "util/u_math.h" 39#include "util/u_memory.h" 40#include "util/u_sse.h" 41#include "tgsi/tgsi_parse.h" 42#include "tgsi/tgsi_util.h" 43#include "tgsi_exec.h" 44#include "tgsi_ppc.h" 45#include "rtasm/rtasm_ppc.h" 46 47 48/** 49 * Since it's pretty much impossible to form PPC vector immediates, load 50 * them from memory here: 51 */ 52const float ppc_builtin_constants[] ALIGN16_ATTRIB = { 53 1.0f, -128.0f, 128.0, 0.0 54}; 55 56 57#define FOR_EACH_CHANNEL( CHAN )\ 58 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 59 60#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 61 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 62 63#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 64 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 65 66#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 67 FOR_EACH_CHANNEL( CHAN )\ 68 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 69 70#define CHAN_X 0 71#define CHAN_Y 1 72#define CHAN_Z 2 73#define CHAN_W 3 74 75#define TEMP_ONE_I TGSI_EXEC_TEMP_ONE_I 76#define TEMP_ONE_C TGSI_EXEC_TEMP_ONE_C 77 78#define TEMP_R0 TGSI_EXEC_TEMP_R0 79#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR 80 81 82/** 83 * Context/state used during code gen. 84 */ 85struct gen_context 86{ 87 struct ppc_function *f; 88 int inputs_reg; /**< GP register pointing to input params */ 89 int outputs_reg; /**< GP register pointing to output params */ 90 int temps_reg; /**< GP register pointing to temporary "registers" */ 91 int immed_reg; /**< GP register pointing to immediates buffer */ 92 int const_reg; /**< GP register pointing to constants buffer */ 93 int builtins_reg; /**< GP register pointint to built-in constants */ 94 95 int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ 96 int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ 97}; 98 99 100/** 101 * Load the given vector register with {value, value, value, value}. 102 * The value must be in the ppu_builtin_constants[] array. 103 * We wouldn't need this if there was a simple way to load PPC vector 104 * registers with immediate values! 105 */ 106static void 107load_constant_vec(struct gen_context *gen, int dst_vec, float value) 108{ 109 uint pos; 110 for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { 111 if (ppc_builtin_constants[pos] == value) { 112 int offset_reg = ppc_allocate_register(gen->f); 113 int offset = pos * 4; 114 115 ppc_li(gen->f, offset_reg, offset); 116 /* Load 4-byte word into vector register. 117 * The vector slot depends on the effective address we load from. 118 * We know that our builtins start at a 16-byte boundary so we 119 * know that 'swizzle' tells us which vector slot will have the 120 * loaded word. The other vector slots will be undefined. 121 */ 122 ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); 123 /* splat word[pos % 4] across the vector reg */ 124 ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); 125 ppc_release_register(gen->f, offset_reg); 126 return; 127 } 128 } 129 assert(0 && "Need to add new constant to ppc_builtin_constants array"); 130} 131 132 133/** 134 * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. 135 */ 136static int 137gen_one_vec(struct gen_context *gen) 138{ 139 if (gen->one_vec < 0) { 140 gen->one_vec = ppc_allocate_vec_register(gen->f); 141 load_constant_vec(gen, gen->one_vec, 1.0f); 142 } 143 return gen->one_vec; 144} 145 146/** 147 * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. 148 */ 149static int 150gen_get_bit31_vec(struct gen_context *gen) 151{ 152 if (gen->bit31_vec < 0) { 153 gen->bit31_vec = ppc_allocate_vec_register(gen->f); 154 ppc_vspltisw(gen->f, gen->bit31_vec, -1); 155 ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); 156 } 157 return gen->bit31_vec; 158} 159 160 161/** 162 * Register fetch, put result in 'dst_vec'. 163 */ 164static void 165emit_fetch(struct gen_context *gen, 166 unsigned dst_vec, 167 const struct tgsi_full_src_register *reg, 168 const unsigned chan_index) 169{ 170 uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); 171 172 switch (swizzle) { 173 case TGSI_EXTSWIZZLE_X: 174 case TGSI_EXTSWIZZLE_Y: 175 case TGSI_EXTSWIZZLE_Z: 176 case TGSI_EXTSWIZZLE_W: 177 switch (reg->SrcRegister.File) { 178 case TGSI_FILE_INPUT: 179 { 180 int offset_reg = ppc_allocate_register(gen->f); 181 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; 182 ppc_li(gen->f, offset_reg, offset); 183 ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); 184 ppc_release_register(gen->f, offset_reg); 185 } 186 break; 187 case TGSI_FILE_TEMPORARY: 188 { 189 int offset_reg = ppc_allocate_register(gen->f); 190 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; 191 ppc_li(gen->f, offset_reg, offset); 192 ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); 193 ppc_release_register(gen->f, offset_reg); 194 } 195 break; 196 case TGSI_FILE_IMMEDIATE: 197 { 198 int offset_reg = ppc_allocate_register(gen->f); 199 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; 200 ppc_li(gen->f, offset_reg, offset); 201 ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg); 202 ppc_release_register(gen->f, offset_reg); 203 } 204 break; 205 case TGSI_FILE_CONSTANT: 206 { 207 int offset_reg = ppc_allocate_register(gen->f); 208 int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; 209 ppc_li(gen->f, offset_reg, offset); 210 /* Load 4-byte word into vector register. 211 * The vector slot depends on the effective address we load from. 212 * We know that our constants start at a 16-byte boundary so we 213 * know that 'swizzle' tells us which vector slot will have the 214 * loaded word. The other vector slots will be undefined. 215 */ 216 ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); 217 /* splat word[swizzle] across the vector reg */ 218 ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); 219 ppc_release_register(gen->f, offset_reg); 220 } 221 break; 222 default: 223 assert( 0 ); 224 } 225 break; 226 case TGSI_EXTSWIZZLE_ZERO: 227 ppc_vzero(gen->f, dst_vec); 228 break; 229 case TGSI_EXTSWIZZLE_ONE: 230 { 231 int one_vec = gen_one_vec(gen); 232 ppc_vmove(gen->f, dst_vec, one_vec); 233 } 234 break; 235 default: 236 assert( 0 ); 237 } 238 239 { 240 uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); 241 if (sign_op != TGSI_UTIL_SIGN_KEEP) { 242 int bit31_vec = gen_get_bit31_vec(gen); 243 244 switch (sign_op) { 245 case TGSI_UTIL_SIGN_CLEAR: 246 /* vec = vec & ~bit31 */ 247 ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec); 248 break; 249 case TGSI_UTIL_SIGN_SET: 250 /* vec = vec | bit31 */ 251 ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec); 252 break; 253 case TGSI_UTIL_SIGN_TOGGLE: 254 /* vec = vec ^ bit31 */ 255 ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec); 256 break; 257 default: 258 assert(0); 259 } 260 } 261 } 262} 263 264#define FETCH( GEN, INST, DST_VEC, SRC_REG, CHAN ) \ 265 emit_fetch( GEN, DST_VEC, &(INST).FullSrcRegisters[SRC_REG], CHAN ) 266 267 268 269/** 270 * Register store. Store 'src_vec' at location indicated by 'reg'. 271 */ 272static void 273emit_store(struct gen_context *gen, 274 unsigned src_vec, 275 const struct tgsi_full_dst_register *reg, 276 const struct tgsi_full_instruction *inst, 277 unsigned chan_index) 278{ 279 switch (reg->DstRegister.File) { 280 case TGSI_FILE_OUTPUT: 281 { 282 int offset_reg = ppc_allocate_register(gen->f); 283 int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; 284 ppc_li(gen->f, offset_reg, offset); 285 ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); 286 ppc_release_register(gen->f, offset_reg); 287 } 288 break; 289 case TGSI_FILE_TEMPORARY: 290 { 291 int offset_reg = ppc_allocate_register(gen->f); 292 int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; 293 ppc_li(gen->f, offset_reg, offset); 294 ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); 295 ppc_release_register(gen->f, offset_reg); 296 } 297 break; 298#if 0 299 case TGSI_FILE_ADDRESS: 300 emit_addrs( 301 func, 302 xmm, 303 reg->DstRegister.Index, 304 chan_index ); 305 break; 306#endif 307 default: 308 assert( 0 ); 309 } 310 311#if 0 312 switch( inst->Instruction.Saturate ) { 313 case TGSI_SAT_NONE: 314 break; 315 316 case TGSI_SAT_ZERO_ONE: 317 /* assert( 0 ); */ 318 break; 319 320 case TGSI_SAT_MINUS_PLUS_ONE: 321 assert( 0 ); 322 break; 323 } 324#endif 325} 326 327 328#define STORE( GEN, INST, XMM, INDEX, CHAN )\ 329 emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) 330 331 332 333static void 334emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 335{ 336 int v0 = ppc_allocate_vec_register(gen->f); 337 int v1 = ppc_allocate_vec_register(gen->f); 338 uint chan_index; 339 340 FETCH(gen, *inst, v0, 0, CHAN_X); 341 342 switch (inst->Instruction.Opcode) { 343 case TGSI_OPCODE_RSQ: 344 /* v1 = 1.0 / sqrt(v0) */ 345 ppc_vrsqrtefp(gen->f, v1, v0); 346 break; 347 case TGSI_OPCODE_RCP: 348 /* v1 = 1.0 / v0 */ 349 ppc_vrefp(gen->f, v1, v0); 350 break; 351 default: 352 assert(0); 353 } 354 355 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { 356 STORE(gen, *inst, v1, 0, chan_index); 357 } 358 ppc_release_vec_register(gen->f, v0); 359 ppc_release_vec_register(gen->f, v1); 360} 361 362 363static void 364emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) 365{ 366 int v0 = ppc_allocate_vec_register(gen->f); 367 uint chan_index; 368 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 369 FETCH(gen, *inst, 0, 0, chan_index); /* v0 = srcreg[0] */ 370 switch (inst->Instruction.Opcode) { 371 case TGSI_OPCODE_ABS: 372 /* turn off the most significant bit of each vector float word */ 373 { 374 int v1 = ppc_allocate_vec_register(gen->f); 375 ppc_vspltisw(gen->f, v1, -1); /* v1 = {-1, -1, -1, -1} */ 376 ppc_vslw(gen->f, v1, v1, v1); /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */ 377 ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */ 378 ppc_release_vec_register(gen->f, v1); 379 } 380 break; 381 case TGSI_OPCODE_FLOOR: 382 ppc_vrfim(gen->f, v0, v0); /* v0 = floor(v0) */ 383 break; 384 case TGSI_OPCODE_FRAC: 385 { 386 int v1 = ppc_allocate_vec_register(gen->f); 387 ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ 388 ppc_vsubfp(gen->f, v0, v0, v1); /* v0 = v0 - v1 */ 389 ppc_release_vec_register(gen->f, v1); 390 } 391 break; 392 case TGSI_OPCODE_EXPBASE2: 393 ppc_vexptefp(gen->f, v0, v0); /* v0 = 2^v0 */ 394 break; 395 case TGSI_OPCODE_LOGBASE2: 396 /* XXX this may be broken! */ 397 ppc_vlogefp(gen->f, v0, v0); /* v0 = log2(v0) */ 398 break; 399 case TGSI_OPCODE_MOV: 400 /* nothing */ 401 break; 402 default: 403 assert(0); 404 } 405 STORE(gen, *inst, v0, 0, chan_index); /* store v0 */ 406 } 407 ppc_release_vec_register(gen->f, v0); 408} 409 410 411static void 412emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) 413{ 414 int v0 = ppc_allocate_vec_register(gen->f); 415 int v1 = ppc_allocate_vec_register(gen->f); 416 int v2 = ppc_allocate_vec_register(gen->f); 417 uint chan_index; 418 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 419 FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ 420 FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ 421 switch (inst->Instruction.Opcode) { 422 case TGSI_OPCODE_ADD: 423 ppc_vaddfp(gen->f, v2, v0, v1); 424 break; 425 case TGSI_OPCODE_SUB: 426 ppc_vsubfp(gen->f, v2, v0, v1); 427 break; 428 case TGSI_OPCODE_MUL: 429 ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ 430 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */ 431 break; 432 case TGSI_OPCODE_MIN: 433 ppc_vminfp(gen->f, v2, v0, v1); 434 break; 435 case TGSI_OPCODE_MAX: 436 ppc_vmaxfp(gen->f, v2, v0, v1); 437 break; 438 default: 439 assert(0); 440 } 441 STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ 442 } 443 ppc_release_vec_register(gen->f, v0); 444 ppc_release_vec_register(gen->f, v1); 445 ppc_release_vec_register(gen->f, v2); 446} 447 448 449/** 450 * Vector comparisons, resulting in 1.0 or 0.0 values. 451 */ 452static void 453emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) 454{ 455 int v0 = ppc_allocate_vec_register(gen->f); 456 int v1 = ppc_allocate_vec_register(gen->f); 457 int v2 = ppc_allocate_vec_register(gen->f); 458 uint chan_index; 459 boolean complement = FALSE; 460 int one_vec = gen_one_vec(gen); 461 462 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 463 FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ 464 FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ 465 466 switch (inst->Instruction.Opcode) { 467 case TGSI_OPCODE_SNE: 468 complement = TRUE; 469 /* fall-through */ 470 case TGSI_OPCODE_SEQ: 471 ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ 472 break; 473 474 case TGSI_OPCODE_SGE: 475 complement = TRUE; 476 /* fall-through */ 477 case TGSI_OPCODE_SLT: 478 ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ 479 break; 480 481 case TGSI_OPCODE_SLE: 482 complement = TRUE; 483 /* fall-through */ 484 case TGSI_OPCODE_SGT: 485 ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */ 486 break; 487 default: 488 assert(0); 489 } 490 491 /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ 492 493 if (complement) 494 ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ 495 else 496 ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ 497 498 STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ 499 } 500 501 ppc_release_vec_register(gen->f, v0); 502 ppc_release_vec_register(gen->f, v1); 503 ppc_release_vec_register(gen->f, v2); 504} 505 506 507static void 508emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) 509{ 510 int v0 = ppc_allocate_vec_register(gen->f); 511 int v1 = ppc_allocate_vec_register(gen->f); 512 int v2 = ppc_allocate_vec_register(gen->f); 513 uint chan_index; 514 515 ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ 516 517 FETCH(gen, *inst, v0, 0, CHAN_X); /* v0 = src0.XXXX */ 518 FETCH(gen, *inst, v1, 1, CHAN_X); /* v1 = src1.XXXX */ 519 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 520 521 FETCH(gen, *inst, v0, 0, CHAN_Y); /* v0 = src0.YYYY */ 522 FETCH(gen, *inst, v1, 1, CHAN_Y); /* v1 = src1.YYYY */ 523 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 524 525 FETCH(gen, *inst, v0, 0, CHAN_Z); /* v0 = src0.ZZZZ */ 526 FETCH(gen, *inst, v1, 1, CHAN_Z); /* v1 = src1.ZZZZ */ 527 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 528 529 if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { 530 FETCH(gen, *inst, v0, 0, CHAN_W); /* v0 = src0.WWWW */ 531 FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ 532 ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ 533 } 534 else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { 535 FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ 536 ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ 537 } 538 539 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 540 STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ 541 } 542 ppc_release_vec_register(gen->f, v0); 543 ppc_release_vec_register(gen->f, v1); 544 ppc_release_vec_register(gen->f, v2); 545} 546 547 548static void 549emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) 550{ 551 int v0 = ppc_allocate_vec_register(gen->f); 552 int v1 = ppc_allocate_vec_register(gen->f); 553 int v2 = ppc_allocate_vec_register(gen->f); 554 int v3 = ppc_allocate_vec_register(gen->f); 555 uint chan_index; 556 FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { 557 FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ 558 FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ 559 FETCH(gen, *inst, v2, 2, chan_index); /* v2 = srcreg[2] */ 560 switch (inst->Instruction.Opcode) { 561 case TGSI_OPCODE_MAD: 562 ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ 563 break; 564 case TGSI_OPCODE_LRP: 565 ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ 566 ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ 567 break; 568 default: 569 assert(0); 570 } 571 STORE(gen, *inst, v3, 0, chan_index); /* store v3 */ 572 } 573 ppc_release_vec_register(gen->f, v0); 574 ppc_release_vec_register(gen->f, v1); 575 ppc_release_vec_register(gen->f, v2); 576 ppc_release_vec_register(gen->f, v3); 577} 578 579 580 581/** Approximation for vr = pow(va, vb) */ 582static void 583ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) 584{ 585 /* pow(a,b) ~= exp2(log2(a) * b) */ 586 int t_vec = ppc_allocate_vec_register(f); 587 int zero_vec = ppc_allocate_vec_register(f); 588 589 ppc_vzero(f, zero_vec); 590 591 ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ 592 ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb */ 593 ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ 594 595 ppc_release_vec_register(f, t_vec); 596 ppc_release_vec_register(f, zero_vec); 597} 598 599 600static void 601emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) 602{ 603 int one_vec = gen_one_vec(gen); 604 605 /* Compute X */ 606 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { 607 STORE(gen, *inst, one_vec, 0, CHAN_X); 608 } 609 610 /* Compute Y, Z */ 611 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || 612 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 613 int x_vec = ppc_allocate_vec_register(gen->f); 614 int zero_vec = ppc_allocate_vec_register(gen->f); 615 616 FETCH(gen, *inst, x_vec, 0, CHAN_X); /* x_vec = src[0].x */ 617 618 ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ 619 ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ 620 621 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { 622 STORE(gen, *inst, x_vec, 0, CHAN_Y); /* store Y */ 623 } 624 625 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { 626 int y_vec = ppc_allocate_vec_register(gen->f); 627 int z_vec = ppc_allocate_vec_register(gen->f); 628 int w_vec = ppc_allocate_vec_register(gen->f); 629 int pow_vec = ppc_allocate_vec_register(gen->f); 630 int pos_vec = ppc_allocate_vec_register(gen->f); 631 int p128_vec = ppc_allocate_vec_register(gen->f); 632 int n128_vec = ppc_allocate_vec_register(gen->f); 633 634 FETCH(gen, *inst, y_vec, 0, CHAN_Y); /* y_vec = src[0].y */ 635 ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ 636 637 FETCH(gen, *inst, w_vec, 0, CHAN_W); /* w_vec = src[0].w */ 638 639 /* XXX clamp Y to [-128, 128] */ 640 load_constant_vec(gen, p128_vec, 128.0f); 641 load_constant_vec(gen, n128_vec, -128.0f); 642 643 /* if temp.x > 0 644 * pow(tmp.y, tmp.w) 645 * else 646 * 0.0 647 */ 648 649 ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ 650 ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ 651 ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ 652 653 STORE(gen, *inst, z_vec, 0, CHAN_Z); /* store Z */ 654 655 ppc_release_vec_register(gen->f, y_vec); 656 ppc_release_vec_register(gen->f, z_vec); 657 ppc_release_vec_register(gen->f, w_vec); 658 ppc_release_vec_register(gen->f, pow_vec); 659 ppc_release_vec_register(gen->f, pos_vec); 660 ppc_release_vec_register(gen->f, p128_vec); 661 ppc_release_vec_register(gen->f, n128_vec); 662 } 663 664 ppc_release_vec_register(gen->f, x_vec); 665 ppc_release_vec_register(gen->f, zero_vec); 666 } 667 668 /* Compute W */ 669 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { 670 STORE(gen, *inst, one_vec, 0, CHAN_W); 671 } 672} 673 674 675static int 676emit_instruction(struct gen_context *gen, 677 struct tgsi_full_instruction *inst) 678{ 679 switch (inst->Instruction.Opcode) { 680 case TGSI_OPCODE_MOV: 681 case TGSI_OPCODE_ABS: 682 case TGSI_OPCODE_FLOOR: 683 case TGSI_OPCODE_FRAC: 684 case TGSI_OPCODE_EXPBASE2: 685 case TGSI_OPCODE_LOGBASE2: 686 emit_unaryop(gen, inst); 687 break; 688 case TGSI_OPCODE_RSQ: 689 case TGSI_OPCODE_RCP: 690 emit_scalar_unaryop(gen, inst); 691 break; 692 case TGSI_OPCODE_ADD: 693 case TGSI_OPCODE_SUB: 694 case TGSI_OPCODE_MUL: 695 case TGSI_OPCODE_MIN: 696 case TGSI_OPCODE_MAX: 697 emit_binop(gen, inst); 698 break; 699 case TGSI_OPCODE_SEQ: 700 case TGSI_OPCODE_SNE: 701 case TGSI_OPCODE_SLT: 702 case TGSI_OPCODE_SGT: 703 case TGSI_OPCODE_SLE: 704 case TGSI_OPCODE_SGE: 705 emit_inequality(gen, inst); 706 break; 707 case TGSI_OPCODE_MAD: 708 case TGSI_OPCODE_LRP: 709 emit_triop(gen, inst); 710 break; 711 case TGSI_OPCODE_DP3: 712 case TGSI_OPCODE_DP4: 713 case TGSI_OPCODE_DPH: 714 emit_dotprod(gen, inst); 715 break; 716 case TGSI_OPCODE_LIT: 717 emit_lit(gen, inst); 718 break; 719 case TGSI_OPCODE_END: 720 /* normal end */ 721 return 1; 722 default: 723 return 0; 724 } 725 726 727 return 1; 728} 729 730static void 731emit_declaration( 732 struct ppc_function *func, 733 struct tgsi_full_declaration *decl ) 734{ 735 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 736#if 0 737 unsigned first, last, mask; 738 unsigned i, j; 739 740 first = decl->DeclarationRange.First; 741 last = decl->DeclarationRange.Last; 742 mask = decl->Declaration.UsageMask; 743 744 for( i = first; i <= last; i++ ) { 745 for( j = 0; j < NUM_CHANNELS; j++ ) { 746 if( mask & (1 << j) ) { 747 switch( decl->Declaration.Interpolate ) { 748 case TGSI_INTERPOLATE_CONSTANT: 749 emit_coef_a0( func, 0, i, j ); 750 emit_inputs( func, 0, i, j ); 751 break; 752 753 case TGSI_INTERPOLATE_LINEAR: 754 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 755 emit_coef_dadx( func, 1, i, j ); 756 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 757 emit_coef_dady( func, 3, i, j ); 758 emit_mul( func, 0, 1 ); /* x * dadx */ 759 emit_coef_a0( func, 4, i, j ); 760 emit_mul( func, 2, 3 ); /* y * dady */ 761 emit_add( func, 0, 4 ); /* x * dadx + a0 */ 762 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 763 emit_inputs( func, 0, i, j ); 764 break; 765 766 case TGSI_INTERPOLATE_PERSPECTIVE: 767 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); 768 emit_coef_dadx( func, 1, i, j ); 769 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); 770 emit_coef_dady( func, 3, i, j ); 771 emit_mul( func, 0, 1 ); /* x * dadx */ 772 emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); 773 emit_coef_a0( func, 5, i, j ); 774 emit_rcp( func, 4, 4 ); /* 1.0 / w */ 775 emit_mul( func, 2, 3 ); /* y * dady */ 776 emit_add( func, 0, 5 ); /* x * dadx + a0 */ 777 emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ 778 emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ 779 emit_inputs( func, 0, i, j ); 780 break; 781 782 default: 783 assert( 0 ); 784 break; 785 } 786 } 787 } 788 } 789#endif 790 } 791} 792 793 794 795static void 796emit_prologue(struct ppc_function *func) 797{ 798 /* XXX set up stack frame */ 799} 800 801 802static void 803emit_epilogue(struct ppc_function *func) 804{ 805 ppc_return(func); 806 /* XXX restore prev stack frame */ 807} 808 809 810 811/** 812 * Translate a TGSI vertex/fragment shader to PPC code. 813 * 814 * \param tokens the TGSI input shader 815 * \param func the output PPC code/function 816 * \param immediates buffer to place immediates, later passed to PPC func 817 * \return TRUE for success, FALSE if translation failed 818 */ 819boolean 820tgsi_emit_ppc(const struct tgsi_token *tokens, 821 struct ppc_function *func, 822 float (*immediates)[4], 823 boolean do_swizzles ) 824{ 825 static int use_ppc_asm = -1; 826 struct tgsi_parse_context parse; 827 /*boolean instruction_phase = FALSE;*/ 828 unsigned ok = 1; 829 uint num_immediates = 0; 830 struct gen_context gen; 831 832 if (use_ppc_asm < 0) { 833 /* If GALLIUM_NOPPC is set, don't use PPC codegen */ 834 use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE); 835 } 836 if (!use_ppc_asm) 837 return FALSE; 838 839 util_init_math(); 840 841 gen.f = func; 842 gen.inputs_reg = ppc_reserve_register(func, 3); /* first function param */ 843 gen.outputs_reg = ppc_reserve_register(func, 4); /* second function param */ 844 gen.temps_reg = ppc_reserve_register(func, 5); /* ... */ 845 gen.immed_reg = ppc_reserve_register(func, 6); 846 gen.const_reg = ppc_reserve_register(func, 7); 847 gen.builtins_reg = ppc_reserve_register(func, 8); 848 gen.one_vec = -1; 849 gen.bit31_vec = -1; 850 851 emit_prologue(func); 852 853 tgsi_parse_init( &parse, tokens ); 854 855 while (!tgsi_parse_end_of_tokens(&parse) && ok) { 856 tgsi_parse_token(&parse); 857 858 switch (parse.FullToken.Token.Type) { 859 case TGSI_TOKEN_TYPE_DECLARATION: 860 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { 861 emit_declaration(func, &parse.FullToken.FullDeclaration ); 862 } 863 break; 864 865 case TGSI_TOKEN_TYPE_INSTRUCTION: 866 ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); 867 868 if (!ok) { 869 debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n", 870 parse.FullToken.FullInstruction.Instruction.Opcode, 871 parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? 872 "vertex shader" : "fragment shader"); 873 } 874 break; 875 876 case TGSI_TOKEN_TYPE_IMMEDIATE: 877 /* splat each immediate component into a float[4] vector for SoA */ 878 { 879 const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; 880 float *imm = (float *) immediates; 881 uint i; 882 assert(size <= 4); 883 assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); 884 for (i = 0; i < size; i++) { 885 const float value = 886 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; 887 imm[num_immediates * 4 + 0] = 888 imm[num_immediates * 4 + 1] = 889 imm[num_immediates * 4 + 2] = 890 imm[num_immediates * 4 + 3] = value; 891 num_immediates++; 892 } 893 } 894 break; 895 896 default: 897 ok = 0; 898 assert( 0 ); 899 } 900 } 901 902 emit_epilogue(func); 903 904 tgsi_parse_free( &parse ); 905 906 return ok; 907} 908 909#endif /* PIPE_ARCH_PPC */ 910