lp_bld_tgsi_soa.c revision 85c7ec70ad41c8ada75a4cbace83d16815d3e2c5
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_info.h" 45#include "tgsi/tgsi_parse.h" 46#include "tgsi/tgsi_util.h" 47#include "tgsi/tgsi_exec.h" 48#include "lp_bld_type.h" 49#include "lp_bld_const.h" 50#include "lp_bld_arit.h" 51#include "lp_bld_logic.h" 52#include "lp_bld_swizzle.h" 53#include "lp_bld_flow.h" 54#include "lp_bld_tgsi.h" 55 56 57#define LP_MAX_TEMPS 256 58#define LP_MAX_IMMEDIATES 256 59 60 61#define FOR_EACH_CHANNEL( CHAN )\ 62 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 63 64#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 65 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 66 67#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 68 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 69 70#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 71 FOR_EACH_CHANNEL( CHAN )\ 72 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 73 74#define CHAN_X 0 75#define CHAN_Y 1 76#define CHAN_Z 2 77#define CHAN_W 3 78 79#define QUAD_TOP_LEFT 0 80#define QUAD_TOP_RIGHT 1 81#define QUAD_BOTTOM_LEFT 2 82#define QUAD_BOTTOM_RIGHT 3 83 84 85struct lp_build_tgsi_soa_context 86{ 87 struct lp_build_context base; 88 89 LLVMValueRef consts_ptr; 90 const LLVMValueRef *pos; 91 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 92 LLVMValueRef (*outputs)[NUM_CHANNELS]; 93 94 struct lp_build_sampler_soa *sampler; 95 96 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; 97 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; 98 99 struct lp_build_mask_context *mask; 100}; 101 102 103static const unsigned char 104swizzle_left[4] = { 105 QUAD_TOP_LEFT, QUAD_TOP_LEFT, 106 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT 107}; 108 109static const unsigned char 110swizzle_right[4] = { 111 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, 112 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT 113}; 114 115static const unsigned char 116swizzle_top[4] = { 117 QUAD_TOP_LEFT, QUAD_TOP_RIGHT, 118 QUAD_TOP_LEFT, QUAD_TOP_RIGHT 119}; 120 121static const unsigned char 122swizzle_bottom[4] = { 123 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, 124 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT 125}; 126 127 128static LLVMValueRef 129emit_ddx(struct lp_build_tgsi_soa_context *bld, 130 LLVMValueRef src) 131{ 132 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); 133 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); 134 return lp_build_sub(&bld->base, src_right, src_left); 135} 136 137 138static LLVMValueRef 139emit_ddy(struct lp_build_tgsi_soa_context *bld, 140 LLVMValueRef src) 141{ 142 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); 143 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); 144 return lp_build_sub(&bld->base, src_top, src_bottom); 145} 146 147 148/** 149 * Register fetch. 150 */ 151static LLVMValueRef 152emit_fetch( 153 struct lp_build_tgsi_soa_context *bld, 154 const struct tgsi_full_instruction *inst, 155 unsigned index, 156 const unsigned chan_index ) 157{ 158 const struct tgsi_full_src_register *reg = &inst->Src[index]; 159 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 160 LLVMValueRef res; 161 162 switch (swizzle) { 163 case TGSI_SWIZZLE_X: 164 case TGSI_SWIZZLE_Y: 165 case TGSI_SWIZZLE_Z: 166 case TGSI_SWIZZLE_W: 167 168 switch (reg->Register.File) { 169 case TGSI_FILE_CONSTANT: { 170 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); 171 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); 172 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 173 res = lp_build_broadcast_scalar(&bld->base, scalar); 174 break; 175 } 176 177 case TGSI_FILE_IMMEDIATE: 178 res = bld->immediates[reg->Register.Index][swizzle]; 179 assert(res); 180 break; 181 182 case TGSI_FILE_INPUT: 183 res = bld->inputs[reg->Register.Index][swizzle]; 184 assert(res); 185 break; 186 187 case TGSI_FILE_TEMPORARY: 188 res = LLVMBuildLoad(bld->base.builder, bld->temps[reg->Register.Index][swizzle], ""); 189 if(!res) 190 return bld->base.undef; 191 break; 192 193 default: 194 assert( 0 ); 195 return bld->base.undef; 196 } 197 break; 198 199 default: 200 assert( 0 ); 201 return bld->base.undef; 202 } 203 204 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { 205 case TGSI_UTIL_SIGN_CLEAR: 206 res = lp_build_abs( &bld->base, res ); 207 break; 208 209 case TGSI_UTIL_SIGN_SET: 210 /* TODO: Use bitwese OR for floating point */ 211 res = lp_build_abs( &bld->base, res ); 212 res = LLVMBuildNeg( bld->base.builder, res, "" ); 213 break; 214 215 case TGSI_UTIL_SIGN_TOGGLE: 216 res = LLVMBuildNeg( bld->base.builder, res, "" ); 217 break; 218 219 case TGSI_UTIL_SIGN_KEEP: 220 break; 221 } 222 223 return res; 224} 225 226 227/** 228 * Register fetch with derivatives. 229 */ 230static void 231emit_fetch_deriv( 232 struct lp_build_tgsi_soa_context *bld, 233 const struct tgsi_full_instruction *inst, 234 unsigned index, 235 const unsigned chan_index, 236 LLVMValueRef *res, 237 LLVMValueRef *ddx, 238 LLVMValueRef *ddy) 239{ 240 LLVMValueRef src; 241 242 src = emit_fetch(bld, inst, index, chan_index); 243 244 if(res) 245 *res = src; 246 247 /* TODO: use interpolation coeffs for inputs */ 248 249 if(ddx) 250 *ddx = emit_ddx(bld, src); 251 252 if(ddy) 253 *ddy = emit_ddy(bld, src); 254} 255 256 257/** 258 * Register store. 259 */ 260static void 261emit_store( 262 struct lp_build_tgsi_soa_context *bld, 263 const struct tgsi_full_instruction *inst, 264 unsigned index, 265 unsigned chan_index, 266 LLVMValueRef value) 267{ 268 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 269 270 switch( inst->Instruction.Saturate ) { 271 case TGSI_SAT_NONE: 272 break; 273 274 case TGSI_SAT_ZERO_ONE: 275 value = lp_build_max(&bld->base, value, bld->base.zero); 276 value = lp_build_min(&bld->base, value, bld->base.one); 277 break; 278 279 case TGSI_SAT_MINUS_PLUS_ONE: 280 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0)); 281 value = lp_build_min(&bld->base, value, bld->base.one); 282 break; 283 284 default: 285 assert(0); 286 } 287 288 switch( reg->Register.File ) { 289 case TGSI_FILE_OUTPUT: 290 LLVMBuildStore(bld->base.builder, value, 291 bld->outputs[reg->Register.Index][chan_index]); 292 break; 293 294 case TGSI_FILE_TEMPORARY: 295 LLVMBuildStore(bld->base.builder, value, 296 bld->temps[reg->Register.Index][chan_index]); 297 break; 298 299 case TGSI_FILE_ADDRESS: 300 /* FIXME */ 301 assert(0); 302 break; 303 304 default: 305 assert( 0 ); 306 } 307} 308 309 310/** 311 * High-level instruction translators. 312 */ 313 314 315static void 316emit_tex( struct lp_build_tgsi_soa_context *bld, 317 const struct tgsi_full_instruction *inst, 318 boolean apply_lodbias, 319 boolean projected, 320 LLVMValueRef *texel) 321{ 322 const uint unit = inst->Src[1].Register.Index; 323 LLVMValueRef lodbias; 324 LLVMValueRef oow = NULL; 325 LLVMValueRef coords[3]; 326 unsigned num_coords; 327 unsigned i; 328 329 switch (inst->Texture.Texture) { 330 case TGSI_TEXTURE_1D: 331 num_coords = 1; 332 break; 333 case TGSI_TEXTURE_2D: 334 case TGSI_TEXTURE_RECT: 335 num_coords = 2; 336 break; 337 case TGSI_TEXTURE_SHADOW1D: 338 case TGSI_TEXTURE_SHADOW2D: 339 case TGSI_TEXTURE_SHADOWRECT: 340 case TGSI_TEXTURE_3D: 341 case TGSI_TEXTURE_CUBE: 342 num_coords = 3; 343 break; 344 default: 345 assert(0); 346 return; 347 } 348 349 if(apply_lodbias) 350 lodbias = emit_fetch( bld, inst, 0, 3 ); 351 else 352 lodbias = bld->base.zero; 353 354 if (projected) { 355 oow = emit_fetch( bld, inst, 0, 3 ); 356 oow = lp_build_rcp(&bld->base, oow); 357 } 358 359 for (i = 0; i < num_coords; i++) { 360 coords[i] = emit_fetch( bld, inst, 0, i ); 361 if (projected) 362 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 363 } 364 for (i = num_coords; i < 3; i++) { 365 coords[i] = bld->base.undef; 366 } 367 368 bld->sampler->emit_fetch_texel(bld->sampler, 369 bld->base.builder, 370 bld->base.type, 371 unit, num_coords, coords, lodbias, 372 texel); 373} 374 375 376static void 377emit_kil( 378 struct lp_build_tgsi_soa_context *bld, 379 const struct tgsi_full_instruction *inst ) 380{ 381 const struct tgsi_full_src_register *reg = &inst->Src[0]; 382 LLVMValueRef terms[NUM_CHANNELS]; 383 LLVMValueRef mask; 384 unsigned chan_index; 385 386 memset(&terms, 0, sizeof terms); 387 388 FOR_EACH_CHANNEL( chan_index ) { 389 unsigned swizzle; 390 391 /* Unswizzle channel */ 392 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 393 394 /* Check if the component has not been already tested. */ 395 assert(swizzle < NUM_CHANNELS); 396 if( !terms[swizzle] ) 397 /* TODO: change the comparison operator instead of setting the sign */ 398 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 399 } 400 401 mask = NULL; 402 FOR_EACH_CHANNEL( chan_index ) { 403 if(terms[chan_index]) { 404 LLVMValueRef chan_mask; 405 406 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 407 408 if(mask) 409 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); 410 else 411 mask = chan_mask; 412 } 413 } 414 415 if(mask) 416 lp_build_mask_update(bld->mask, mask); 417} 418 419 420/** 421 * Check if inst src/dest regs use indirect addressing into temporary 422 * register file. 423 */ 424static boolean 425indirect_temp_reference(const struct tgsi_full_instruction *inst) 426{ 427 uint i; 428 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 429 const struct tgsi_full_src_register *reg = &inst->Src[i]; 430 if (reg->Register.File == TGSI_FILE_TEMPORARY && 431 reg->Register.Indirect) 432 return TRUE; 433 } 434 for (i = 0; i < inst->Instruction.NumDstRegs; i++) { 435 const struct tgsi_full_dst_register *reg = &inst->Dst[i]; 436 if (reg->Register.File == TGSI_FILE_TEMPORARY && 437 reg->Register.Indirect) 438 return TRUE; 439 } 440 return FALSE; 441} 442 443static int 444emit_declaration( 445 struct lp_build_tgsi_soa_context *bld, 446 const struct tgsi_full_declaration *decl) 447{ 448 unsigned first = decl->Range.First; 449 unsigned last = decl->Range.Last; 450 unsigned idx, i; 451 452 for (idx = first; idx <= last; ++idx) { 453 boolean ok; 454 455 switch (decl->Declaration.File) { 456 case TGSI_FILE_TEMPORARY: 457 for (i = 0; i < NUM_CHANNELS; i++) 458 bld->temps[idx][i] = lp_build_alloca(&bld->base); 459 ok = TRUE; 460 break; 461 462 case TGSI_FILE_OUTPUT: 463 for (i = 0; i < NUM_CHANNELS; i++) 464 bld->outputs[idx][i] = lp_build_alloca(&bld->base); 465 ok = TRUE; 466 break; 467 468 default: 469 /* don't need to declare other vars */ 470 ok = TRUE; 471 } 472 473 if (!ok) 474 return FALSE; 475 } 476 477 return TRUE; 478} 479 480static int 481emit_instruction( 482 struct lp_build_tgsi_soa_context *bld, 483 const struct tgsi_full_instruction *inst, 484 const struct tgsi_opcode_info *info) 485{ 486 unsigned chan_index; 487 LLVMValueRef src0, src1, src2; 488 LLVMValueRef tmp0, tmp1, tmp2; 489 LLVMValueRef tmp3 = NULL; 490 LLVMValueRef tmp4 = NULL; 491 LLVMValueRef tmp5 = NULL; 492 LLVMValueRef tmp6 = NULL; 493 LLVMValueRef tmp7 = NULL; 494 LLVMValueRef res; 495 LLVMValueRef dst0[NUM_CHANNELS]; 496 497 /* we can't handle indirect addressing into temp register file yet */ 498 if (indirect_temp_reference(inst)) 499 return FALSE; 500 501 assert(info->num_dst <= 1); 502 if(info->num_dst) { 503 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 504 dst0[chan_index] = bld->base.undef; 505 } 506 } 507 508 switch (inst->Instruction.Opcode) { 509#if 0 510 case TGSI_OPCODE_ARL: 511 /* FIXME */ 512 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 513 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 514 emit_flr(bld, 0, 0); 515 emit_f2it( bld, 0 ); 516 dst0[chan_index] = tmp0; 517 } 518 break; 519#endif 520 521 case TGSI_OPCODE_MOV: 522 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 523 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 524 } 525 break; 526 527 case TGSI_OPCODE_LIT: 528 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 529 dst0[CHAN_X] = bld->base.one; 530 } 531 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 532 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 533 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 534 } 535 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 536 /* XMM[1] = SrcReg[0].yyyy */ 537 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 538 /* XMM[1] = max(XMM[1], 0) */ 539 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 540 /* XMM[2] = SrcReg[0].wwww */ 541 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 542 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 543 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 544 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 545 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 546 } 547 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 548 dst0[CHAN_W] = bld->base.one; 549 } 550 break; 551 552 case TGSI_OPCODE_RCP: 553 /* TGSI_OPCODE_RECIP */ 554 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 555 res = lp_build_rcp(&bld->base, src0); 556 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 557 dst0[chan_index] = res; 558 } 559 break; 560 561 case TGSI_OPCODE_RSQ: 562 /* TGSI_OPCODE_RECIPSQRT */ 563 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 564 src0 = lp_build_abs(&bld->base, src0); 565 res = lp_build_rsqrt(&bld->base, src0); 566 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 567 dst0[chan_index] = res; 568 } 569 break; 570 571 case TGSI_OPCODE_EXP: 572 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 573 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 574 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 575 LLVMValueRef *p_exp2_int_part = NULL; 576 LLVMValueRef *p_frac_part = NULL; 577 LLVMValueRef *p_exp2 = NULL; 578 579 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 580 581 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 582 p_exp2_int_part = &tmp0; 583 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 584 p_frac_part = &tmp1; 585 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 586 p_exp2 = &tmp2; 587 588 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 589 590 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 591 dst0[CHAN_X] = tmp0; 592 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 593 dst0[CHAN_Y] = tmp1; 594 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 595 dst0[CHAN_Z] = tmp2; 596 } 597 /* dst.w = 1.0 */ 598 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 599 dst0[CHAN_W] = bld->base.one; 600 } 601 break; 602 603 case TGSI_OPCODE_LOG: 604 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 605 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 606 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 607 LLVMValueRef *p_floor_log2 = NULL; 608 LLVMValueRef *p_exp = NULL; 609 LLVMValueRef *p_log2 = NULL; 610 611 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 612 src0 = lp_build_abs( &bld->base, src0 ); 613 614 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 615 p_floor_log2 = &tmp0; 616 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 617 p_exp = &tmp1; 618 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 619 p_log2 = &tmp2; 620 621 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 622 623 /* dst.x = floor(lg2(abs(src.x))) */ 624 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 625 dst0[CHAN_X] = tmp0; 626 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 627 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 628 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 629 } 630 /* dst.z = lg2(abs(src.x)) */ 631 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 632 dst0[CHAN_Z] = tmp2; 633 } 634 /* dst.w = 1.0 */ 635 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 636 dst0[CHAN_W] = bld->base.one; 637 } 638 break; 639 640 case TGSI_OPCODE_MUL: 641 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 642 src0 = emit_fetch( bld, inst, 0, chan_index ); 643 src1 = emit_fetch( bld, inst, 1, chan_index ); 644 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 645 } 646 break; 647 648 case TGSI_OPCODE_ADD: 649 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 650 src0 = emit_fetch( bld, inst, 0, chan_index ); 651 src1 = emit_fetch( bld, inst, 1, chan_index ); 652 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 653 } 654 break; 655 656 case TGSI_OPCODE_DP3: 657 /* TGSI_OPCODE_DOT3 */ 658 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 659 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 660 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 661 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 662 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 663 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 664 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 665 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 666 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 667 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 668 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 669 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 670 dst0[chan_index] = tmp0; 671 } 672 break; 673 674 case TGSI_OPCODE_DP4: 675 /* TGSI_OPCODE_DOT4 */ 676 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 677 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 678 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 679 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 680 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 681 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 682 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 683 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 684 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 685 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 686 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 687 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 688 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 689 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 690 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 691 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 692 dst0[chan_index] = tmp0; 693 } 694 break; 695 696 case TGSI_OPCODE_DST: 697 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 698 dst0[CHAN_X] = bld->base.one; 699 } 700 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 701 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 702 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 703 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 704 } 705 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 706 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 707 } 708 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 709 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 710 } 711 break; 712 713 case TGSI_OPCODE_MIN: 714 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 715 src0 = emit_fetch( bld, inst, 0, chan_index ); 716 src1 = emit_fetch( bld, inst, 1, chan_index ); 717 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 718 } 719 break; 720 721 case TGSI_OPCODE_MAX: 722 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 723 src0 = emit_fetch( bld, inst, 0, chan_index ); 724 src1 = emit_fetch( bld, inst, 1, chan_index ); 725 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 726 } 727 break; 728 729 case TGSI_OPCODE_SLT: 730 /* TGSI_OPCODE_SETLT */ 731 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 732 src0 = emit_fetch( bld, inst, 0, chan_index ); 733 src1 = emit_fetch( bld, inst, 1, chan_index ); 734 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 735 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 736 } 737 break; 738 739 case TGSI_OPCODE_SGE: 740 /* TGSI_OPCODE_SETGE */ 741 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 742 src0 = emit_fetch( bld, inst, 0, chan_index ); 743 src1 = emit_fetch( bld, inst, 1, chan_index ); 744 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 745 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 746 } 747 break; 748 749 case TGSI_OPCODE_MAD: 750 /* TGSI_OPCODE_MADD */ 751 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 752 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 753 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 754 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 755 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 756 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 757 dst0[chan_index] = tmp0; 758 } 759 break; 760 761 case TGSI_OPCODE_SUB: 762 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 763 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 764 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 765 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 766 } 767 break; 768 769 case TGSI_OPCODE_LRP: 770 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 771 src0 = emit_fetch( bld, inst, 0, chan_index ); 772 src1 = emit_fetch( bld, inst, 1, chan_index ); 773 src2 = emit_fetch( bld, inst, 2, chan_index ); 774 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 775 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 776 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 777 } 778 break; 779 780 case TGSI_OPCODE_CND: 781 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 782 src0 = emit_fetch( bld, inst, 0, chan_index ); 783 src1 = emit_fetch( bld, inst, 1, chan_index ); 784 src2 = emit_fetch( bld, inst, 2, chan_index ); 785 tmp1 = lp_build_const_scalar(bld->base.type, 0.5); 786 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 787 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 788 } 789 break; 790 791 case TGSI_OPCODE_DP2A: 792 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 793 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 794 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 795 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 796 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 797 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 798 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 799 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 800 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 801 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 802 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 803 } 804 break; 805 806 case TGSI_OPCODE_FRC: 807 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 808 src0 = emit_fetch( bld, inst, 0, chan_index ); 809 tmp0 = lp_build_floor(&bld->base, src0); 810 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 811 dst0[chan_index] = tmp0; 812 } 813 break; 814 815 case TGSI_OPCODE_CLAMP: 816 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 817 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 818 src1 = emit_fetch( bld, inst, 1, chan_index ); 819 src2 = emit_fetch( bld, inst, 2, chan_index ); 820 tmp0 = lp_build_max(&bld->base, tmp0, src1); 821 tmp0 = lp_build_min(&bld->base, tmp0, src2); 822 dst0[chan_index] = tmp0; 823 } 824 break; 825 826 case TGSI_OPCODE_FLR: 827 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 828 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 829 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 830 } 831 break; 832 833 case TGSI_OPCODE_ROUND: 834 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 835 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 836 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 837 } 838 break; 839 840 case TGSI_OPCODE_EX2: { 841 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 842 tmp0 = lp_build_exp2( &bld->base, tmp0); 843 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 844 dst0[chan_index] = tmp0; 845 } 846 break; 847 } 848 849 case TGSI_OPCODE_LG2: 850 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 851 tmp0 = lp_build_log2( &bld->base, tmp0); 852 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 853 dst0[chan_index] = tmp0; 854 } 855 break; 856 857 case TGSI_OPCODE_POW: 858 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 859 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 860 res = lp_build_pow( &bld->base, src0, src1 ); 861 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 862 dst0[chan_index] = res; 863 } 864 break; 865 866 case TGSI_OPCODE_XPD: 867 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 868 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 869 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 870 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 871 } 872 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 873 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 874 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 875 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 876 } 877 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 878 tmp2 = tmp0; 879 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 880 tmp5 = tmp3; 881 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 882 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 883 dst0[CHAN_X] = tmp2; 884 } 885 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 886 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 887 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 888 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 889 } 890 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 891 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 892 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 893 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 894 dst0[CHAN_Y] = tmp3; 895 } 896 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 897 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 898 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 899 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 900 dst0[CHAN_Z] = tmp5; 901 } 902 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 903 dst0[CHAN_W] = bld->base.one; 904 } 905 break; 906 907 case TGSI_OPCODE_ABS: 908 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 909 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 910 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 911 } 912 break; 913 914 case TGSI_OPCODE_RCC: 915 /* deprecated? */ 916 assert(0); 917 return 0; 918 919 case TGSI_OPCODE_DPH: 920 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 921 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 922 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 923 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 924 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 925 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 926 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 927 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 928 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 929 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 930 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 931 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 932 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 933 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 934 dst0[chan_index] = tmp0; 935 } 936 break; 937 938 case TGSI_OPCODE_COS: 939 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 940 tmp0 = lp_build_cos( &bld->base, tmp0 ); 941 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 942 dst0[chan_index] = tmp0; 943 } 944 break; 945 946 case TGSI_OPCODE_DDX: 947 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 948 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 949 } 950 break; 951 952 case TGSI_OPCODE_DDY: 953 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 954 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 955 } 956 break; 957 958 case TGSI_OPCODE_KILP: 959 /* predicated kill */ 960 /* FIXME */ 961 return 0; 962 break; 963 964 case TGSI_OPCODE_KIL: 965 /* conditional kill */ 966 emit_kil( bld, inst ); 967 break; 968 969 case TGSI_OPCODE_PK2H: 970 return 0; 971 break; 972 973 case TGSI_OPCODE_PK2US: 974 return 0; 975 break; 976 977 case TGSI_OPCODE_PK4B: 978 return 0; 979 break; 980 981 case TGSI_OPCODE_PK4UB: 982 return 0; 983 break; 984 985 case TGSI_OPCODE_RFL: 986 return 0; 987 break; 988 989 case TGSI_OPCODE_SEQ: 990 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 991 src0 = emit_fetch( bld, inst, 0, chan_index ); 992 src1 = emit_fetch( bld, inst, 1, chan_index ); 993 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 994 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 995 } 996 break; 997 998 case TGSI_OPCODE_SFL: 999 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1000 dst0[chan_index] = bld->base.zero; 1001 } 1002 break; 1003 1004 case TGSI_OPCODE_SGT: 1005 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1006 src0 = emit_fetch( bld, inst, 0, chan_index ); 1007 src1 = emit_fetch( bld, inst, 1, chan_index ); 1008 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1009 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1010 } 1011 break; 1012 1013 case TGSI_OPCODE_SIN: 1014 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1015 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1016 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1017 dst0[chan_index] = tmp0; 1018 } 1019 break; 1020 1021 case TGSI_OPCODE_SLE: 1022 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1023 src0 = emit_fetch( bld, inst, 0, chan_index ); 1024 src1 = emit_fetch( bld, inst, 1, chan_index ); 1025 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 1026 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1027 } 1028 break; 1029 1030 case TGSI_OPCODE_SNE: 1031 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1032 src0 = emit_fetch( bld, inst, 0, chan_index ); 1033 src1 = emit_fetch( bld, inst, 1, chan_index ); 1034 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1035 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1036 } 1037 break; 1038 1039 case TGSI_OPCODE_STR: 1040 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1041 dst0[chan_index] = bld->base.one; 1042 } 1043 break; 1044 1045 case TGSI_OPCODE_TEX: 1046 emit_tex( bld, inst, FALSE, FALSE, dst0 ); 1047 break; 1048 1049 case TGSI_OPCODE_TXD: 1050 /* FIXME */ 1051 return 0; 1052 break; 1053 1054 case TGSI_OPCODE_UP2H: 1055 /* deprecated */ 1056 assert (0); 1057 return 0; 1058 break; 1059 1060 case TGSI_OPCODE_UP2US: 1061 /* deprecated */ 1062 assert(0); 1063 return 0; 1064 break; 1065 1066 case TGSI_OPCODE_UP4B: 1067 /* deprecated */ 1068 assert(0); 1069 return 0; 1070 break; 1071 1072 case TGSI_OPCODE_UP4UB: 1073 /* deprecated */ 1074 assert(0); 1075 return 0; 1076 break; 1077 1078 case TGSI_OPCODE_X2D: 1079 /* deprecated? */ 1080 assert(0); 1081 return 0; 1082 break; 1083 1084 case TGSI_OPCODE_ARA: 1085 /* deprecated */ 1086 assert(0); 1087 return 0; 1088 break; 1089 1090#if 0 1091 case TGSI_OPCODE_ARR: 1092 /* FIXME */ 1093 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1094 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1095 emit_rnd( bld, 0, 0 ); 1096 emit_f2it( bld, 0 ); 1097 dst0[chan_index] = tmp0; 1098 } 1099 break; 1100#endif 1101 1102 case TGSI_OPCODE_BRA: 1103 /* deprecated */ 1104 assert(0); 1105 return 0; 1106 break; 1107 1108 case TGSI_OPCODE_CAL: 1109 /* FIXME */ 1110 return 0; 1111 break; 1112 1113 case TGSI_OPCODE_RET: 1114 /* FIXME */ 1115 return 0; 1116 break; 1117 1118 case TGSI_OPCODE_END: 1119 break; 1120 1121 case TGSI_OPCODE_SSG: 1122 /* TGSI_OPCODE_SGN */ 1123 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1124 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1125 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1126 } 1127 break; 1128 1129 case TGSI_OPCODE_CMP: 1130 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1131 src0 = emit_fetch( bld, inst, 0, chan_index ); 1132 src1 = emit_fetch( bld, inst, 1, chan_index ); 1133 src2 = emit_fetch( bld, inst, 2, chan_index ); 1134 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 1135 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 1136 } 1137 break; 1138 1139 case TGSI_OPCODE_SCS: 1140 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1141 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1142 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 1143 } 1144 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1145 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1146 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 1147 } 1148 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1149 dst0[CHAN_Z] = bld->base.zero; 1150 } 1151 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1152 dst0[CHAN_W] = bld->base.one; 1153 } 1154 break; 1155 1156 case TGSI_OPCODE_TXB: 1157 emit_tex( bld, inst, TRUE, FALSE, dst0 ); 1158 break; 1159 1160 case TGSI_OPCODE_NRM: 1161 /* fall-through */ 1162 case TGSI_OPCODE_NRM4: 1163 /* 3 or 4-component normalization */ 1164 { 1165 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 1166 1167 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 1168 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 1169 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 1170 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 1171 1172 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1173 1174 /* xmm4 = src.x */ 1175 /* xmm0 = src.x * src.x */ 1176 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1177 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1178 tmp4 = tmp0; 1179 } 1180 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 1181 1182 /* xmm5 = src.y */ 1183 /* xmm0 = xmm0 + src.y * src.y */ 1184 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 1185 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1186 tmp5 = tmp1; 1187 } 1188 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1189 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1190 1191 /* xmm6 = src.z */ 1192 /* xmm0 = xmm0 + src.z * src.z */ 1193 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 1194 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1195 tmp6 = tmp1; 1196 } 1197 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1198 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1199 1200 if (dims == 4) { 1201 /* xmm7 = src.w */ 1202 /* xmm0 = xmm0 + src.w * src.w */ 1203 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 1204 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 1205 tmp7 = tmp1; 1206 } 1207 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1208 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1209 } 1210 1211 /* xmm1 = 1 / sqrt(xmm0) */ 1212 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 1213 1214 /* dst.x = xmm1 * src.x */ 1215 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1216 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 1217 } 1218 1219 /* dst.y = xmm1 * src.y */ 1220 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1221 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 1222 } 1223 1224 /* dst.z = xmm1 * src.z */ 1225 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1226 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 1227 } 1228 1229 /* dst.w = xmm1 * src.w */ 1230 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 1231 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 1232 } 1233 } 1234 1235 /* dst.w = 1.0 */ 1236 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 1237 dst0[CHAN_W] = bld->base.one; 1238 } 1239 } 1240 break; 1241 1242 case TGSI_OPCODE_DIV: 1243 /* deprecated */ 1244 assert( 0 ); 1245 return 0; 1246 break; 1247 1248 case TGSI_OPCODE_DP2: 1249 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1250 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1251 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1252 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1253 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1254 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1255 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1256 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1257 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1258 } 1259 break; 1260 1261 case TGSI_OPCODE_TXL: 1262 emit_tex( bld, inst, TRUE, FALSE, dst0 ); 1263 break; 1264 1265 case TGSI_OPCODE_TXP: 1266 emit_tex( bld, inst, FALSE, TRUE, dst0 ); 1267 break; 1268 1269 case TGSI_OPCODE_BRK: 1270 /* FIXME */ 1271 return 0; 1272 break; 1273 1274 case TGSI_OPCODE_IF: 1275 /* FIXME */ 1276 return 0; 1277 break; 1278 1279 case TGSI_OPCODE_BGNFOR: 1280 /* deprecated */ 1281 assert(0); 1282 return 0; 1283 break; 1284 1285 case TGSI_OPCODE_REP: 1286 /* deprecated */ 1287 assert(0); 1288 return 0; 1289 break; 1290 1291 case TGSI_OPCODE_ELSE: 1292 /* FIXME */ 1293 return 0; 1294 break; 1295 1296 case TGSI_OPCODE_ENDIF: 1297 /* FIXME */ 1298 return 0; 1299 break; 1300 1301 case TGSI_OPCODE_ENDFOR: 1302 /* deprecated */ 1303 assert(0); 1304 return 0; 1305 break; 1306 1307 case TGSI_OPCODE_ENDREP: 1308 /* deprecated */ 1309 assert(0); 1310 return 0; 1311 break; 1312 1313 case TGSI_OPCODE_PUSHA: 1314 /* deprecated? */ 1315 assert(0); 1316 return 0; 1317 break; 1318 1319 case TGSI_OPCODE_POPA: 1320 /* deprecated? */ 1321 assert(0); 1322 return 0; 1323 break; 1324 1325 case TGSI_OPCODE_CEIL: 1326 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1327 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1328 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 1329 } 1330 break; 1331 1332 case TGSI_OPCODE_I2F: 1333 /* deprecated? */ 1334 assert(0); 1335 return 0; 1336 break; 1337 1338 case TGSI_OPCODE_NOT: 1339 /* deprecated? */ 1340 assert(0); 1341 return 0; 1342 break; 1343 1344 case TGSI_OPCODE_TRUNC: 1345 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1346 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1347 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 1348 } 1349 break; 1350 1351 case TGSI_OPCODE_SHL: 1352 /* deprecated? */ 1353 assert(0); 1354 return 0; 1355 break; 1356 1357 case TGSI_OPCODE_ISHR: 1358 /* deprecated? */ 1359 assert(0); 1360 return 0; 1361 break; 1362 1363 case TGSI_OPCODE_AND: 1364 /* deprecated? */ 1365 assert(0); 1366 return 0; 1367 break; 1368 1369 case TGSI_OPCODE_OR: 1370 /* deprecated? */ 1371 assert(0); 1372 return 0; 1373 break; 1374 1375 case TGSI_OPCODE_MOD: 1376 /* deprecated? */ 1377 assert(0); 1378 return 0; 1379 break; 1380 1381 case TGSI_OPCODE_XOR: 1382 /* deprecated? */ 1383 assert(0); 1384 return 0; 1385 break; 1386 1387 case TGSI_OPCODE_SAD: 1388 /* deprecated? */ 1389 assert(0); 1390 return 0; 1391 break; 1392 1393 case TGSI_OPCODE_TXF: 1394 /* deprecated? */ 1395 assert(0); 1396 return 0; 1397 break; 1398 1399 case TGSI_OPCODE_TXQ: 1400 /* deprecated? */ 1401 assert(0); 1402 return 0; 1403 break; 1404 1405 case TGSI_OPCODE_CONT: 1406 /* deprecated? */ 1407 assert(0); 1408 return 0; 1409 break; 1410 1411 case TGSI_OPCODE_EMIT: 1412 return 0; 1413 break; 1414 1415 case TGSI_OPCODE_ENDPRIM: 1416 return 0; 1417 break; 1418 1419 case TGSI_OPCODE_NOP: 1420 break; 1421 1422 default: 1423 return 0; 1424 } 1425 1426 if(info->num_dst) { 1427 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1428 emit_store( bld, inst, 0, chan_index, dst0[chan_index]); 1429 } 1430 } 1431 1432 return 1; 1433} 1434 1435 1436void 1437lp_build_tgsi_soa(LLVMBuilderRef builder, 1438 const struct tgsi_token *tokens, 1439 struct lp_type type, 1440 struct lp_build_mask_context *mask, 1441 LLVMValueRef consts_ptr, 1442 const LLVMValueRef *pos, 1443 const LLVMValueRef (*inputs)[NUM_CHANNELS], 1444 LLVMValueRef (*outputs)[NUM_CHANNELS], 1445 struct lp_build_sampler_soa *sampler) 1446{ 1447 struct lp_build_tgsi_soa_context bld; 1448 struct tgsi_parse_context parse; 1449 uint num_immediates = 0; 1450 unsigned i; 1451 1452 /* Setup build context */ 1453 memset(&bld, 0, sizeof bld); 1454 lp_build_context_init(&bld.base, builder, type); 1455 bld.mask = mask; 1456 bld.pos = pos; 1457 bld.inputs = inputs; 1458 bld.outputs = outputs; 1459 bld.consts_ptr = consts_ptr; 1460 bld.sampler = sampler; 1461 1462 tgsi_parse_init( &parse, tokens ); 1463 1464 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1465 tgsi_parse_token( &parse ); 1466 1467 switch( parse.FullToken.Token.Type ) { 1468 case TGSI_TOKEN_TYPE_DECLARATION: 1469 /* Inputs already interpolated */ 1470 { 1471 if (!emit_declaration( &bld, &parse.FullToken.FullDeclaration )) 1472 _debug_printf("warning: failed to define LLVM variable\n"); 1473 } 1474 break; 1475 1476 case TGSI_TOKEN_TYPE_INSTRUCTION: 1477 { 1478 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; 1479 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode); 1480 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, info )) 1481 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 1482 info ? info->mnemonic : "<invalid>"); 1483 } 1484 1485 break; 1486 1487 case TGSI_TOKEN_TYPE_IMMEDIATE: 1488 /* simply copy the immediate values into the next immediates[] slot */ 1489 { 1490 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1491 assert(size <= 4); 1492 assert(num_immediates < LP_MAX_IMMEDIATES); 1493 for( i = 0; i < size; ++i ) 1494 bld.immediates[num_immediates][i] = 1495 lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float); 1496 for( i = size; i < 4; ++i ) 1497 bld.immediates[num_immediates][i] = bld.base.undef; 1498 num_immediates++; 1499 } 1500 break; 1501 1502 default: 1503 assert( 0 ); 1504 } 1505 } 1506 1507 tgsi_parse_free( &parse ); 1508} 1509 1510