lp_bld_tgsi_soa.c revision c61bf363937f40624a5632745630d4f2b9907082
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 
37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_info.h" 45#include "tgsi/tgsi_parse.h" 46#include "tgsi/tgsi_util.h" 47#include "tgsi/tgsi_exec.h" 48#include "lp_bld_type.h" 49#include "lp_bld_const.h" 50#include "lp_bld_arit.h" 51#include "lp_bld_logic.h" 52#include "lp_bld_swizzle.h" 53#include "lp_bld_flow.h" 54#include "lp_bld_tgsi.h" 55 56 57#define LP_MAX_TEMPS 256 58#define LP_MAX_IMMEDIATES 256 59 60 61#define FOR_EACH_CHANNEL( CHAN )\ 62 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 63 64#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 65 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 66 67#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 68 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 69 70#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 71 FOR_EACH_CHANNEL( CHAN )\ 72 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 73 74#define CHAN_X 0 75#define CHAN_Y 1 76#define CHAN_Z 2 77#define CHAN_W 3 78 79#define QUAD_TOP_LEFT 0 80#define QUAD_TOP_RIGHT 1 81#define QUAD_BOTTOM_LEFT 2 82#define QUAD_BOTTOM_RIGHT 3 83 84 85struct lp_build_tgsi_soa_context 86{ 87 struct lp_build_context base; 88 89 LLVMValueRef consts_ptr; 90 const LLVMValueRef *pos; 91 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 92 LLVMValueRef (*outputs)[NUM_CHANNELS]; 93 94 struct lp_build_sampler_soa *sampler; 95 96 LLVMValueRef immediates[LP_MAX_IMMEDIATES][NUM_CHANNELS]; 97 LLVMValueRef temps[LP_MAX_TEMPS][NUM_CHANNELS]; 98 99 struct lp_build_mask_context *mask; 100}; 101 102 103static const unsigned char 104swizzle_left[4] = { 105 QUAD_TOP_LEFT, QUAD_TOP_LEFT, 106 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT 107}; 108 109static const unsigned char 110swizzle_right[4] = { 111 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, 112 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT 113}; 114 115static const unsigned char 116swizzle_top[4] = { 117 QUAD_TOP_LEFT, QUAD_TOP_RIGHT, 118 QUAD_TOP_LEFT, QUAD_TOP_RIGHT 
119}; 120 121static const unsigned char 122swizzle_bottom[4] = { 123 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, 124 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT 125}; 126 127 128static LLVMValueRef 129emit_ddx(struct lp_build_tgsi_soa_context *bld, 130 LLVMValueRef src) 131{ 132 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); 133 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); 134 return lp_build_sub(&bld->base, src_right, src_left); 135} 136 137 138static LLVMValueRef 139emit_ddy(struct lp_build_tgsi_soa_context *bld, 140 LLVMValueRef src) 141{ 142 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); 143 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); 144 return lp_build_sub(&bld->base, src_top, src_bottom); 145} 146 147 148/** 149 * Register fetch. 150 */ 151static LLVMValueRef 152emit_fetch( 153 struct lp_build_tgsi_soa_context *bld, 154 const struct tgsi_full_instruction *inst, 155 unsigned index, 156 const unsigned chan_index ) 157{ 158 const struct tgsi_full_src_register *reg = &inst->Src[index]; 159 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 160 LLVMValueRef res; 161 162 switch (swizzle) { 163 case TGSI_SWIZZLE_X: 164 case TGSI_SWIZZLE_Y: 165 case TGSI_SWIZZLE_Z: 166 case TGSI_SWIZZLE_W: 167 168 switch (reg->Register.File) { 169 case TGSI_FILE_CONSTANT: { 170 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); 171 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); 172 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 173 res = lp_build_broadcast_scalar(&bld->base, scalar); 174 break; 175 } 176 177 case TGSI_FILE_IMMEDIATE: 178 res = bld->immediates[reg->Register.Index][swizzle]; 179 assert(res); 180 break; 181 182 case TGSI_FILE_INPUT: 183 res = bld->inputs[reg->Register.Index][swizzle]; 184 assert(res); 185 break; 186 187 
case TGSI_FILE_TEMPORARY: 188 res = bld->temps[reg->Register.Index][swizzle]; 189 if(!res) 190 return bld->base.undef; 191 break; 192 193 default: 194 assert( 0 ); 195 return bld->base.undef; 196 } 197 break; 198 199 default: 200 assert( 0 ); 201 return bld->base.undef; 202 } 203 204 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { 205 case TGSI_UTIL_SIGN_CLEAR: 206 res = lp_build_abs( &bld->base, res ); 207 break; 208 209 case TGSI_UTIL_SIGN_SET: 210 /* TODO: Use bitwese OR for floating point */ 211 res = lp_build_abs( &bld->base, res ); 212 res = LLVMBuildNeg( bld->base.builder, res, "" ); 213 break; 214 215 case TGSI_UTIL_SIGN_TOGGLE: 216 res = LLVMBuildNeg( bld->base.builder, res, "" ); 217 break; 218 219 case TGSI_UTIL_SIGN_KEEP: 220 break; 221 } 222 223 return res; 224} 225 226 227/** 228 * Register fetch with derivatives. 229 */ 230static void 231emit_fetch_deriv( 232 struct lp_build_tgsi_soa_context *bld, 233 const struct tgsi_full_instruction *inst, 234 unsigned index, 235 const unsigned chan_index, 236 LLVMValueRef *res, 237 LLVMValueRef *ddx, 238 LLVMValueRef *ddy) 239{ 240 LLVMValueRef src; 241 242 src = emit_fetch(bld, inst, index, chan_index); 243 244 if(res) 245 *res = src; 246 247 /* TODO: use interpolation coeffs for inputs */ 248 249 if(ddx) 250 *ddx = emit_ddx(bld, src); 251 252 if(ddy) 253 *ddy = emit_ddy(bld, src); 254} 255 256 257/** 258 * Register store. 
259 */ 260static void 261emit_store( 262 struct lp_build_tgsi_soa_context *bld, 263 const struct tgsi_full_instruction *inst, 264 unsigned index, 265 unsigned chan_index, 266 LLVMValueRef value) 267{ 268 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 269 270 switch( inst->Instruction.Saturate ) { 271 case TGSI_SAT_NONE: 272 break; 273 274 case TGSI_SAT_ZERO_ONE: 275 value = lp_build_max(&bld->base, value, bld->base.zero); 276 value = lp_build_min(&bld->base, value, bld->base.one); 277 break; 278 279 case TGSI_SAT_MINUS_PLUS_ONE: 280 value = lp_build_max(&bld->base, value, lp_build_const_scalar(bld->base.type, -1.0)); 281 value = lp_build_min(&bld->base, value, bld->base.one); 282 break; 283 284 default: 285 assert(0); 286 } 287 288 switch( reg->Register.File ) { 289 case TGSI_FILE_OUTPUT: 290 bld->outputs[reg->Register.Index][chan_index] = value; 291 break; 292 293 case TGSI_FILE_TEMPORARY: 294 bld->temps[reg->Register.Index][chan_index] = value; 295 break; 296 297 case TGSI_FILE_ADDRESS: 298 /* FIXME */ 299 assert(0); 300 break; 301 302 default: 303 assert( 0 ); 304 } 305} 306 307 308/** 309 * High-level instruction translators. 
310 */ 311 312 313static void 314emit_tex( struct lp_build_tgsi_soa_context *bld, 315 const struct tgsi_full_instruction *inst, 316 boolean apply_lodbias, 317 boolean projected, 318 LLVMValueRef *texel) 319{ 320 const uint unit = inst->Src[1].Register.Index; 321 LLVMValueRef lodbias; 322 LLVMValueRef oow = NULL; 323 LLVMValueRef coords[3]; 324 unsigned num_coords; 325 unsigned i; 326 327 switch (inst->Texture.Texture) { 328 case TGSI_TEXTURE_1D: 329 num_coords = 1; 330 break; 331 case TGSI_TEXTURE_2D: 332 case TGSI_TEXTURE_RECT: 333 num_coords = 2; 334 break; 335 case TGSI_TEXTURE_SHADOW1D: 336 case TGSI_TEXTURE_SHADOW2D: 337 case TGSI_TEXTURE_SHADOWRECT: 338 case TGSI_TEXTURE_3D: 339 case TGSI_TEXTURE_CUBE: 340 num_coords = 3; 341 break; 342 default: 343 assert(0); 344 return; 345 } 346 347 if(apply_lodbias) 348 lodbias = emit_fetch( bld, inst, 0, 3 ); 349 else 350 lodbias = bld->base.zero; 351 352 if (projected) { 353 oow = emit_fetch( bld, inst, 0, 3 ); 354 oow = lp_build_rcp(&bld->base, oow); 355 } 356 357 for (i = 0; i < num_coords; i++) { 358 coords[i] = emit_fetch( bld, inst, 0, i ); 359 if (projected) 360 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 361 } 362 for (i = num_coords; i < 3; i++) { 363 coords[i] = bld->base.undef; 364 } 365 366 bld->sampler->emit_fetch_texel(bld->sampler, 367 bld->base.builder, 368 bld->base.type, 369 unit, num_coords, coords, lodbias, 370 texel); 371} 372 373 374static void 375emit_kil( 376 struct lp_build_tgsi_soa_context *bld, 377 const struct tgsi_full_instruction *inst ) 378{ 379 const struct tgsi_full_src_register *reg = &inst->Src[0]; 380 LLVMValueRef terms[NUM_CHANNELS]; 381 LLVMValueRef mask; 382 unsigned chan_index; 383 384 memset(&terms, 0, sizeof terms); 385 386 FOR_EACH_CHANNEL( chan_index ) { 387 unsigned swizzle; 388 389 /* Unswizzle channel */ 390 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 391 392 /* Check if the component has not been already tested. 
*/ 393 assert(swizzle < NUM_CHANNELS); 394 if( !terms[swizzle] ) 395 /* TODO: change the comparison operator instead of setting the sign */ 396 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 397 } 398 399 mask = NULL; 400 FOR_EACH_CHANNEL( chan_index ) { 401 if(terms[chan_index]) { 402 LLVMValueRef chan_mask; 403 404 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 405 406 if(mask) 407 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); 408 else 409 mask = chan_mask; 410 } 411 } 412 413 if(mask) 414 lp_build_mask_update(bld->mask, mask); 415} 416 417 418/** 419 * Check if inst src/dest regs use indirect addressing into temporary 420 * register file. 421 */ 422static boolean 423indirect_temp_reference(const struct tgsi_full_instruction *inst) 424{ 425 uint i; 426 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 427 const struct tgsi_full_src_register *reg = &inst->Src[i]; 428 if (reg->Register.File == TGSI_FILE_TEMPORARY && 429 reg->Register.Indirect) 430 return TRUE; 431 } 432 for (i = 0; i < inst->Instruction.NumDstRegs; i++) { 433 const struct tgsi_full_dst_register *reg = &inst->Dst[i]; 434 if (reg->Register.File == TGSI_FILE_TEMPORARY && 435 reg->Register.Indirect) 436 return TRUE; 437 } 438 return FALSE; 439} 440 441 442static int 443emit_instruction( 444 struct lp_build_tgsi_soa_context *bld, 445 const struct tgsi_full_instruction *inst, 446 const struct tgsi_opcode_info *info) 447{ 448 unsigned chan_index; 449 LLVMValueRef src0, src1, src2; 450 LLVMValueRef tmp0, tmp1, tmp2; 451 LLVMValueRef tmp3 = NULL; 452 LLVMValueRef tmp4 = NULL; 453 LLVMValueRef tmp5 = NULL; 454 LLVMValueRef tmp6 = NULL; 455 LLVMValueRef tmp7 = NULL; 456 LLVMValueRef res; 457 LLVMValueRef dst0[NUM_CHANNELS]; 458 459 /* we can't handle indirect addressing into temp register file yet */ 460 if (indirect_temp_reference(inst)) 461 return FALSE; 462 463 assert(info->num_dst <= 1); 464 if(info->num_dst) { 465 
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 466 dst0[chan_index] = bld->base.undef; 467 } 468 } 469 470 switch (inst->Instruction.Opcode) { 471#if 0 472 case TGSI_OPCODE_ARL: 473 /* FIXME */ 474 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 475 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 476 emit_flr(bld, 0, 0); 477 emit_f2it( bld, 0 ); 478 dst0[chan_index] = tmp0; 479 } 480 break; 481#endif 482 483 case TGSI_OPCODE_MOV: 484 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 485 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 486 } 487 break; 488 489 case TGSI_OPCODE_LIT: 490 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 491 dst0[CHAN_X] = bld->base.one; 492 } 493 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 494 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 495 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 496 } 497 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 498 /* XMM[1] = SrcReg[0].yyyy */ 499 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 500 /* XMM[1] = max(XMM[1], 0) */ 501 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 502 /* XMM[2] = SrcReg[0].wwww */ 503 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 504 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 505 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 506 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 507 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 508 } 509 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 510 dst0[CHAN_W] = bld->base.one; 511 } 512 break; 513 514 case TGSI_OPCODE_RCP: 515 /* TGSI_OPCODE_RECIP */ 516 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 517 res = lp_build_rcp(&bld->base, src0); 518 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 519 dst0[chan_index] = res; 520 } 521 break; 522 523 case TGSI_OPCODE_RSQ: 524 /* TGSI_OPCODE_RECIPSQRT */ 525 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 526 src0 = lp_build_abs(&bld->base, src0); 527 res = lp_build_rsqrt(&bld->base, src0); 528 
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 529 dst0[chan_index] = res; 530 } 531 break; 532 533 case TGSI_OPCODE_EXP: 534 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 535 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 536 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 537 LLVMValueRef *p_exp2_int_part = NULL; 538 LLVMValueRef *p_frac_part = NULL; 539 LLVMValueRef *p_exp2 = NULL; 540 541 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 542 543 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 544 p_exp2_int_part = &tmp0; 545 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 546 p_frac_part = &tmp1; 547 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 548 p_exp2 = &tmp2; 549 550 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 551 552 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 553 dst0[CHAN_X] = tmp0; 554 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 555 dst0[CHAN_Y] = tmp1; 556 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 557 dst0[CHAN_Z] = tmp2; 558 } 559 /* dst.w = 1.0 */ 560 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 561 dst0[CHAN_W] = bld->base.one; 562 } 563 break; 564 565 case TGSI_OPCODE_LOG: 566 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 567 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 568 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 569 LLVMValueRef *p_floor_log2 = NULL; 570 LLVMValueRef *p_exp = NULL; 571 LLVMValueRef *p_log2 = NULL; 572 573 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 574 src0 = lp_build_abs( &bld->base, src0 ); 575 576 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 577 p_floor_log2 = &tmp0; 578 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 579 p_exp = &tmp1; 580 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 581 p_log2 = &tmp2; 582 583 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 584 585 /* dst.x = floor(lg2(abs(src.x))) */ 586 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 587 dst0[CHAN_X] = tmp0; 588 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 589 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 590 
dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 591 } 592 /* dst.z = lg2(abs(src.x)) */ 593 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 594 dst0[CHAN_Z] = tmp2; 595 } 596 /* dst.w = 1.0 */ 597 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 598 dst0[CHAN_W] = bld->base.one; 599 } 600 break; 601 602 case TGSI_OPCODE_MUL: 603 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 604 src0 = emit_fetch( bld, inst, 0, chan_index ); 605 src1 = emit_fetch( bld, inst, 1, chan_index ); 606 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 607 } 608 break; 609 610 case TGSI_OPCODE_ADD: 611 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 612 src0 = emit_fetch( bld, inst, 0, chan_index ); 613 src1 = emit_fetch( bld, inst, 1, chan_index ); 614 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 615 } 616 break; 617 618 case TGSI_OPCODE_DP3: 619 /* TGSI_OPCODE_DOT3 */ 620 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 621 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 622 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 623 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 624 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 625 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 626 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 627 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 628 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 629 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 630 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 631 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 632 dst0[chan_index] = tmp0; 633 } 634 break; 635 636 case TGSI_OPCODE_DP4: 637 /* TGSI_OPCODE_DOT4 */ 638 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 639 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 640 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 641 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 642 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 643 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 644 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 645 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 646 tmp2 = emit_fetch( bld, inst, 1, 
CHAN_Z ); 647 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 648 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 649 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 650 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 651 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 652 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 653 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 654 dst0[chan_index] = tmp0; 655 } 656 break; 657 658 case TGSI_OPCODE_DST: 659 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 660 dst0[CHAN_X] = bld->base.one; 661 } 662 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 663 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 664 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 665 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 666 } 667 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 668 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 669 } 670 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 671 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 672 } 673 break; 674 675 case TGSI_OPCODE_MIN: 676 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 677 src0 = emit_fetch( bld, inst, 0, chan_index ); 678 src1 = emit_fetch( bld, inst, 1, chan_index ); 679 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 680 } 681 break; 682 683 case TGSI_OPCODE_MAX: 684 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 685 src0 = emit_fetch( bld, inst, 0, chan_index ); 686 src1 = emit_fetch( bld, inst, 1, chan_index ); 687 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 688 } 689 break; 690 691 case TGSI_OPCODE_SLT: 692 /* TGSI_OPCODE_SETLT */ 693 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 694 src0 = emit_fetch( bld, inst, 0, chan_index ); 695 src1 = emit_fetch( bld, inst, 1, chan_index ); 696 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 697 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 698 } 699 break; 700 701 case TGSI_OPCODE_SGE: 702 /* TGSI_OPCODE_SETGE */ 703 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index 
) { 704 src0 = emit_fetch( bld, inst, 0, chan_index ); 705 src1 = emit_fetch( bld, inst, 1, chan_index ); 706 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 707 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 708 } 709 break; 710 711 case TGSI_OPCODE_MAD: 712 /* TGSI_OPCODE_MADD */ 713 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 714 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 715 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 716 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 717 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 718 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 719 dst0[chan_index] = tmp0; 720 } 721 break; 722 723 case TGSI_OPCODE_SUB: 724 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 725 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 726 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 727 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 728 } 729 break; 730 731 case TGSI_OPCODE_LRP: 732 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 733 src0 = emit_fetch( bld, inst, 0, chan_index ); 734 src1 = emit_fetch( bld, inst, 1, chan_index ); 735 src2 = emit_fetch( bld, inst, 2, chan_index ); 736 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 737 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 738 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 739 } 740 break; 741 742 case TGSI_OPCODE_CND: 743 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 744 src0 = emit_fetch( bld, inst, 0, chan_index ); 745 src1 = emit_fetch( bld, inst, 1, chan_index ); 746 src2 = emit_fetch( bld, inst, 2, chan_index ); 747 tmp1 = lp_build_const_scalar(bld->base.type, 0.5); 748 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 749 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 750 } 751 break; 752 753 case TGSI_OPCODE_DP2A: 754 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 755 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = 
src[1].x */ 756 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 757 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 758 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 759 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 760 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 761 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 762 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 763 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 764 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 765 } 766 break; 767 768 case TGSI_OPCODE_FRC: 769 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 770 src0 = emit_fetch( bld, inst, 0, chan_index ); 771 tmp0 = lp_build_floor(&bld->base, src0); 772 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 773 dst0[chan_index] = tmp0; 774 } 775 break; 776 777 case TGSI_OPCODE_CLAMP: 778 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 779 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 780 src1 = emit_fetch( bld, inst, 1, chan_index ); 781 src2 = emit_fetch( bld, inst, 2, chan_index ); 782 tmp0 = lp_build_max(&bld->base, tmp0, src1); 783 tmp0 = lp_build_min(&bld->base, tmp0, src2); 784 dst0[chan_index] = tmp0; 785 } 786 break; 787 788 case TGSI_OPCODE_FLR: 789 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 790 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 791 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 792 } 793 break; 794 795 case TGSI_OPCODE_ROUND: 796 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 797 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 798 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 799 } 800 break; 801 802 case TGSI_OPCODE_EX2: { 803 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 804 tmp0 = lp_build_exp2( &bld->base, tmp0); 805 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 806 dst0[chan_index] = tmp0; 807 } 808 break; 809 } 810 811 case TGSI_OPCODE_LG2: 812 tmp0 
= emit_fetch( bld, inst, 0, CHAN_X ); 813 tmp0 = lp_build_log2( &bld->base, tmp0); 814 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 815 dst0[chan_index] = tmp0; 816 } 817 break; 818 819 case TGSI_OPCODE_POW: 820 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 821 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 822 res = lp_build_pow( &bld->base, src0, src1 ); 823 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 824 dst0[chan_index] = res; 825 } 826 break; 827 828 case TGSI_OPCODE_XPD: 829 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 830 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 831 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 832 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 833 } 834 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 835 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 836 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 837 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 838 } 839 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 840 tmp2 = tmp0; 841 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 842 tmp5 = tmp3; 843 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 844 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 845 dst0[CHAN_X] = tmp2; 846 } 847 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 848 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 849 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 850 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 851 } 852 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 853 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 854 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 855 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 856 dst0[CHAN_Y] = tmp3; 857 } 858 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 859 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 860 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 861 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 862 dst0[CHAN_Z] = tmp5; 863 } 864 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 865 dst0[CHAN_W] = bld->base.one; 866 } 867 break; 868 869 case TGSI_OPCODE_ABS: 870 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 
871 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 872 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 873 } 874 break; 875 876 case TGSI_OPCODE_RCC: 877 /* deprecated? */ 878 assert(0); 879 return 0; 880 881 case TGSI_OPCODE_DPH: 882 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 883 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 884 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 885 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 886 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 887 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 888 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 889 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 890 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 891 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 892 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 893 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 894 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 895 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 896 dst0[chan_index] = tmp0; 897 } 898 break; 899 900 case TGSI_OPCODE_COS: 901 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 902 tmp0 = lp_build_cos( &bld->base, tmp0 ); 903 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 904 dst0[chan_index] = tmp0; 905 } 906 break; 907 908 case TGSI_OPCODE_DDX: 909 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 910 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 911 } 912 break; 913 914 case TGSI_OPCODE_DDY: 915 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 916 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 917 } 918 break; 919 920 case TGSI_OPCODE_KILP: 921 /* predicated kill */ 922 /* FIXME */ 923 return 0; 924 break; 925 926 case TGSI_OPCODE_KIL: 927 /* conditional kill */ 928 emit_kil( bld, inst ); 929 break; 930 931 case TGSI_OPCODE_PK2H: 932 return 0; 933 break; 934 935 case TGSI_OPCODE_PK2US: 936 return 0; 937 break; 938 939 case TGSI_OPCODE_PK4B: 940 return 0; 941 break; 942 943 case TGSI_OPCODE_PK4UB: 944 return 0; 945 break; 946 947 case 
TGSI_OPCODE_RFL: 948 return 0; 949 break; 950 951 case TGSI_OPCODE_SEQ: 952 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 953 src0 = emit_fetch( bld, inst, 0, chan_index ); 954 src1 = emit_fetch( bld, inst, 1, chan_index ); 955 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 956 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 957 } 958 break; 959 960 case TGSI_OPCODE_SFL: 961 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 962 dst0[chan_index] = bld->base.zero; 963 } 964 break; 965 966 case TGSI_OPCODE_SGT: 967 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 968 src0 = emit_fetch( bld, inst, 0, chan_index ); 969 src1 = emit_fetch( bld, inst, 1, chan_index ); 970 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 971 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 972 } 973 break; 974 975 case TGSI_OPCODE_SIN: 976 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 977 tmp0 = lp_build_sin( &bld->base, tmp0 ); 978 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 979 dst0[chan_index] = tmp0; 980 } 981 break; 982 983 case TGSI_OPCODE_SLE: 984 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 985 src0 = emit_fetch( bld, inst, 0, chan_index ); 986 src1 = emit_fetch( bld, inst, 1, chan_index ); 987 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 988 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 989 } 990 break; 991 992 case TGSI_OPCODE_SNE: 993 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 994 src0 = emit_fetch( bld, inst, 0, chan_index ); 995 src1 = emit_fetch( bld, inst, 1, chan_index ); 996 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 997 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 998 } 999 break; 1000 1001 case TGSI_OPCODE_STR: 1002 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1003 dst0[chan_index] = bld->base.one; 
1004 } 1005 break; 1006 1007 case TGSI_OPCODE_TEX: 1008 emit_tex( bld, inst, FALSE, FALSE, dst0 ); 1009 break; 1010 1011 case TGSI_OPCODE_TXD: 1012 /* FIXME */ 1013 return 0; 1014 break; 1015 1016 case TGSI_OPCODE_UP2H: 1017 /* deprecated */ 1018 assert (0); 1019 return 0; 1020 break; 1021 1022 case TGSI_OPCODE_UP2US: 1023 /* deprecated */ 1024 assert(0); 1025 return 0; 1026 break; 1027 1028 case TGSI_OPCODE_UP4B: 1029 /* deprecated */ 1030 assert(0); 1031 return 0; 1032 break; 1033 1034 case TGSI_OPCODE_UP4UB: 1035 /* deprecated */ 1036 assert(0); 1037 return 0; 1038 break; 1039 1040 case TGSI_OPCODE_X2D: 1041 /* deprecated? */ 1042 assert(0); 1043 return 0; 1044 break; 1045 1046 case TGSI_OPCODE_ARA: 1047 /* deprecated */ 1048 assert(0); 1049 return 0; 1050 break; 1051 1052#if 0 1053 case TGSI_OPCODE_ARR: 1054 /* FIXME */ 1055 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1056 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1057 emit_rnd( bld, 0, 0 ); 1058 emit_f2it( bld, 0 ); 1059 dst0[chan_index] = tmp0; 1060 } 1061 break; 1062#endif 1063 1064 case TGSI_OPCODE_BRA: 1065 /* deprecated */ 1066 assert(0); 1067 return 0; 1068 break; 1069 1070 case TGSI_OPCODE_CAL: 1071 /* FIXME */ 1072 return 0; 1073 break; 1074 1075 case TGSI_OPCODE_RET: 1076 /* FIXME */ 1077 return 0; 1078 break; 1079 1080 case TGSI_OPCODE_END: 1081 break; 1082 1083 case TGSI_OPCODE_SSG: 1084 /* TGSI_OPCODE_SGN */ 1085 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1086 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1087 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1088 } 1089 break; 1090 1091 case TGSI_OPCODE_CMP: 1092 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1093 src0 = emit_fetch( bld, inst, 0, chan_index ); 1094 src1 = emit_fetch( bld, inst, 1, chan_index ); 1095 src2 = emit_fetch( bld, inst, 2, chan_index ); 1096 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 1097 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 1098 } 
1099 break; 1100 1101 case TGSI_OPCODE_SCS: 1102 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1103 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1104 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 1105 } 1106 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1107 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1108 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 1109 } 1110 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1111 dst0[CHAN_Z] = bld->base.zero; 1112 } 1113 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1114 dst0[CHAN_W] = bld->base.one; 1115 } 1116 break; 1117 1118 case TGSI_OPCODE_TXB: 1119 emit_tex( bld, inst, TRUE, FALSE, dst0 ); 1120 break; 1121 1122 case TGSI_OPCODE_NRM: 1123 /* fall-through */ 1124 case TGSI_OPCODE_NRM4: 1125 /* 3 or 4-component normalization */ 1126 { 1127 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 1128 1129 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 1130 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 1131 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 1132 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 1133 1134 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). 
*/ 1135 1136 /* xmm4 = src.x */ 1137 /* xmm0 = src.x * src.x */ 1138 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1139 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1140 tmp4 = tmp0; 1141 } 1142 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 1143 1144 /* xmm5 = src.y */ 1145 /* xmm0 = xmm0 + src.y * src.y */ 1146 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 1147 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1148 tmp5 = tmp1; 1149 } 1150 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1151 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1152 1153 /* xmm6 = src.z */ 1154 /* xmm0 = xmm0 + src.z * src.z */ 1155 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 1156 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1157 tmp6 = tmp1; 1158 } 1159 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1160 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1161 1162 if (dims == 4) { 1163 /* xmm7 = src.w */ 1164 /* xmm0 = xmm0 + src.w * src.w */ 1165 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 1166 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 1167 tmp7 = tmp1; 1168 } 1169 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1170 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1171 } 1172 1173 /* xmm1 = 1 / sqrt(xmm0) */ 1174 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 1175 1176 /* dst.x = xmm1 * src.x */ 1177 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1178 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 1179 } 1180 1181 /* dst.y = xmm1 * src.y */ 1182 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1183 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 1184 } 1185 1186 /* dst.z = xmm1 * src.z */ 1187 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1188 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 1189 } 1190 1191 /* dst.w = xmm1 * src.w */ 1192 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 1193 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 1194 } 1195 } 1196 1197 /* dst.w = 1.0 */ 1198 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 1199 dst0[CHAN_W] = bld->base.one; 1200 } 1201 } 1202 
break; 1203 1204 case TGSI_OPCODE_DIV: 1205 /* deprecated */ 1206 assert( 0 ); 1207 return 0; 1208 break; 1209 1210 case TGSI_OPCODE_DP2: 1211 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1212 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1213 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1214 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1215 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1216 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1217 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1218 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1219 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1220 } 1221 break; 1222 1223 case TGSI_OPCODE_TXL: 1224 emit_tex( bld, inst, TRUE, FALSE, dst0 ); 1225 break; 1226 1227 case TGSI_OPCODE_TXP: 1228 emit_tex( bld, inst, FALSE, TRUE, dst0 ); 1229 break; 1230 1231 case TGSI_OPCODE_BRK: 1232 /* FIXME */ 1233 return 0; 1234 break; 1235 1236 case TGSI_OPCODE_IF: 1237 /* FIXME */ 1238 return 0; 1239 break; 1240 1241 case TGSI_OPCODE_BGNFOR: 1242 /* deprecated */ 1243 assert(0); 1244 return 0; 1245 break; 1246 1247 case TGSI_OPCODE_REP: 1248 /* deprecated */ 1249 assert(0); 1250 return 0; 1251 break; 1252 1253 case TGSI_OPCODE_ELSE: 1254 /* FIXME */ 1255 return 0; 1256 break; 1257 1258 case TGSI_OPCODE_ENDIF: 1259 /* FIXME */ 1260 return 0; 1261 break; 1262 1263 case TGSI_OPCODE_ENDFOR: 1264 /* deprecated */ 1265 assert(0); 1266 return 0; 1267 break; 1268 1269 case TGSI_OPCODE_ENDREP: 1270 /* deprecated */ 1271 assert(0); 1272 return 0; 1273 break; 1274 1275 case TGSI_OPCODE_PUSHA: 1276 /* deprecated? */ 1277 assert(0); 1278 return 0; 1279 break; 1280 1281 case TGSI_OPCODE_POPA: 1282 /* deprecated? 
*/ 1283 assert(0); 1284 return 0; 1285 break; 1286 1287 case TGSI_OPCODE_CEIL: 1288 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1289 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1290 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 1291 } 1292 break; 1293 1294 case TGSI_OPCODE_I2F: 1295 /* deprecated? */ 1296 assert(0); 1297 return 0; 1298 break; 1299 1300 case TGSI_OPCODE_NOT: 1301 /* deprecated? */ 1302 assert(0); 1303 return 0; 1304 break; 1305 1306 case TGSI_OPCODE_TRUNC: 1307 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1308 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1309 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 1310 } 1311 break; 1312 1313 case TGSI_OPCODE_SHL: 1314 /* deprecated? */ 1315 assert(0); 1316 return 0; 1317 break; 1318 1319 case TGSI_OPCODE_ISHR: 1320 /* deprecated? */ 1321 assert(0); 1322 return 0; 1323 break; 1324 1325 case TGSI_OPCODE_AND: 1326 /* deprecated? */ 1327 assert(0); 1328 return 0; 1329 break; 1330 1331 case TGSI_OPCODE_OR: 1332 /* deprecated? */ 1333 assert(0); 1334 return 0; 1335 break; 1336 1337 case TGSI_OPCODE_MOD: 1338 /* deprecated? */ 1339 assert(0); 1340 return 0; 1341 break; 1342 1343 case TGSI_OPCODE_XOR: 1344 /* deprecated? */ 1345 assert(0); 1346 return 0; 1347 break; 1348 1349 case TGSI_OPCODE_SAD: 1350 /* deprecated? */ 1351 assert(0); 1352 return 0; 1353 break; 1354 1355 case TGSI_OPCODE_TXF: 1356 /* deprecated? */ 1357 assert(0); 1358 return 0; 1359 break; 1360 1361 case TGSI_OPCODE_TXQ: 1362 /* deprecated? */ 1363 assert(0); 1364 return 0; 1365 break; 1366 1367 case TGSI_OPCODE_CONT: 1368 /* deprecated? 
*/ 1369 assert(0); 1370 return 0; 1371 break; 1372 1373 case TGSI_OPCODE_EMIT: 1374 return 0; 1375 break; 1376 1377 case TGSI_OPCODE_ENDPRIM: 1378 return 0; 1379 break; 1380 1381 case TGSI_OPCODE_NOP: 1382 break; 1383 1384 default: 1385 return 0; 1386 } 1387 1388 if(info->num_dst) { 1389 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1390 emit_store( bld, inst, 0, chan_index, dst0[chan_index]); 1391 } 1392 } 1393 1394 return 1; 1395} 1396 1397

/**
 * Translate a TGSI token stream into LLVM IR (SoA layout).
 *
 * Walks every token in @p tokens:
 *  - declarations are ignored,
 *  - immediates are converted with lp_build_const_scalar() and recorded in
 *    the next free slot of the build context,
 *  - instructions are handed to emit_instruction(); when it returns zero a
 *    warning is printed and translation continues with the next token.
 *
 * @param builder     LLVM builder used to initialise the build context.
 * @param tokens      TGSI token stream to translate.
 * @param type        type used for the build context and for immediates.
 * @param mask        mask context, stored in the build context.
 * @param consts_ptr  constants pointer, stored in the build context.
 * @param pos         position values, stored in the build context.
 * @param inputs      per-channel input values, stored in the build context.
 * @param outputs     per-channel output slots, stored in the build context.
 * @param sampler     sampler code generator, stored in the build context.
 */
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler)
{
   struct lp_build_tgsi_soa_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   unsigned i;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
            const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);

            if (!emit_instruction(&bld, &parse.FullToken.FullInstruction, info))
               _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                             info ? info->mnemonic : "<invalid>");
         }
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            uint count = size;

            assert(size <= NUM_CHANNELS);
            assert(num_immediates < LP_MAX_IMMEDIATES);

            /*
             * assert() may be compiled out, so enforce both limits at run
             * time as well: never write past bld.immediates[][].
             */
            if (num_immediates >= LP_MAX_IMMEDIATES) {
               _debug_printf("warning: more than %u tgsi immediates, ignoring the excess\n",
                             (unsigned) LP_MAX_IMMEDIATES);
               break;
            }
            if (count > NUM_CHANNELS)
               count = NUM_CHANNELS;

            for (i = 0; i < count; ++i)
               bld.immediates[num_immediates][i] =
                  lp_build_const_scalar(type, parse.FullToken.FullImmediate.u[i].Float);

            /* channels the immediate does not provide are left undefined */
            for (i = count; i < NUM_CHANNELS; ++i)
               bld.immediates[num_immediates][i] = bld.base.undef;

            num_immediates++;
         }
         break;

      default:
         assert(0);
         break;
      }
   }

   tgsi_parse_free(&parse);
}