lp_bld_tgsi_soa.c revision 54b94ee96a6d750d57d99ae9819fcf8206d4680d
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_info.h" 46#include "tgsi/tgsi_parse.h" 47#include "tgsi/tgsi_util.h" 48#include "tgsi/tgsi_exec.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_type.h" 51#include "lp_bld_const.h" 52#include "lp_bld_arit.h" 53#include "lp_bld_logic.h" 54#include "lp_bld_swizzle.h" 55#include "lp_bld_flow.h" 56#include "lp_bld_tgsi.h" 57#include "lp_bld_limits.h" 58#include "lp_bld_debug.h" 59 60 61#define FOR_EACH_CHANNEL( CHAN )\ 62 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 63 64#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 65 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 66 67#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 68 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 69 70#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 71 FOR_EACH_CHANNEL( CHAN )\ 72 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 73 74#define CHAN_X 0 75#define CHAN_Y 1 76#define CHAN_Z 2 77#define CHAN_W 3 78 79#define QUAD_TOP_LEFT 0 80#define QUAD_TOP_RIGHT 1 81#define QUAD_BOTTOM_LEFT 2 82#define QUAD_BOTTOM_RIGHT 3 83 84 85struct lp_exec_mask { 86 struct lp_build_context *bld; 87 88 boolean has_mask; 89 90 LLVMTypeRef int_vec_type; 91 92 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 93 int cond_stack_size; 94 LLVMValueRef cond_mask; 95 96 LLVMBasicBlockRef loop_block; 97 LLVMValueRef cont_mask; 98 LLVMValueRef break_mask; 99 LLVMValueRef break_var; 100 struct { 101 LLVMBasicBlockRef loop_block; 102 LLVMValueRef cont_mask; 103 LLVMValueRef break_mask; 104 LLVMValueRef break_var; 105 } loop_stack[LP_MAX_TGSI_NESTING]; 106 int loop_stack_size; 107 108 LLVMValueRef exec_mask; 109}; 110 111struct lp_build_tgsi_soa_context 112{ 113 struct lp_build_context base; 114 115 /* Builder for integer masks and indices */ 116 struct lp_build_context int_bld; 117 118 LLVMValueRef consts_ptr; 119 const 
LLVMValueRef *pos; 120 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 121 LLVMValueRef (*outputs)[NUM_CHANNELS]; 122 123 struct lp_build_sampler_soa *sampler; 124 125 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; 126 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; 127 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; 128 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; 129 130 /* we allocate an array of temps if we have indirect 131 * addressing and then the temps above is unused */ 132 LLVMValueRef temps_array; 133 boolean has_indirect_addressing; 134 135 struct lp_build_mask_context *mask; 136 struct lp_exec_mask exec_mask; 137}; 138 139static const unsigned char 140swizzle_left[4] = { 141 QUAD_TOP_LEFT, QUAD_TOP_LEFT, 142 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT 143}; 144 145static const unsigned char 146swizzle_right[4] = { 147 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, 148 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT 149}; 150 151static const unsigned char 152swizzle_top[4] = { 153 QUAD_TOP_LEFT, QUAD_TOP_RIGHT, 154 QUAD_TOP_LEFT, QUAD_TOP_RIGHT 155}; 156 157static const unsigned char 158swizzle_bottom[4] = { 159 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, 160 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT 161}; 162 163static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 164{ 165 mask->bld = bld; 166 mask->has_mask = FALSE; 167 mask->cond_stack_size = 0; 168 mask->loop_stack_size = 0; 169 170 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); 171 mask->break_mask = mask->cont_mask = mask->cond_mask = 172 LLVMConstAllOnes(mask->int_vec_type); 173} 174 175static void lp_exec_mask_update(struct lp_exec_mask *mask) 176{ 177 if (mask->loop_stack_size) { 178 /*for loops we need to update the entire mask at runtime */ 179 LLVMValueRef tmp; 180 assert(mask->break_mask); 181 tmp = LLVMBuildAnd(mask->bld->builder, 182 mask->cont_mask, 183 mask->break_mask, 184 "maskcb"); 185 mask->exec_mask = LLVMBuildAnd(mask->bld->builder, 186 
mask->cond_mask, 187 tmp, 188 "maskfull"); 189 } else 190 mask->exec_mask = mask->cond_mask; 191 192 193 mask->has_mask = (mask->cond_stack_size > 0 || 194 mask->loop_stack_size > 0); 195} 196 197static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 198 LLVMValueRef val) 199{ 200 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 201 if (mask->cond_stack_size == 0) { 202 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); 203 } 204 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 205 assert(LLVMTypeOf(val) == mask->int_vec_type); 206 mask->cond_mask = val; 207 208 lp_exec_mask_update(mask); 209} 210 211static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 212{ 213 LLVMValueRef prev_mask; 214 LLVMValueRef inv_mask; 215 216 assert(mask->cond_stack_size); 217 prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 218 if (mask->cond_stack_size == 1) { 219 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); 220 } 221 222 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, ""); 223 224 mask->cond_mask = LLVMBuildAnd(mask->bld->builder, 225 inv_mask, 226 prev_mask, ""); 227 lp_exec_mask_update(mask); 228} 229 230static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 231{ 232 assert(mask->cond_stack_size); 233 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 234 lp_exec_mask_update(mask); 235} 236 237static void lp_exec_bgnloop(struct lp_exec_mask *mask) 238{ 239 if (mask->loop_stack_size == 0) { 240 assert(mask->loop_block == NULL); 241 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); 242 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type)); 243 assert(mask->break_var == NULL); 244 } 245 246 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING); 247 248 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block; 249 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask; 250 mask->loop_stack[mask->loop_stack_size].break_mask = 
mask->break_mask; 251 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; 252 ++mask->loop_stack_size; 253 254 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, ""); 255 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); 256 257 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); 258 LLVMBuildBr(mask->bld->builder, mask->loop_block); 259 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); 260 261 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, ""); 262 263 lp_exec_mask_update(mask); 264} 265 266static void lp_exec_break(struct lp_exec_mask *mask) 267{ 268 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 269 mask->exec_mask, 270 "break"); 271 272 mask->break_mask = LLVMBuildAnd(mask->bld->builder, 273 mask->break_mask, 274 exec_mask, "break_full"); 275 276 lp_exec_mask_update(mask); 277} 278 279static void lp_exec_continue(struct lp_exec_mask *mask) 280{ 281 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 282 mask->exec_mask, 283 ""); 284 285 mask->cont_mask = LLVMBuildAnd(mask->bld->builder, 286 mask->cont_mask, 287 exec_mask, ""); 288 289 lp_exec_mask_update(mask); 290} 291 292 293static void lp_exec_endloop(struct lp_exec_mask *mask) 294{ 295 LLVMBasicBlockRef endloop; 296 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* 297 mask->bld->type.length); 298 LLVMValueRef i1cond; 299 300 assert(mask->break_mask); 301 302 /* 303 * Restore the cont_mask, but don't pop 304 */ 305 assert(mask->loop_stack_size); 306 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask; 307 lp_exec_mask_update(mask); 308 309 /* 310 * Unlike the continue mask, the break_mask must be preserved across loop 311 * iterations 312 */ 313 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); 314 315 /* i1cond = (mask == 0) */ 316 i1cond = LLVMBuildICmp( 317 mask->bld->builder, 318 LLVMIntNE, 319 
LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""), 320 LLVMConstNull(reg_type), ""); 321 322 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); 323 324 LLVMBuildCondBr(mask->bld->builder, 325 i1cond, mask->loop_block, endloop); 326 327 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); 328 329 assert(mask->loop_stack_size); 330 --mask->loop_stack_size; 331 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block; 332 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask; 333 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask; 334 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var; 335 336 lp_exec_mask_update(mask); 337} 338 339/* stores val into an address pointed to by dst. 340 * mask->exec_mask is used to figure out which bits of val 341 * should be stored into the address 342 * (0 means don't store this bit, 1 means do store). 343 */ 344static void lp_exec_mask_store(struct lp_exec_mask *mask, 345 LLVMValueRef pred, 346 LLVMValueRef val, 347 LLVMValueRef dst) 348{ 349 /* Mix the predicate and execution mask */ 350 if (mask->has_mask) { 351 if (pred) { 352 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, ""); 353 } else { 354 pred = mask->exec_mask; 355 } 356 } 357 358 if (pred) { 359 LLVMValueRef real_val, dst_val; 360 361 dst_val = LLVMBuildLoad(mask->bld->builder, dst, ""); 362 real_val = lp_build_select(mask->bld, 363 pred, 364 val, dst_val); 365 366 LLVMBuildStore(mask->bld->builder, real_val, dst); 367 } else 368 LLVMBuildStore(mask->bld->builder, val, dst); 369} 370 371 372static LLVMValueRef 373emit_ddx(struct lp_build_tgsi_soa_context *bld, 374 LLVMValueRef src) 375{ 376 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); 377 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); 378 return lp_build_sub(&bld->base, src_right, src_left); 379} 380 381 382static LLVMValueRef 383emit_ddy(struct 
lp_build_tgsi_soa_context *bld, 384 LLVMValueRef src) 385{ 386 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); 387 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); 388 return lp_build_sub(&bld->base, src_top, src_bottom); 389} 390 391static LLVMValueRef 392get_temp_ptr(struct lp_build_tgsi_soa_context *bld, 393 unsigned index, 394 unsigned swizzle, 395 boolean is_indirect, 396 LLVMValueRef addr) 397{ 398 if (!bld->has_indirect_addressing) { 399 return bld->temps[index][swizzle]; 400 } else { 401 LLVMValueRef lindex = 402 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0); 403 if (is_indirect) 404 lindex = lp_build_add(&bld->base, lindex, addr); 405 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); 406 } 407} 408 409/** 410 * Register fetch. 411 */ 412static LLVMValueRef 413emit_fetch( 414 struct lp_build_tgsi_soa_context *bld, 415 const struct tgsi_full_instruction *inst, 416 unsigned index, 417 const unsigned chan_index ) 418{ 419 const struct tgsi_full_src_register *reg = &inst->Src[index]; 420 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 421 LLVMValueRef res; 422 LLVMValueRef addr; 423 424 switch (swizzle) { 425 case TGSI_SWIZZLE_X: 426 case TGSI_SWIZZLE_Y: 427 case TGSI_SWIZZLE_Z: 428 case TGSI_SWIZZLE_W: 429 430 if (reg->Register.Indirect) { 431 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); 432 unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); 433 addr = LLVMBuildLoad(bld->base.builder, 434 bld->addr[reg->Indirect.Index][swizzle], 435 ""); 436 /* for indexing we want integers */ 437 addr = LLVMBuildFPToSI(bld->base.builder, addr, 438 int_vec_type, ""); 439 addr = LLVMBuildExtractElement(bld->base.builder, 440 addr, LLVMConstInt(LLVMInt32Type(), 0, 0), 441 ""); 442 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); 443 } 444 445 switch (reg->Register.File) { 446 case 
TGSI_FILE_CONSTANT: { 447 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); 448 LLVMValueRef scalar, scalar_ptr; 449 450 if (reg->Register.Indirect) { 451 /*lp_build_printf(bld->base.builder, 452 "\taddr = %d\n", addr);*/ 453 index = lp_build_add(&bld->base, index, addr); 454 } 455 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); 456 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 457 458 res = lp_build_broadcast_scalar(&bld->base, scalar); 459 break; 460 } 461 462 case TGSI_FILE_IMMEDIATE: 463 res = bld->immediates[reg->Register.Index][swizzle]; 464 assert(res); 465 break; 466 467 case TGSI_FILE_INPUT: 468 res = bld->inputs[reg->Register.Index][swizzle]; 469 assert(res); 470 break; 471 472 case TGSI_FILE_TEMPORARY: { 473 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 474 swizzle, 475 reg->Register.Indirect, 476 addr); 477 res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); 478 if(!res) 479 return bld->base.undef; 480 break; 481 } 482 483 default: 484 assert( 0 ); 485 return bld->base.undef; 486 } 487 break; 488 489 default: 490 assert( 0 ); 491 return bld->base.undef; 492 } 493 494 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { 495 case TGSI_UTIL_SIGN_CLEAR: 496 res = lp_build_abs( &bld->base, res ); 497 break; 498 499 case TGSI_UTIL_SIGN_SET: 500 /* TODO: Use bitwese OR for floating point */ 501 res = lp_build_abs( &bld->base, res ); 502 res = LLVMBuildNeg( bld->base.builder, res, "" ); 503 break; 504 505 case TGSI_UTIL_SIGN_TOGGLE: 506 res = LLVMBuildNeg( bld->base.builder, res, "" ); 507 break; 508 509 case TGSI_UTIL_SIGN_KEEP: 510 break; 511 } 512 513 return res; 514} 515 516 517/** 518 * Register fetch with derivatives. 
519 */ 520static void 521emit_fetch_deriv( 522 struct lp_build_tgsi_soa_context *bld, 523 const struct tgsi_full_instruction *inst, 524 unsigned index, 525 const unsigned chan_index, 526 LLVMValueRef *res, 527 LLVMValueRef *ddx, 528 LLVMValueRef *ddy) 529{ 530 LLVMValueRef src; 531 532 src = emit_fetch(bld, inst, index, chan_index); 533 534 if(res) 535 *res = src; 536 537 /* TODO: use interpolation coeffs for inputs */ 538 539 if(ddx) 540 *ddx = emit_ddx(bld, src); 541 542 if(ddy) 543 *ddy = emit_ddy(bld, src); 544} 545 546 547/** 548 * Predicate. 549 */ 550static void 551emit_fetch_predicate( 552 struct lp_build_tgsi_soa_context *bld, 553 const struct tgsi_full_instruction *inst, 554 LLVMValueRef *pred) 555{ 556 unsigned index; 557 unsigned char swizzles[4]; 558 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; 559 LLVMValueRef value; 560 unsigned chan; 561 562 if (!inst->Instruction.Predicate) { 563 FOR_EACH_CHANNEL( chan ) { 564 pred[chan] = NULL; 565 } 566 return; 567 } 568 569 swizzles[0] = inst->Predicate.SwizzleX; 570 swizzles[1] = inst->Predicate.SwizzleY; 571 swizzles[2] = inst->Predicate.SwizzleZ; 572 swizzles[3] = inst->Predicate.SwizzleW; 573 574 index = inst->Predicate.Index; 575 assert(index < LP_MAX_TGSI_PREDS); 576 577 FOR_EACH_CHANNEL( chan ) { 578 unsigned swizzle = swizzles[chan]; 579 580 /* 581 * Only fetch the predicate register channels that are actually listed 582 * in the swizzles 583 */ 584 if (!unswizzled[swizzle]) { 585 value = LLVMBuildLoad(bld->base.builder, 586 bld->preds[index][swizzle], ""); 587 588 /* 589 * Convert the value to an integer mask. 590 * 591 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions 592 * is needlessly causing two comparisons due to storing the intermediate 593 * result as float vector instead of an integer mask vector. 
594 */ 595 value = lp_build_compare(bld->base.builder, 596 bld->base.type, 597 PIPE_FUNC_NOTEQUAL, 598 value, 599 bld->base.zero); 600 if (inst->Predicate.Negate) { 601 value = LLVMBuildNot(bld->base.builder, value, ""); 602 } 603 604 unswizzled[swizzle] = value; 605 } else { 606 value = unswizzled[swizzle]; 607 } 608 609 pred[chan] = value; 610 } 611} 612 613 614/** 615 * Register store. 616 */ 617static void 618emit_store( 619 struct lp_build_tgsi_soa_context *bld, 620 const struct tgsi_full_instruction *inst, 621 unsigned index, 622 unsigned chan_index, 623 LLVMValueRef pred, 624 LLVMValueRef value) 625{ 626 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 627 LLVMValueRef addr; 628 629 switch( inst->Instruction.Saturate ) { 630 case TGSI_SAT_NONE: 631 break; 632 633 case TGSI_SAT_ZERO_ONE: 634 value = lp_build_max(&bld->base, value, bld->base.zero); 635 value = lp_build_min(&bld->base, value, bld->base.one); 636 break; 637 638 case TGSI_SAT_MINUS_PLUS_ONE: 639 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); 640 value = lp_build_min(&bld->base, value, bld->base.one); 641 break; 642 643 default: 644 assert(0); 645 } 646 647 if (reg->Register.Indirect) { 648 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); 649 unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); 650 addr = LLVMBuildLoad(bld->base.builder, 651 bld->addr[reg->Indirect.Index][swizzle], 652 ""); 653 /* for indexing we want integers */ 654 addr = LLVMBuildFPToSI(bld->base.builder, addr, 655 int_vec_type, ""); 656 addr = LLVMBuildExtractElement(bld->base.builder, 657 addr, LLVMConstInt(LLVMInt32Type(), 0, 0), 658 ""); 659 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); 660 } 661 662 switch( reg->Register.File ) { 663 case TGSI_FILE_OUTPUT: 664 lp_exec_mask_store(&bld->exec_mask, pred, value, 665 bld->outputs[reg->Register.Index][chan_index]); 666 break; 667 668 case 
TGSI_FILE_TEMPORARY: { 669 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 670 chan_index, 671 reg->Register.Indirect, 672 addr); 673 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); 674 break; 675 } 676 677 case TGSI_FILE_ADDRESS: 678 lp_exec_mask_store(&bld->exec_mask, pred, value, 679 bld->addr[reg->Indirect.Index][chan_index]); 680 break; 681 682 case TGSI_FILE_PREDICATE: 683 lp_exec_mask_store(&bld->exec_mask, pred, value, 684 bld->preds[index][chan_index]); 685 break; 686 687 default: 688 assert( 0 ); 689 } 690} 691 692 693/** 694 * High-level instruction translators. 695 */ 696 697enum tex_modifier { 698 TEX_MODIFIER_NONE = 0, 699 TEX_MODIFIER_PROJECTED, 700 TEX_MODIFIER_LOD_BIAS, 701 TEX_MODIFIER_EXPLICIT_LOD, 702 TEX_MODIFIER_EXPLICIT_DERIV 703}; 704 705static void 706emit_tex( struct lp_build_tgsi_soa_context *bld, 707 const struct tgsi_full_instruction *inst, 708 enum tex_modifier modifier, 709 LLVMValueRef *texel) 710{ 711 unsigned unit; 712 LLVMValueRef lod_bias, explicit_lod; 713 LLVMValueRef oow = NULL; 714 LLVMValueRef coords[3]; 715 LLVMValueRef ddx[3]; 716 LLVMValueRef ddy[3]; 717 unsigned num_coords; 718 unsigned i; 719 720 if (!bld->sampler) { 721 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 722 for (i = 0; i < 4; i++) { 723 texel[i] = bld->base.undef; 724 } 725 return; 726 } 727 728 switch (inst->Texture.Texture) { 729 case TGSI_TEXTURE_1D: 730 num_coords = 1; 731 break; 732 case TGSI_TEXTURE_2D: 733 case TGSI_TEXTURE_RECT: 734 num_coords = 2; 735 break; 736 case TGSI_TEXTURE_SHADOW1D: 737 case TGSI_TEXTURE_SHADOW2D: 738 case TGSI_TEXTURE_SHADOWRECT: 739 case TGSI_TEXTURE_3D: 740 case TGSI_TEXTURE_CUBE: 741 num_coords = 3; 742 break; 743 default: 744 assert(0); 745 return; 746 } 747 748 if (modifier == TEX_MODIFIER_LOD_BIAS) { 749 lod_bias = emit_fetch( bld, inst, 0, 3 ); 750 explicit_lod = NULL; 751 } 752 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 753 lod_bias = NULL; 
754 explicit_lod = emit_fetch( bld, inst, 0, 3 ); 755 } 756 else { 757 lod_bias = NULL; 758 explicit_lod = NULL; 759 } 760 761 if (modifier == TEX_MODIFIER_PROJECTED) { 762 oow = emit_fetch( bld, inst, 0, 3 ); 763 oow = lp_build_rcp(&bld->base, oow); 764 } 765 766 for (i = 0; i < num_coords; i++) { 767 coords[i] = emit_fetch( bld, inst, 0, i ); 768 if (modifier == TEX_MODIFIER_PROJECTED) 769 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 770 } 771 for (i = num_coords; i < 3; i++) { 772 coords[i] = bld->base.undef; 773 } 774 775 if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) { 776 for (i = 0; i < num_coords; i++) { 777 ddx[i] = emit_fetch( bld, inst, 1, i ); 778 ddy[i] = emit_fetch( bld, inst, 2, i ); 779 } 780 unit = inst->Src[3].Register.Index; 781 } else { 782 for (i = 0; i < num_coords; i++) { 783 ddx[i] = emit_ddx( bld, coords[i] ); 784 ddy[i] = emit_ddy( bld, coords[i] ); 785 } 786 unit = inst->Src[1].Register.Index; 787 } 788 for (i = num_coords; i < 3; i++) { 789 ddx[i] = bld->base.undef; 790 ddy[i] = bld->base.undef; 791 } 792 793 bld->sampler->emit_fetch_texel(bld->sampler, 794 bld->base.builder, 795 bld->base.type, 796 unit, num_coords, coords, 797 ddx, ddy, 798 lod_bias, explicit_lod, 799 texel); 800} 801 802 803/** 804 * Kill fragment if any of the src register values are negative. 805 */ 806static void 807emit_kil( 808 struct lp_build_tgsi_soa_context *bld, 809 const struct tgsi_full_instruction *inst ) 810{ 811 const struct tgsi_full_src_register *reg = &inst->Src[0]; 812 LLVMValueRef terms[NUM_CHANNELS]; 813 LLVMValueRef mask; 814 unsigned chan_index; 815 816 memset(&terms, 0, sizeof terms); 817 818 FOR_EACH_CHANNEL( chan_index ) { 819 unsigned swizzle; 820 821 /* Unswizzle channel */ 822 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 823 824 /* Check if the component has not been already tested. 
*/ 825 assert(swizzle < NUM_CHANNELS); 826 if( !terms[swizzle] ) 827 /* TODO: change the comparison operator instead of setting the sign */ 828 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 829 } 830 831 mask = NULL; 832 FOR_EACH_CHANNEL( chan_index ) { 833 if(terms[chan_index]) { 834 LLVMValueRef chan_mask; 835 836 /* 837 * If term < 0 then mask = 0 else mask = ~0. 838 */ 839 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 840 841 if(mask) 842 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); 843 else 844 mask = chan_mask; 845 } 846 } 847 848 if(mask) 849 lp_build_mask_update(bld->mask, mask); 850} 851 852 853/** 854 * Predicated fragment kill. 855 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 856 * The only predication is the execution mask which will apply if 857 * we're inside a loop or conditional. 858 */ 859static void 860emit_kilp(struct lp_build_tgsi_soa_context *bld, 861 const struct tgsi_full_instruction *inst) 862{ 863 LLVMValueRef mask; 864 865 /* For those channels which are "alive", disable fragment shader 866 * execution. 
867 */ 868 if (bld->exec_mask.has_mask) { 869 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); 870 } 871 else { 872 mask = bld->base.zero; 873 } 874 875 lp_build_mask_update(bld->mask, mask); 876} 877 878static void 879emit_declaration( 880 struct lp_build_tgsi_soa_context *bld, 881 const struct tgsi_full_declaration *decl) 882{ 883 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); 884 885 unsigned first = decl->Range.First; 886 unsigned last = decl->Range.Last; 887 unsigned idx, i; 888 889 for (idx = first; idx <= last; ++idx) { 890 switch (decl->Declaration.File) { 891 case TGSI_FILE_TEMPORARY: 892 assert(idx < LP_MAX_TGSI_TEMPS); 893 if (bld->has_indirect_addressing) { 894 LLVMValueRef val = LLVMConstInt(LLVMInt32Type(), 895 last*4 + 4, 0); 896 bld->temps_array = lp_build_array_alloca(bld->base.builder, 897 vec_type, val, ""); 898 } else { 899 for (i = 0; i < NUM_CHANNELS; i++) 900 bld->temps[idx][i] = lp_build_alloca(bld->base.builder, 901 vec_type, ""); 902 } 903 break; 904 905 case TGSI_FILE_OUTPUT: 906 for (i = 0; i < NUM_CHANNELS; i++) 907 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, 908 vec_type, ""); 909 break; 910 911 case TGSI_FILE_ADDRESS: 912 assert(idx < LP_MAX_TGSI_ADDRS); 913 for (i = 0; i < NUM_CHANNELS; i++) 914 bld->addr[idx][i] = lp_build_alloca(bld->base.builder, 915 vec_type, ""); 916 break; 917 918 case TGSI_FILE_PREDICATE: 919 assert(idx < LP_MAX_TGSI_PREDS); 920 for (i = 0; i < NUM_CHANNELS; i++) 921 bld->preds[idx][i] = lp_build_alloca(bld->base.builder, 922 vec_type, ""); 923 break; 924 925 default: 926 /* don't need to declare other vars */ 927 break; 928 } 929 } 930} 931 932 933/** 934 * Emit LLVM for one TGSI instruction. 
935 * \param return TRUE for success, FALSE otherwise 936 */ 937static boolean 938emit_instruction( 939 struct lp_build_tgsi_soa_context *bld, 940 const struct tgsi_full_instruction *inst, 941 const struct tgsi_opcode_info *info) 942{ 943 unsigned chan_index; 944 LLVMValueRef src0, src1, src2; 945 LLVMValueRef tmp0, tmp1, tmp2; 946 LLVMValueRef tmp3 = NULL; 947 LLVMValueRef tmp4 = NULL; 948 LLVMValueRef tmp5 = NULL; 949 LLVMValueRef tmp6 = NULL; 950 LLVMValueRef tmp7 = NULL; 951 LLVMValueRef res; 952 LLVMValueRef dst0[NUM_CHANNELS]; 953 954 /* 955 * Stores and write masks are handled in a general fashion after the long 956 * instruction opcode switch statement. 957 * 958 * Although not stricitly necessary, we avoid generating instructions for 959 * channels which won't be stored, in cases where's that easy. For some 960 * complex instructions, like texture sampling, it is more convenient to 961 * assume a full writemask and then let LLVM optimization passes eliminate 962 * redundant code. 
963 */ 964 965 assert(info->num_dst <= 1); 966 if (info->num_dst) { 967 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 968 dst0[chan_index] = bld->base.undef; 969 } 970 } 971 972 switch (inst->Instruction.Opcode) { 973 case TGSI_OPCODE_ARL: 974 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 975 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 976 tmp0 = lp_build_floor(&bld->base, tmp0); 977 dst0[chan_index] = tmp0; 978 } 979 break; 980 981 case TGSI_OPCODE_MOV: 982 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 983 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 984 } 985 break; 986 987 case TGSI_OPCODE_LIT: 988 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 989 dst0[CHAN_X] = bld->base.one; 990 } 991 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 992 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 993 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 994 } 995 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 996 /* XMM[1] = SrcReg[0].yyyy */ 997 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 998 /* XMM[1] = max(XMM[1], 0) */ 999 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1000 /* XMM[2] = SrcReg[0].wwww */ 1001 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1002 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1003 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1004 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1005 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1006 } 1007 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1008 dst0[CHAN_W] = bld->base.one; 1009 } 1010 break; 1011 1012 case TGSI_OPCODE_RCP: 1013 /* TGSI_OPCODE_RECIP */ 1014 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1015 res = lp_build_rcp(&bld->base, src0); 1016 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1017 dst0[chan_index] = res; 1018 } 1019 break; 1020 1021 case TGSI_OPCODE_RSQ: 1022 /* TGSI_OPCODE_RECIPSQRT */ 1023 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1024 src0 = lp_build_abs(&bld->base, src0); 
1025 res = lp_build_rsqrt(&bld->base, src0); 1026 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1027 dst0[chan_index] = res; 1028 } 1029 break; 1030 1031 case TGSI_OPCODE_EXP: 1032 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1033 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1034 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1035 LLVMValueRef *p_exp2_int_part = NULL; 1036 LLVMValueRef *p_frac_part = NULL; 1037 LLVMValueRef *p_exp2 = NULL; 1038 1039 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1040 1041 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1042 p_exp2_int_part = &tmp0; 1043 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1044 p_frac_part = &tmp1; 1045 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1046 p_exp2 = &tmp2; 1047 1048 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1049 1050 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1051 dst0[CHAN_X] = tmp0; 1052 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1053 dst0[CHAN_Y] = tmp1; 1054 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1055 dst0[CHAN_Z] = tmp2; 1056 } 1057 /* dst.w = 1.0 */ 1058 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1059 dst0[CHAN_W] = bld->base.one; 1060 } 1061 break; 1062 1063 case TGSI_OPCODE_LOG: 1064 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1065 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1066 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1067 LLVMValueRef *p_floor_log2 = NULL; 1068 LLVMValueRef *p_exp = NULL; 1069 LLVMValueRef *p_log2 = NULL; 1070 1071 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1072 src0 = lp_build_abs( &bld->base, src0 ); 1073 1074 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1075 p_floor_log2 = &tmp0; 1076 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1077 p_exp = &tmp1; 1078 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1079 p_log2 = &tmp2; 1080 1081 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1082 1083 /* dst.x = floor(lg2(abs(src.x))) */ 1084 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1085 dst0[CHAN_X] = tmp0; 
1086 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1087 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1088 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1089 } 1090 /* dst.z = lg2(abs(src.x)) */ 1091 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1092 dst0[CHAN_Z] = tmp2; 1093 } 1094 /* dst.w = 1.0 */ 1095 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1096 dst0[CHAN_W] = bld->base.one; 1097 } 1098 break; 1099 1100 case TGSI_OPCODE_MUL: 1101 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1102 src0 = emit_fetch( bld, inst, 0, chan_index ); 1103 src1 = emit_fetch( bld, inst, 1, chan_index ); 1104 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1105 } 1106 break; 1107 1108 case TGSI_OPCODE_ADD: 1109 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1110 src0 = emit_fetch( bld, inst, 0, chan_index ); 1111 src1 = emit_fetch( bld, inst, 1, chan_index ); 1112 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1113 } 1114 break; 1115 1116 case TGSI_OPCODE_DP3: 1117 /* TGSI_OPCODE_DOT3 */ 1118 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1119 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1120 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1121 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1122 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1123 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1124 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1125 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1126 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1127 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1128 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1129 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1130 dst0[chan_index] = tmp0; 1131 } 1132 break; 1133 1134 case TGSI_OPCODE_DP4: 1135 /* TGSI_OPCODE_DOT4 */ 1136 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1137 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1138 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1139 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1140 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1141 tmp1 = 
lp_build_mul( &bld->base, tmp1, tmp2); 1142 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1143 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1144 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1145 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1146 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1147 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1148 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1149 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1150 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1151 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1152 dst0[chan_index] = tmp0; 1153 } 1154 break; 1155 1156 case TGSI_OPCODE_DST: 1157 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1158 dst0[CHAN_X] = bld->base.one; 1159 } 1160 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1161 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1162 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1163 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1164 } 1165 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1166 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1167 } 1168 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1169 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1170 } 1171 break; 1172 1173 case TGSI_OPCODE_MIN: 1174 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1175 src0 = emit_fetch( bld, inst, 0, chan_index ); 1176 src1 = emit_fetch( bld, inst, 1, chan_index ); 1177 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1178 } 1179 break; 1180 1181 case TGSI_OPCODE_MAX: 1182 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1183 src0 = emit_fetch( bld, inst, 0, chan_index ); 1184 src1 = emit_fetch( bld, inst, 1, chan_index ); 1185 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1186 } 1187 break; 1188 1189 case TGSI_OPCODE_SLT: 1190 /* TGSI_OPCODE_SETLT */ 1191 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1192 src0 = emit_fetch( bld, inst, 0, chan_index ); 1193 src1 = emit_fetch( bld, inst, 1, chan_index ); 1194 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, 
src1 ); 1195 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1196 } 1197 break; 1198 1199 case TGSI_OPCODE_SGE: 1200 /* TGSI_OPCODE_SETGE */ 1201 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1202 src0 = emit_fetch( bld, inst, 0, chan_index ); 1203 src1 = emit_fetch( bld, inst, 1, chan_index ); 1204 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1205 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1206 } 1207 break; 1208 1209 case TGSI_OPCODE_MAD: 1210 /* TGSI_OPCODE_MADD */ 1211 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1212 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1213 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1214 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1215 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1216 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1217 dst0[chan_index] = tmp0; 1218 } 1219 break; 1220 1221 case TGSI_OPCODE_SUB: 1222 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1223 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1224 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1225 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1226 } 1227 break; 1228 1229 case TGSI_OPCODE_LRP: 1230 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1231 src0 = emit_fetch( bld, inst, 0, chan_index ); 1232 src1 = emit_fetch( bld, inst, 1, chan_index ); 1233 src2 = emit_fetch( bld, inst, 2, chan_index ); 1234 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1235 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1236 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1237 } 1238 break; 1239 1240 case TGSI_OPCODE_CND: 1241 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1242 src0 = emit_fetch( bld, inst, 0, chan_index ); 1243 src1 = emit_fetch( bld, inst, 1, chan_index ); 1244 src2 = emit_fetch( bld, inst, 2, chan_index ); 1245 tmp1 = lp_build_const_vec(bld->base.type, 0.5); 1246 tmp0 = lp_build_cmp( &bld->base, 
PIPE_FUNC_GREATER, src2, tmp1); 1247 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1248 } 1249 break; 1250 1251 case TGSI_OPCODE_DP2A: 1252 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1253 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1254 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1255 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1256 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1257 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1258 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1259 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1260 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1261 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1262 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1263 } 1264 break; 1265 1266 case TGSI_OPCODE_FRC: 1267 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1268 src0 = emit_fetch( bld, inst, 0, chan_index ); 1269 tmp0 = lp_build_floor(&bld->base, src0); 1270 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1271 dst0[chan_index] = tmp0; 1272 } 1273 break; 1274 1275 case TGSI_OPCODE_CLAMP: 1276 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1277 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1278 src1 = emit_fetch( bld, inst, 1, chan_index ); 1279 src2 = emit_fetch( bld, inst, 2, chan_index ); 1280 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1281 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1282 dst0[chan_index] = tmp0; 1283 } 1284 break; 1285 1286 case TGSI_OPCODE_FLR: 1287 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1288 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1289 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1290 } 1291 break; 1292 1293 case TGSI_OPCODE_ROUND: 1294 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1295 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1296 
dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1297 } 1298 break; 1299 1300 case TGSI_OPCODE_EX2: { 1301 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1302 tmp0 = lp_build_exp2( &bld->base, tmp0); 1303 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1304 dst0[chan_index] = tmp0; 1305 } 1306 break; 1307 } 1308 1309 case TGSI_OPCODE_LG2: 1310 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1311 tmp0 = lp_build_log2( &bld->base, tmp0); 1312 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1313 dst0[chan_index] = tmp0; 1314 } 1315 break; 1316 1317 case TGSI_OPCODE_POW: 1318 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1319 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1320 res = lp_build_pow( &bld->base, src0, src1 ); 1321 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1322 dst0[chan_index] = res; 1323 } 1324 break; 1325 1326 case TGSI_OPCODE_XPD: 1327 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1328 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1329 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1330 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1331 } 1332 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1333 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1334 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1335 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1336 } 1337 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1338 tmp2 = tmp0; 1339 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1340 tmp5 = tmp3; 1341 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1342 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1343 dst0[CHAN_X] = tmp2; 1344 } 1345 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1346 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1347 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1348 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 1349 } 1350 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1351 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1352 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1353 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 1354 dst0[CHAN_Y] = tmp3; 1355 } 1356 
IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1357 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1358 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1359 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1360 dst0[CHAN_Z] = tmp5; 1361 } 1362 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1363 dst0[CHAN_W] = bld->base.one; 1364 } 1365 break; 1366 1367 case TGSI_OPCODE_ABS: 1368 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1369 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1370 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1371 } 1372 break; 1373 1374 case TGSI_OPCODE_RCC: 1375 /* deprecated? */ 1376 assert(0); 1377 return FALSE; 1378 1379 case TGSI_OPCODE_DPH: 1380 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1381 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1382 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1383 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1384 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1385 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1386 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1387 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1388 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1389 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1390 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1391 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1392 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1393 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1394 dst0[chan_index] = tmp0; 1395 } 1396 break; 1397 1398 case TGSI_OPCODE_COS: 1399 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1400 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1401 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1402 dst0[chan_index] = tmp0; 1403 } 1404 break; 1405 1406 case TGSI_OPCODE_DDX: 1407 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1408 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1409 } 1410 break; 1411 1412 case TGSI_OPCODE_DDY: 1413 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1414 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, 
&dst0[chan_index]); 1415 } 1416 break; 1417 1418 case TGSI_OPCODE_KILP: 1419 /* predicated kill */ 1420 emit_kilp( bld, inst ); 1421 break; 1422 1423 case TGSI_OPCODE_KIL: 1424 /* conditional kill */ 1425 emit_kil( bld, inst ); 1426 break; 1427 1428 case TGSI_OPCODE_PK2H: 1429 return FALSE; 1430 break; 1431 1432 case TGSI_OPCODE_PK2US: 1433 return FALSE; 1434 break; 1435 1436 case TGSI_OPCODE_PK4B: 1437 return FALSE; 1438 break; 1439 1440 case TGSI_OPCODE_PK4UB: 1441 return FALSE; 1442 break; 1443 1444 case TGSI_OPCODE_RFL: 1445 return FALSE; 1446 break; 1447 1448 case TGSI_OPCODE_SEQ: 1449 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1450 src0 = emit_fetch( bld, inst, 0, chan_index ); 1451 src1 = emit_fetch( bld, inst, 1, chan_index ); 1452 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1453 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1454 } 1455 break; 1456 1457 case TGSI_OPCODE_SFL: 1458 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1459 dst0[chan_index] = bld->base.zero; 1460 } 1461 break; 1462 1463 case TGSI_OPCODE_SGT: 1464 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1465 src0 = emit_fetch( bld, inst, 0, chan_index ); 1466 src1 = emit_fetch( bld, inst, 1, chan_index ); 1467 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1468 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1469 } 1470 break; 1471 1472 case TGSI_OPCODE_SIN: 1473 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1474 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1475 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1476 dst0[chan_index] = tmp0; 1477 } 1478 break; 1479 1480 case TGSI_OPCODE_SLE: 1481 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1482 src0 = emit_fetch( bld, inst, 0, chan_index ); 1483 src1 = emit_fetch( bld, inst, 1, chan_index ); 1484 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 1485 dst0[chan_index] = lp_build_select( 
&bld->base, tmp0, bld->base.one, bld->base.zero ); 1486 } 1487 break; 1488 1489 case TGSI_OPCODE_SNE: 1490 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1491 src0 = emit_fetch( bld, inst, 0, chan_index ); 1492 src1 = emit_fetch( bld, inst, 1, chan_index ); 1493 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1494 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1495 } 1496 break; 1497 1498 case TGSI_OPCODE_STR: 1499 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1500 dst0[chan_index] = bld->base.one; 1501 } 1502 break; 1503 1504 case TGSI_OPCODE_TEX: 1505 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 ); 1506 break; 1507 1508 case TGSI_OPCODE_TXD: 1509 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1510 break; 1511 1512 case TGSI_OPCODE_UP2H: 1513 /* deprecated */ 1514 assert (0); 1515 return FALSE; 1516 break; 1517 1518 case TGSI_OPCODE_UP2US: 1519 /* deprecated */ 1520 assert(0); 1521 return FALSE; 1522 break; 1523 1524 case TGSI_OPCODE_UP4B: 1525 /* deprecated */ 1526 assert(0); 1527 return FALSE; 1528 break; 1529 1530 case TGSI_OPCODE_UP4UB: 1531 /* deprecated */ 1532 assert(0); 1533 return FALSE; 1534 break; 1535 1536 case TGSI_OPCODE_X2D: 1537 /* deprecated? 
*/ 1538 assert(0); 1539 return FALSE; 1540 break; 1541 1542 case TGSI_OPCODE_ARA: 1543 /* deprecated */ 1544 assert(0); 1545 return FALSE; 1546 break; 1547 1548 case TGSI_OPCODE_ARR: 1549 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1550 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1551 tmp0 = lp_build_round(&bld->base, tmp0); 1552 dst0[chan_index] = tmp0; 1553 } 1554 break; 1555 1556 case TGSI_OPCODE_BRA: 1557 /* deprecated */ 1558 assert(0); 1559 return FALSE; 1560 break; 1561 1562 case TGSI_OPCODE_CAL: 1563 /* FIXME */ 1564 return FALSE; 1565 break; 1566 1567 case TGSI_OPCODE_RET: 1568 /* FIXME */ 1569 return FALSE; 1570 break; 1571 1572 case TGSI_OPCODE_END: 1573 break; 1574 1575 case TGSI_OPCODE_SSG: 1576 /* TGSI_OPCODE_SGN */ 1577 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1578 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1579 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1580 } 1581 break; 1582 1583 case TGSI_OPCODE_CMP: 1584 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1585 src0 = emit_fetch( bld, inst, 0, chan_index ); 1586 src1 = emit_fetch( bld, inst, 1, chan_index ); 1587 src2 = emit_fetch( bld, inst, 2, chan_index ); 1588 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 1589 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 1590 } 1591 break; 1592 1593 case TGSI_OPCODE_SCS: 1594 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1595 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1596 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 1597 } 1598 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1599 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1600 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 1601 } 1602 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1603 dst0[CHAN_Z] = bld->base.zero; 1604 } 1605 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1606 dst0[CHAN_W] = bld->base.one; 1607 } 1608 break; 1609 1610 case TGSI_OPCODE_TXB: 1611 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 ); 1612 break; 
1613 1614 case TGSI_OPCODE_NRM: 1615 /* fall-through */ 1616 case TGSI_OPCODE_NRM4: 1617 /* 3 or 4-component normalization */ 1618 { 1619 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 1620 1621 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 1622 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 1623 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 1624 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 1625 1626 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1627 1628 /* xmm4 = src.x */ 1629 /* xmm0 = src.x * src.x */ 1630 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1631 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1632 tmp4 = tmp0; 1633 } 1634 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 1635 1636 /* xmm5 = src.y */ 1637 /* xmm0 = xmm0 + src.y * src.y */ 1638 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 1639 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1640 tmp5 = tmp1; 1641 } 1642 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1643 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1644 1645 /* xmm6 = src.z */ 1646 /* xmm0 = xmm0 + src.z * src.z */ 1647 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 1648 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1649 tmp6 = tmp1; 1650 } 1651 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1652 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1653 1654 if (dims == 4) { 1655 /* xmm7 = src.w */ 1656 /* xmm0 = xmm0 + src.w * src.w */ 1657 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 1658 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 1659 tmp7 = tmp1; 1660 } 1661 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1662 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1663 } 1664 1665 /* xmm1 = 1 / sqrt(xmm0) */ 1666 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 1667 1668 /* dst.x = xmm1 * src.x */ 1669 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1670 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 1671 } 1672 1673 /* dst.y = xmm1 * src.y */ 1674 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1675 dst0[CHAN_Y] = lp_build_mul( &bld->base, 
tmp5, tmp1); 1676 } 1677 1678 /* dst.z = xmm1 * src.z */ 1679 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1680 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 1681 } 1682 1683 /* dst.w = xmm1 * src.w */ 1684 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 1685 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 1686 } 1687 } 1688 1689 /* dst.w = 1.0 */ 1690 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 1691 dst0[CHAN_W] = bld->base.one; 1692 } 1693 } 1694 break; 1695 1696 case TGSI_OPCODE_DIV: 1697 /* deprecated */ 1698 assert( 0 ); 1699 return FALSE; 1700 break; 1701 1702 case TGSI_OPCODE_DP2: 1703 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1704 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1705 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1706 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1707 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1708 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1709 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1710 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1711 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1712 } 1713 break; 1714 1715 case TGSI_OPCODE_TXL: 1716 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 1717 break; 1718 1719 case TGSI_OPCODE_TXP: 1720 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 ); 1721 break; 1722 1723 case TGSI_OPCODE_BRK: 1724 lp_exec_break(&bld->exec_mask); 1725 break; 1726 1727 case TGSI_OPCODE_IF: 1728 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1729 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 1730 tmp0, bld->base.zero); 1731 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 1732 break; 1733 1734 case TGSI_OPCODE_BGNLOOP: 1735 lp_exec_bgnloop(&bld->exec_mask); 1736 break; 1737 1738 case TGSI_OPCODE_ELSE: 1739 lp_exec_mask_cond_invert(&bld->exec_mask); 1740 break; 1741 1742 case TGSI_OPCODE_ENDIF: 
1743 lp_exec_mask_cond_pop(&bld->exec_mask); 1744 break; 1745 1746 case TGSI_OPCODE_ENDLOOP: 1747 lp_exec_endloop(&bld->exec_mask); 1748 break; 1749 1750 case TGSI_OPCODE_PUSHA: 1751 /* deprecated? */ 1752 assert(0); 1753 return FALSE; 1754 break; 1755 1756 case TGSI_OPCODE_POPA: 1757 /* deprecated? */ 1758 assert(0); 1759 return FALSE; 1760 break; 1761 1762 case TGSI_OPCODE_CEIL: 1763 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1764 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1765 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 1766 } 1767 break; 1768 1769 case TGSI_OPCODE_I2F: 1770 /* deprecated? */ 1771 assert(0); 1772 return FALSE; 1773 break; 1774 1775 case TGSI_OPCODE_NOT: 1776 /* deprecated? */ 1777 assert(0); 1778 return FALSE; 1779 break; 1780 1781 case TGSI_OPCODE_TRUNC: 1782 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1783 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1784 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 1785 } 1786 break; 1787 1788 case TGSI_OPCODE_SHL: 1789 /* deprecated? */ 1790 assert(0); 1791 return FALSE; 1792 break; 1793 1794 case TGSI_OPCODE_ISHR: 1795 /* deprecated? */ 1796 assert(0); 1797 return FALSE; 1798 break; 1799 1800 case TGSI_OPCODE_AND: 1801 /* deprecated? */ 1802 assert(0); 1803 return FALSE; 1804 break; 1805 1806 case TGSI_OPCODE_OR: 1807 /* deprecated? */ 1808 assert(0); 1809 return FALSE; 1810 break; 1811 1812 case TGSI_OPCODE_MOD: 1813 /* deprecated? */ 1814 assert(0); 1815 return FALSE; 1816 break; 1817 1818 case TGSI_OPCODE_XOR: 1819 /* deprecated? */ 1820 assert(0); 1821 return FALSE; 1822 break; 1823 1824 case TGSI_OPCODE_SAD: 1825 /* deprecated? */ 1826 assert(0); 1827 return FALSE; 1828 break; 1829 1830 case TGSI_OPCODE_TXF: 1831 /* deprecated? */ 1832 assert(0); 1833 return FALSE; 1834 break; 1835 1836 case TGSI_OPCODE_TXQ: 1837 /* deprecated? 
*/ 1838 assert(0); 1839 return FALSE; 1840 break; 1841 1842 case TGSI_OPCODE_CONT: 1843 lp_exec_continue(&bld->exec_mask); 1844 break; 1845 1846 case TGSI_OPCODE_EMIT: 1847 return FALSE; 1848 break; 1849 1850 case TGSI_OPCODE_ENDPRIM: 1851 return FALSE; 1852 break; 1853 1854 case TGSI_OPCODE_NOP: 1855 break; 1856 1857 default: 1858 return FALSE; 1859 } 1860 1861 if(info->num_dst) { 1862 LLVMValueRef pred[NUM_CHANNELS]; 1863 1864 emit_fetch_predicate( bld, inst, pred ); 1865 1866 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1867 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]); 1868 } 1869 } 1870 1871 return TRUE; 1872} 1873 1874


/**
 * Translate a TGSI token stream into LLVM IR (SoA).
 *
 * Fills in a lp_build_tgsi_soa_context from the arguments and then walks the
 * token stream once, dispatching on the token type:
 *  - declarations are handed to emit_declaration();
 *  - instructions are handed to emit_instruction(); a warning is printed
 *    when an instruction cannot be translated;
 *  - immediates are converted to constant vectors and appended to
 *    bld.immediates[], with unused components set to undef;
 *  - properties are ignored.
 *
 * @param builder     LLVM builder used to initialise both build contexts
 * @param tokens      TGSI token stream to translate
 * @param type        type of the main build context; the integer context
 *                    is initialised with lp_int_type(type)
 * @param mask        stored in the context (bld.mask)
 * @param consts_ptr  stored in the context (bld.consts_ptr)
 * @param pos         stored in the context (bld.pos)
 * @param inputs      stored in the context (bld.inputs)
 * @param outputs     stored in the context (bld.outputs)
 * @param sampler     stored in the context (bld.sampler)
 * @param info        shader info; only opcode_count[] is read here, to
 *                    detect the use of ARL/ARR (indirect addressing)
 */
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_soa_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   unsigned i;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 ||
                                 info->opcode_count[TGSI_OPCODE_ARL] > 0;

   lp_exec_mask_init(&bld.exec_mask, &bld.base);

   /*
    * tgsi_parse_init() reports failure through its return value; bail out
    * instead of iterating over a parse context that was never set up.
    */
   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
      _debug_printf("warning: tgsi_parse_init() failed in lp_build_tgsi_soa()\n");
      return;
   }

   while( !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
            const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode);

            /*
             * NOTE(review): presumably tgsi_get_opcode_info() yields NULL for
             * an out-of-range opcode; never dereference it in that case.
             */
            if (!opcode_info) {
               _debug_printf("warning: unknown tgsi opcode %u\n", opcode);
            }
            else if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info )) {
               _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                             opcode_info->mnemonic);
            }
         }
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);

            /*
             * The asserts above vanish when assertions are compiled out, so
             * enforce the same limits at run time to keep the writes below
             * inside bld.immediates[].
             */
            if (num_immediates >= LP_MAX_TGSI_IMMEDIATES) {
               _debug_printf("warning: too many tgsi immediates, ignoring one\n");
               break;
            }
            if (size > 4) {
               size = 4;
            }

            for( i = 0; i < size; ++i ) {
               bld.immediates[num_immediates][i] =
                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
            }
            /* components the token does not provide are left undefined */
            for( i = size; i < 4; ++i ) {
               bld.immediates[num_immediates][i] = bld.base.undef;
            }
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert( 0 );
         break;
      }
   }

   /* Debugging aid, disabled: dump the TGSI tokens and the LLVM function. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   tgsi_parse_free( &parse );
}