lp_bld_tgsi_soa.c revision 32a7209c0a0d5ae63f12056ed969087d942c6298
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_info.h" 46#include "tgsi/tgsi_parse.h" 47#include "tgsi/tgsi_util.h" 48#include "tgsi/tgsi_exec.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_type.h" 51#include "lp_bld_const.h" 52#include "lp_bld_arit.h" 53#include "lp_bld_logic.h" 54#include "lp_bld_swizzle.h" 55#include "lp_bld_flow.h" 56#include "lp_bld_tgsi.h" 57#include "lp_bld_limits.h" 58#include "lp_bld_debug.h" 59 60 61#define FOR_EACH_CHANNEL( CHAN )\ 62 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 63 64#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 65 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 66 67#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 68 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 69 70#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 71 FOR_EACH_CHANNEL( CHAN )\ 72 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 73 74#define CHAN_X 0 75#define CHAN_Y 1 76#define CHAN_Z 2 77#define CHAN_W 3 78 79#define QUAD_TOP_LEFT 0 80#define QUAD_TOP_RIGHT 1 81#define QUAD_BOTTOM_LEFT 2 82#define QUAD_BOTTOM_RIGHT 3 83 84#define LP_MAX_INSTRUCTIONS 256 85 86 87struct lp_exec_mask { 88 struct lp_build_context *bld; 89 90 boolean has_mask; 91 92 LLVMTypeRef int_vec_type; 93 94 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 95 int cond_stack_size; 96 LLVMValueRef cond_mask; 97 98 LLVMBasicBlockRef loop_block; 99 LLVMValueRef cont_mask; 100 LLVMValueRef break_mask; 101 LLVMValueRef break_var; 102 struct { 103 LLVMBasicBlockRef loop_block; 104 LLVMValueRef cont_mask; 105 LLVMValueRef break_mask; 106 LLVMValueRef break_var; 107 } loop_stack[LP_MAX_TGSI_NESTING]; 108 int loop_stack_size; 109 110 LLVMValueRef ret_mask; 111 struct { 112 int pc; 113 LLVMValueRef ret_mask; 114 } call_stack[LP_MAX_TGSI_NESTING]; 115 int call_stack_size; 116 117 LLVMValueRef exec_mask; 118}; 119 120struct lp_build_tgsi_soa_context 121{ 122 struct lp_build_context base; 123 124 /* Builder for integer masks and indices */ 125 struct lp_build_context int_bld; 126 127 LLVMValueRef consts_ptr; 128 const LLVMValueRef *pos; 129 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 130 LLVMValueRef (*outputs)[NUM_CHANNELS]; 131 132 const struct lp_build_sampler_soa *sampler; 133 134 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; 135 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; 136 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; 137 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; 138 139 /* we allocate an array of temps if we have indirect 140 * addressing and then the temps above is unused */ 141 LLVMValueRef temps_array; 142 boolean has_indirect_addressing; 143 144 struct lp_build_mask_context *mask; 145 struct lp_exec_mask exec_mask; 146 147 struct tgsi_full_instruction *instructions; 148 uint max_instructions; 149}; 150 151static const unsigned char 152swizzle_left[4] = { 153 QUAD_TOP_LEFT, QUAD_TOP_LEFT, 154 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT 155}; 156 157static const unsigned char 158swizzle_right[4] = { 159 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, 160 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT 161}; 162 163static const unsigned char 164swizzle_top[4] = { 165 QUAD_TOP_LEFT, QUAD_TOP_RIGHT, 166 QUAD_TOP_LEFT, QUAD_TOP_RIGHT 167}; 168 169static const unsigned char 170swizzle_bottom[4] = { 171 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, 172 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT 173}; 174 175static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 176{ 177 mask->bld = bld; 178 mask->has_mask = FALSE; 179 mask->cond_stack_size = 0; 180 mask->loop_stack_size = 0; 181 mask->call_stack_size = 0; 182 183 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); 184 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = 185 LLVMConstAllOnes(mask->int_vec_type); 186} 187 188static void lp_exec_mask_update(struct lp_exec_mask *mask) 189{ 190 if (mask->loop_stack_size) { 191 /*for loops we need to update the entire mask at runtime */ 192 LLVMValueRef tmp; 193 assert(mask->break_mask); 194 tmp = LLVMBuildAnd(mask->bld->builder, 195 mask->cont_mask, 196 mask->break_mask, 197 "maskcb"); 198 mask->exec_mask = LLVMBuildAnd(mask->bld->builder, 199 mask->cond_mask, 200 tmp, 201 "maskfull"); 202 } else 203 mask->exec_mask = mask->cond_mask; 204 205 if (mask->call_stack_size) { 206 mask->exec_mask = LLVMBuildAnd(mask->bld->builder, 207 mask->exec_mask, 208 mask->ret_mask, 209 "callmask"); 210 } 211 212 mask->has_mask = (mask->cond_stack_size > 0 || 213 mask->loop_stack_size > 0 || 214 mask->call_stack_size > 0); 215} 216 217static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 218 LLVMValueRef val) 219{ 220 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 221 if (mask->cond_stack_size == 0) { 222 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); 223 } 224 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 225 assert(LLVMTypeOf(val) == mask->int_vec_type); 226 mask->cond_mask = val; 227 228 lp_exec_mask_update(mask); 229} 230 231static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 232{ 233 LLVMValueRef prev_mask; 234 LLVMValueRef inv_mask; 235 236 assert(mask->cond_stack_size); 237 prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 238 if (mask->cond_stack_size == 1) { 239 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); 240 } 241 242 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, ""); 243 244 mask->cond_mask = LLVMBuildAnd(mask->bld->builder, 245 inv_mask, 246 prev_mask, ""); 247 lp_exec_mask_update(mask); 248} 249 250static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 251{ 252 assert(mask->cond_stack_size); 253 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 254 lp_exec_mask_update(mask); 255} 256 257static void lp_exec_bgnloop(struct lp_exec_mask *mask) 258{ 259 if (mask->loop_stack_size == 0) { 260 assert(mask->loop_block == NULL); 261 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); 262 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type)); 263 assert(mask->break_var == NULL); 264 } 265 266 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING); 267 268 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block; 269 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask; 270 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask; 271 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; 272 ++mask->loop_stack_size; 273 274 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, ""); 275 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); 276 277 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); 278 LLVMBuildBr(mask->bld->builder, mask->loop_block); 279 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); 280 281 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, ""); 282 283 lp_exec_mask_update(mask); 284} 285 286static void lp_exec_break(struct lp_exec_mask *mask) 287{ 288 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 289 mask->exec_mask, 290 "break"); 291 292 mask->break_mask = LLVMBuildAnd(mask->bld->builder, 293 mask->break_mask, 294 exec_mask, "break_full"); 295 296 lp_exec_mask_update(mask); 297} 298 299static void lp_exec_continue(struct lp_exec_mask *mask) 300{ 301 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 302 mask->exec_mask, 303 ""); 304 305 mask->cont_mask = LLVMBuildAnd(mask->bld->builder, 306 mask->cont_mask, 307 exec_mask, ""); 308 309 lp_exec_mask_update(mask); 310} 311 312 313static void lp_exec_endloop(struct lp_exec_mask *mask) 314{ 315 LLVMBasicBlockRef endloop; 316 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* 317 mask->bld->type.length); 318 LLVMValueRef i1cond; 319 320 assert(mask->break_mask); 321 322 /* 323 * Restore the cont_mask, but don't pop 324 */ 325 assert(mask->loop_stack_size); 326 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask; 327 lp_exec_mask_update(mask); 328 329 /* 330 * Unlike the continue mask, the break_mask must be preserved across loop 331 * iterations 332 */ 333 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); 334 335 /* i1cond = (mask == 0) */ 336 i1cond = LLVMBuildICmp( 337 mask->bld->builder, 338 LLVMIntNE, 339 LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""), 340 LLVMConstNull(reg_type), ""); 341 342 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); 343 344 LLVMBuildCondBr(mask->bld->builder, 345 i1cond, mask->loop_block, endloop); 346 347 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); 348 349 assert(mask->loop_stack_size); 350 --mask->loop_stack_size; 351 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block; 352 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask; 353 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask; 354 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var; 355 356 lp_exec_mask_update(mask); 357} 358 359/* stores val into an address pointed to by dst. 360 * mask->exec_mask is used to figure out which bits of val 361 * should be stored into the address 362 * (0 means don't store this bit, 1 means do store). 363 */ 364static void lp_exec_mask_store(struct lp_exec_mask *mask, 365 LLVMValueRef pred, 366 LLVMValueRef val, 367 LLVMValueRef dst) 368{ 369 /* Mix the predicate and execution mask */ 370 if (mask->has_mask) { 371 if (pred) { 372 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, ""); 373 } else { 374 pred = mask->exec_mask; 375 } 376 } 377 378 if (pred) { 379 LLVMValueRef real_val, dst_val; 380 381 dst_val = LLVMBuildLoad(mask->bld->builder, dst, ""); 382 real_val = lp_build_select(mask->bld, 383 pred, 384 val, dst_val); 385 386 LLVMBuildStore(mask->bld->builder, real_val, dst); 387 } else 388 LLVMBuildStore(mask->bld->builder, val, dst); 389} 390 391static void lp_exec_mask_call(struct lp_exec_mask *mask, 392 int func, 393 int *pc) 394{ 395 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING); 396 mask->call_stack[mask->call_stack_size].pc = *pc; 397 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask; 398 mask->call_stack_size++; 399 *pc = func; 400} 401 402static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) 403{ 404 LLVMValueRef exec_mask; 405 406 if (mask->call_stack_size == 0) { 407 /* returning from main() */ 408 *pc = -1; 409 return; 410 } 411 exec_mask = LLVMBuildNot(mask->bld->builder, 412 mask->exec_mask, 413 "ret"); 414 415 mask->ret_mask = LLVMBuildAnd(mask->bld->builder, 416 mask->ret_mask, 417 exec_mask, "ret_full"); 418 419 lp_exec_mask_update(mask); 420} 421 422static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) 423{ 424} 425 426static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) 427{ 428 assert(mask->call_stack_size); 429 mask->call_stack_size--; 430 *pc = mask->call_stack[mask->call_stack_size].pc; 431 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask; 432 lp_exec_mask_update(mask); 433} 434 435static LLVMValueRef 436emit_ddx(struct lp_build_tgsi_soa_context *bld, 437 LLVMValueRef src) 438{ 439 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); 440 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); 441 return lp_build_sub(&bld->base, src_right, src_left); 442} 443 444 445static LLVMValueRef 446emit_ddy(struct lp_build_tgsi_soa_context *bld, 447 LLVMValueRef src) 448{ 449 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); 450 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); 451 return lp_build_sub(&bld->base, src_top, src_bottom); 452} 453 454static LLVMValueRef 455get_temp_ptr(struct lp_build_tgsi_soa_context *bld, 456 unsigned index, 457 unsigned chan, 458 boolean is_indirect, 459 LLVMValueRef addr) 460{ 461 assert(chan < 4); 462 if (!bld->has_indirect_addressing) { 463 return bld->temps[index][chan]; 464 } else { 465 LLVMValueRef lindex = 466 LLVMConstInt(LLVMInt32Type(), index * 4 + chan, 0); 467 if (is_indirect) 468 lindex = lp_build_add(&bld->base, lindex, addr); 469 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); 470 } 471} 472 473/** 474 * Register fetch. 475 */ 476static LLVMValueRef 477emit_fetch( 478 struct lp_build_tgsi_soa_context *bld, 479 const struct tgsi_full_instruction *inst, 480 unsigned index, 481 const unsigned chan_index ) 482{ 483 const struct tgsi_full_src_register *reg = &inst->Src[index]; 484 const unsigned swizzle = 485 tgsi_util_get_full_src_register_swizzle(reg, chan_index); 486 LLVMValueRef res; 487 LLVMValueRef addr = NULL; 488 489 if (swizzle > 3) { 490 assert(0 && "invalid swizzle in emit_fetch()"); 491 return bld->base.undef; 492 } 493 494 if (reg->Register.Indirect) { 495 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); 496 unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); 497 addr = LLVMBuildLoad(bld->base.builder, 498 bld->addr[reg->Indirect.Index][swizzle], 499 ""); 500 /* for indexing we want integers */ 501 addr = LLVMBuildFPToSI(bld->base.builder, addr, 502 int_vec_type, ""); 503 addr = LLVMBuildExtractElement(bld->base.builder, 504 addr, LLVMConstInt(LLVMInt32Type(), 0, 0), 505 ""); 506 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); 507 } 508 509 switch (reg->Register.File) { 510 case TGSI_FILE_CONSTANT: 511 { 512 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), 513 reg->Register.Index*4 + swizzle, 0); 514 LLVMValueRef scalar, scalar_ptr; 515 516 if (reg->Register.Indirect) { 517 /*lp_build_printf(bld->base.builder, 518 "\taddr = %d\n", addr);*/ 519 index = lp_build_add(&bld->base, index, addr); 520 } 521 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, 522 &index, 1, ""); 523 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 524 525 res = lp_build_broadcast_scalar(&bld->base, scalar); 526 } 527 break; 528 529 case TGSI_FILE_IMMEDIATE: 530 res = bld->immediates[reg->Register.Index][swizzle]; 531 assert(res); 532 break; 533 534 case TGSI_FILE_INPUT: 535 res = bld->inputs[reg->Register.Index][swizzle]; 536 assert(res); 537 break; 538 539 case TGSI_FILE_TEMPORARY: 540 { 541 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 542 swizzle, 543 reg->Register.Indirect, 544 addr); 545 res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); 546 if(!res) 547 return bld->base.undef; 548 } 549 break; 550 551 default: 552 assert(0 && "invalid src register in emit_fetch()"); 553 return bld->base.undef; 554 } 555 556 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { 557 case TGSI_UTIL_SIGN_CLEAR: 558 res = lp_build_abs( &bld->base, res ); 559 break; 560 561 case TGSI_UTIL_SIGN_SET: 562 /* TODO: Use bitwese OR for floating point */ 563 res = lp_build_abs( &bld->base, res ); 564 res = LLVMBuildNeg( bld->base.builder, res, "" ); 565 break; 566 567 case TGSI_UTIL_SIGN_TOGGLE: 568 res = LLVMBuildNeg( bld->base.builder, res, "" ); 569 break; 570 571 case TGSI_UTIL_SIGN_KEEP: 572 break; 573 } 574 575 return res; 576} 577 578 579/** 580 * Register fetch with derivatives. 581 */ 582static void 583emit_fetch_deriv( 584 struct lp_build_tgsi_soa_context *bld, 585 const struct tgsi_full_instruction *inst, 586 unsigned index, 587 const unsigned chan_index, 588 LLVMValueRef *res, 589 LLVMValueRef *ddx, 590 LLVMValueRef *ddy) 591{ 592 LLVMValueRef src; 593 594 src = emit_fetch(bld, inst, index, chan_index); 595 596 if(res) 597 *res = src; 598 599 /* TODO: use interpolation coeffs for inputs */ 600 601 if(ddx) 602 *ddx = emit_ddx(bld, src); 603 604 if(ddy) 605 *ddy = emit_ddy(bld, src); 606} 607 608 609/** 610 * Predicate. 611 */ 612static void 613emit_fetch_predicate( 614 struct lp_build_tgsi_soa_context *bld, 615 const struct tgsi_full_instruction *inst, 616 LLVMValueRef *pred) 617{ 618 unsigned index; 619 unsigned char swizzles[4]; 620 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; 621 LLVMValueRef value; 622 unsigned chan; 623 624 if (!inst->Instruction.Predicate) { 625 FOR_EACH_CHANNEL( chan ) { 626 pred[chan] = NULL; 627 } 628 return; 629 } 630 631 swizzles[0] = inst->Predicate.SwizzleX; 632 swizzles[1] = inst->Predicate.SwizzleY; 633 swizzles[2] = inst->Predicate.SwizzleZ; 634 swizzles[3] = inst->Predicate.SwizzleW; 635 636 index = inst->Predicate.Index; 637 assert(index < LP_MAX_TGSI_PREDS); 638 639 FOR_EACH_CHANNEL( chan ) { 640 unsigned swizzle = swizzles[chan]; 641 642 /* 643 * Only fetch the predicate register channels that are actually listed 644 * in the swizzles 645 */ 646 if (!unswizzled[swizzle]) { 647 value = LLVMBuildLoad(bld->base.builder, 648 bld->preds[index][swizzle], ""); 649 650 /* 651 * Convert the value to an integer mask. 652 * 653 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions 654 * is needlessly causing two comparisons due to storing the intermediate 655 * result as float vector instead of an integer mask vector. 656 */ 657 value = lp_build_compare(bld->base.builder, 658 bld->base.type, 659 PIPE_FUNC_NOTEQUAL, 660 value, 661 bld->base.zero); 662 if (inst->Predicate.Negate) { 663 value = LLVMBuildNot(bld->base.builder, value, ""); 664 } 665 666 unswizzled[swizzle] = value; 667 } else { 668 value = unswizzled[swizzle]; 669 } 670 671 pred[chan] = value; 672 } 673} 674 675 676/** 677 * Register store. 678 */ 679static void 680emit_store( 681 struct lp_build_tgsi_soa_context *bld, 682 const struct tgsi_full_instruction *inst, 683 unsigned index, 684 unsigned chan_index, 685 LLVMValueRef pred, 686 LLVMValueRef value) 687{ 688 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 689 LLVMValueRef addr = NULL; 690 691 switch( inst->Instruction.Saturate ) { 692 case TGSI_SAT_NONE: 693 break; 694 695 case TGSI_SAT_ZERO_ONE: 696 value = lp_build_max(&bld->base, value, bld->base.zero); 697 value = lp_build_min(&bld->base, value, bld->base.one); 698 break; 699 700 case TGSI_SAT_MINUS_PLUS_ONE: 701 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); 702 value = lp_build_min(&bld->base, value, bld->base.one); 703 break; 704 705 default: 706 assert(0); 707 } 708 709 if (reg->Register.Indirect) { 710 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); 711 unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); 712 addr = LLVMBuildLoad(bld->base.builder, 713 bld->addr[reg->Indirect.Index][swizzle], 714 ""); 715 /* for indexing we want integers */ 716 addr = LLVMBuildFPToSI(bld->base.builder, addr, 717 int_vec_type, ""); 718 addr = LLVMBuildExtractElement(bld->base.builder, 719 addr, LLVMConstInt(LLVMInt32Type(), 0, 0), 720 ""); 721 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); 722 } 723 724 switch( reg->Register.File ) { 725 case TGSI_FILE_OUTPUT: 726 lp_exec_mask_store(&bld->exec_mask, pred, value, 727 bld->outputs[reg->Register.Index][chan_index]); 728 break; 729 730 case TGSI_FILE_TEMPORARY: { 731 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 732 chan_index, 733 reg->Register.Indirect, 734 addr); 735 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); 736 break; 737 } 738 739 case TGSI_FILE_ADDRESS: 740 lp_exec_mask_store(&bld->exec_mask, pred, value, 741 bld->addr[reg->Indirect.Index][chan_index]); 742 break; 743 744 case TGSI_FILE_PREDICATE: 745 lp_exec_mask_store(&bld->exec_mask, pred, value, 746 bld->preds[index][chan_index]); 747 break; 748 749 default: 750 assert( 0 ); 751 } 752} 753 754 755/** 756 * High-level instruction translators. 757 */ 758 759enum tex_modifier { 760 TEX_MODIFIER_NONE = 0, 761 TEX_MODIFIER_PROJECTED, 762 TEX_MODIFIER_LOD_BIAS, 763 TEX_MODIFIER_EXPLICIT_LOD, 764 TEX_MODIFIER_EXPLICIT_DERIV 765}; 766 767static void 768emit_tex( struct lp_build_tgsi_soa_context *bld, 769 const struct tgsi_full_instruction *inst, 770 enum tex_modifier modifier, 771 LLVMValueRef *texel) 772{ 773 unsigned unit; 774 LLVMValueRef lod_bias, explicit_lod; 775 LLVMValueRef oow = NULL; 776 LLVMValueRef coords[3]; 777 LLVMValueRef ddx[3]; 778 LLVMValueRef ddy[3]; 779 unsigned num_coords; 780 unsigned i; 781 782 if (!bld->sampler) { 783 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 784 for (i = 0; i < 4; i++) { 785 texel[i] = bld->base.undef; 786 } 787 return; 788 } 789 790 switch (inst->Texture.Texture) { 791 case TGSI_TEXTURE_1D: 792 num_coords = 1; 793 break; 794 case TGSI_TEXTURE_2D: 795 case TGSI_TEXTURE_RECT: 796 num_coords = 2; 797 break; 798 case TGSI_TEXTURE_SHADOW1D: 799 case TGSI_TEXTURE_SHADOW2D: 800 case TGSI_TEXTURE_SHADOWRECT: 801 case TGSI_TEXTURE_3D: 802 case TGSI_TEXTURE_CUBE: 803 num_coords = 3; 804 break; 805 default: 806 assert(0); 807 return; 808 } 809 810 if (modifier == TEX_MODIFIER_LOD_BIAS) { 811 lod_bias = emit_fetch( bld, inst, 0, 3 ); 812 explicit_lod = NULL; 813 } 814 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 815 lod_bias = NULL; 816 explicit_lod = emit_fetch( bld, inst, 0, 3 ); 817 } 818 else { 819 lod_bias = NULL; 820 explicit_lod = NULL; 821 } 822 823 if (modifier == TEX_MODIFIER_PROJECTED) { 824 oow = emit_fetch( bld, inst, 0, 3 ); 825 oow = lp_build_rcp(&bld->base, oow); 826 } 827 828 for (i = 0; i < num_coords; i++) { 829 coords[i] = emit_fetch( bld, inst, 0, i ); 830 if (modifier == TEX_MODIFIER_PROJECTED) 831 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 832 } 833 for (i = num_coords; i < 3; i++) { 834 coords[i] = bld->base.undef; 835 } 836 837 if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) { 838 for (i = 0; i < num_coords; i++) { 839 ddx[i] = emit_fetch( bld, inst, 1, i ); 840 ddy[i] = emit_fetch( bld, inst, 2, i ); 841 } 842 unit = inst->Src[3].Register.Index; 843 } else { 844 for (i = 0; i < num_coords; i++) { 845 ddx[i] = emit_ddx( bld, coords[i] ); 846 ddy[i] = emit_ddy( bld, coords[i] ); 847 } 848 unit = inst->Src[1].Register.Index; 849 } 850 for (i = num_coords; i < 3; i++) { 851 ddx[i] = bld->base.undef; 852 ddy[i] = bld->base.undef; 853 } 854 855 bld->sampler->emit_fetch_texel(bld->sampler, 856 bld->base.builder, 857 bld->base.type, 858 unit, num_coords, coords, 859 ddx, ddy, 860 lod_bias, explicit_lod, 861 texel); 862} 863 864 865/** 866 * Kill fragment if any of the src register values are negative. 867 */ 868static void 869emit_kil( 870 struct lp_build_tgsi_soa_context *bld, 871 const struct tgsi_full_instruction *inst ) 872{ 873 const struct tgsi_full_src_register *reg = &inst->Src[0]; 874 LLVMValueRef terms[NUM_CHANNELS]; 875 LLVMValueRef mask; 876 unsigned chan_index; 877 878 memset(&terms, 0, sizeof terms); 879 880 FOR_EACH_CHANNEL( chan_index ) { 881 unsigned swizzle; 882 883 /* Unswizzle channel */ 884 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 885 886 /* Check if the component has not been already tested. */ 887 assert(swizzle < NUM_CHANNELS); 888 if( !terms[swizzle] ) 889 /* TODO: change the comparison operator instead of setting the sign */ 890 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 891 } 892 893 mask = NULL; 894 FOR_EACH_CHANNEL( chan_index ) { 895 if(terms[chan_index]) { 896 LLVMValueRef chan_mask; 897 898 /* 899 * If term < 0 then mask = 0 else mask = ~0. 900 */ 901 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 902 903 if(mask) 904 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); 905 else 906 mask = chan_mask; 907 } 908 } 909 910 if(mask) 911 lp_build_mask_update(bld->mask, mask); 912} 913 914 915/** 916 * Predicated fragment kill. 917 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 918 * The only predication is the execution mask which will apply if 919 * we're inside a loop or conditional. 920 */ 921static void 922emit_kilp(struct lp_build_tgsi_soa_context *bld, 923 const struct tgsi_full_instruction *inst) 924{ 925 LLVMValueRef mask; 926 927 /* For those channels which are "alive", disable fragment shader 928 * execution. 929 */ 930 if (bld->exec_mask.has_mask) { 931 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); 932 } 933 else { 934 mask = bld->base.zero; 935 } 936 937 lp_build_mask_update(bld->mask, mask); 938} 939 940static void 941emit_declaration( 942 struct lp_build_tgsi_soa_context *bld, 943 const struct tgsi_full_declaration *decl) 944{ 945 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); 946 947 unsigned first = decl->Range.First; 948 unsigned last = decl->Range.Last; 949 unsigned idx, i; 950 951 for (idx = first; idx <= last; ++idx) { 952 switch (decl->Declaration.File) { 953 case TGSI_FILE_TEMPORARY: 954 assert(idx < LP_MAX_TGSI_TEMPS); 955 if (bld->has_indirect_addressing) { 956 LLVMValueRef val = LLVMConstInt(LLVMInt32Type(), 957 last*4 + 4, 0); 958 bld->temps_array = lp_build_array_alloca(bld->base.builder, 959 vec_type, val, ""); 960 } else { 961 for (i = 0; i < NUM_CHANNELS; i++) 962 bld->temps[idx][i] = lp_build_alloca(bld->base.builder, 963 vec_type, ""); 964 } 965 break; 966 967 case TGSI_FILE_OUTPUT: 968 for (i = 0; i < NUM_CHANNELS; i++) 969 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, 970 vec_type, ""); 971 break; 972 973 case TGSI_FILE_ADDRESS: 974 assert(idx < LP_MAX_TGSI_ADDRS); 975 for (i = 0; i < NUM_CHANNELS; i++) 976 bld->addr[idx][i] = lp_build_alloca(bld->base.builder, 977 vec_type, ""); 978 break; 979 980 case TGSI_FILE_PREDICATE: 981 assert(idx < LP_MAX_TGSI_PREDS); 982 for (i = 0; i < NUM_CHANNELS; i++) 983 bld->preds[idx][i] = lp_build_alloca(bld->base.builder, 984 vec_type, ""); 985 break; 986 987 default: 988 /* don't need to declare other vars */ 989 break; 990 } 991 } 992} 993 994 995/** 996 * Emit LLVM for one TGSI instruction. 997 * \param return TRUE for success, FALSE otherwise 998 */ 999static boolean 1000emit_instruction( 1001 struct lp_build_tgsi_soa_context *bld, 1002 const struct tgsi_full_instruction *inst, 1003 const struct tgsi_opcode_info *info, 1004 int *pc) 1005{ 1006 unsigned chan_index; 1007 LLVMValueRef src0, src1, src2; 1008 LLVMValueRef tmp0, tmp1, tmp2; 1009 LLVMValueRef tmp3 = NULL; 1010 LLVMValueRef tmp4 = NULL; 1011 LLVMValueRef tmp5 = NULL; 1012 LLVMValueRef tmp6 = NULL; 1013 LLVMValueRef tmp7 = NULL; 1014 LLVMValueRef res; 1015 LLVMValueRef dst0[NUM_CHANNELS]; 1016 1017 /* 1018 * Stores and write masks are handled in a general fashion after the long 1019 * instruction opcode switch statement. 1020 * 1021 * Although not stricitly necessary, we avoid generating instructions for 1022 * channels which won't be stored, in cases where's that easy. For some 1023 * complex instructions, like texture sampling, it is more convenient to 1024 * assume a full writemask and then let LLVM optimization passes eliminate 1025 * redundant code. 1026 */ 1027 1028 (*pc)++; 1029 1030 assert(info->num_dst <= 1); 1031 if (info->num_dst) { 1032 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1033 dst0[chan_index] = bld->base.undef; 1034 } 1035 } 1036 1037 switch (inst->Instruction.Opcode) { 1038 case TGSI_OPCODE_ARL: 1039 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1040 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1041 tmp0 = lp_build_floor(&bld->base, tmp0); 1042 dst0[chan_index] = tmp0; 1043 } 1044 break; 1045 1046 case TGSI_OPCODE_MOV: 1047 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1048 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1049 } 1050 break; 1051 1052 case TGSI_OPCODE_LIT: 1053 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 1054 dst0[CHAN_X] = bld->base.one; 1055 } 1056 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1057 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1058 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1059 } 1060 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1061 /* XMM[1] = SrcReg[0].yyyy */ 1062 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1063 /* XMM[1] = max(XMM[1], 0) */ 1064 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1065 /* XMM[2] = SrcReg[0].wwww */ 1066 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1067 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1068 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1069 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1070 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1071 } 1072 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1073 dst0[CHAN_W] = bld->base.one; 1074 } 1075 break; 1076 1077 case TGSI_OPCODE_RCP: 1078 /* TGSI_OPCODE_RECIP */ 1079 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1080 res = lp_build_rcp(&bld->base, src0); 1081 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1082 dst0[chan_index] = res; 1083 } 1084 break; 1085 1086 case TGSI_OPCODE_RSQ: 1087 /* TGSI_OPCODE_RECIPSQRT */ 1088 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1089 src0 = lp_build_abs(&bld->base, src0); 1090 res = lp_build_rsqrt(&bld->base, src0); 1091 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1092 dst0[chan_index] = res; 1093 } 1094 break; 1095 1096 case TGSI_OPCODE_EXP: 1097 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1098 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1099 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1100 LLVMValueRef *p_exp2_int_part = NULL; 1101 LLVMValueRef *p_frac_part = NULL; 1102 LLVMValueRef *p_exp2 = NULL; 1103 1104 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1105 1106 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1107 p_exp2_int_part = &tmp0; 1108 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1109 p_frac_part = &tmp1; 1110 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1111 p_exp2 = &tmp2; 1112 1113 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1114 1115 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1116 dst0[CHAN_X] = tmp0; 1117 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1118 dst0[CHAN_Y] = tmp1; 1119 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1120 dst0[CHAN_Z] = tmp2; 1121 } 1122 /* dst.w = 1.0 */ 1123 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1124 dst0[CHAN_W] = bld->base.one; 1125 } 1126 break; 1127 1128 case TGSI_OPCODE_LOG: 1129 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1130 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1131 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1132 LLVMValueRef *p_floor_log2 = NULL; 1133 LLVMValueRef *p_exp = NULL; 1134 LLVMValueRef *p_log2 = NULL; 1135 1136 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1137 src0 = lp_build_abs( &bld->base, src0 ); 1138 1139 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1140 p_floor_log2 = &tmp0; 1141 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1142 p_exp = &tmp1; 1143 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1144 p_log2 = &tmp2; 1145 1146 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1147 1148 /* dst.x = floor(lg2(abs(src.x))) */ 1149 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1150 dst0[CHAN_X] = tmp0; 1151 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1152 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1153 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1154 } 1155 /* dst.z = lg2(abs(src.x)) */ 1156 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1157 dst0[CHAN_Z] = tmp2; 1158 } 1159 /* dst.w = 1.0 */ 1160 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1161 dst0[CHAN_W] = bld->base.one; 1162 } 1163 break; 1164 1165 case TGSI_OPCODE_MUL: 1166 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1167 src0 = emit_fetch( bld, inst, 0, chan_index ); 1168 src1 = emit_fetch( bld, inst, 1, chan_index ); 1169 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1170 } 1171 break; 1172 1173 case TGSI_OPCODE_ADD: 1174 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1175 src0 = emit_fetch( bld, inst, 0, chan_index ); 1176 src1 = emit_fetch( bld, inst, 1, chan_index ); 1177 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1178 } 1179 break; 1180 1181 case TGSI_OPCODE_DP3: 1182 /* TGSI_OPCODE_DOT3 */ 1183 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1184 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1185 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1186 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1187 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1188 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1189 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1190 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1191 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1192 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1193 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1194 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1195 dst0[chan_index] = tmp0; 1196 } 1197 break; 1198 1199 case TGSI_OPCODE_DP4: 1200 /* TGSI_OPCODE_DOT4 */ 1201 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1202 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1203 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1204 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1205 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1206 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1207 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1208 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1209 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1210 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1211 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1212 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1213 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1214 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1215 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1216 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1217 dst0[chan_index] = tmp0; 1218 } 1219 break; 1220 1221 case TGSI_OPCODE_DST: 1222 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1223 dst0[CHAN_X] = bld->base.one; 1224 } 1225 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1226 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1227 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1228 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1229 } 1230 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1231 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1232 } 1233 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1234 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1235 } 1236 break; 1237 1238 case TGSI_OPCODE_MIN: 1239 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1240 src0 = emit_fetch( bld, inst, 0, chan_index ); 1241 src1 = emit_fetch( bld, inst, 1, chan_index ); 1242 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1243 } 1244 break; 1245 1246 case TGSI_OPCODE_MAX: 1247 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1248 src0 = emit_fetch( bld, inst, 0, chan_index ); 1249 src1 = emit_fetch( bld, inst, 1, chan_index ); 1250 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1251 } 1252 break; 1253 1254 case TGSI_OPCODE_SLT: 1255 /* TGSI_OPCODE_SETLT */ 1256 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1257 src0 = emit_fetch( bld, inst, 0, chan_index ); 1258 src1 = emit_fetch( bld, inst, 1, chan_index ); 1259 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1260 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1261 } 1262 break; 1263 1264 case TGSI_OPCODE_SGE: 1265 /* TGSI_OPCODE_SETGE */ 1266 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1267 src0 = emit_fetch( bld, inst, 0, chan_index ); 1268 src1 = emit_fetch( bld, inst, 1, chan_index ); 1269 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1270 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1271 } 1272 break; 1273 1274 case TGSI_OPCODE_MAD: 1275 /* TGSI_OPCODE_MADD */ 1276 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1277 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1278 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1279 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1280 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1281 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1282 dst0[chan_index] = tmp0; 1283 } 1284 break; 1285 1286 case TGSI_OPCODE_SUB: 1287 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1288 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1289 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1290 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1291 } 1292 break; 1293 1294 case TGSI_OPCODE_LRP: 1295 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1296 src0 = emit_fetch( bld, inst, 0, chan_index ); 1297 src1 = emit_fetch( bld, inst, 1, chan_index ); 1298 src2 = emit_fetch( bld, inst, 2, chan_index ); 1299 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1300 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1301 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1302 } 1303 break; 1304 1305 case TGSI_OPCODE_CND: 1306 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1307 src0 = emit_fetch( bld, inst, 0, chan_index ); 1308 src1 = emit_fetch( bld, inst, 1, chan_index ); 1309 src2 = emit_fetch( bld, inst, 2, chan_index ); 1310 tmp1 = lp_build_const_vec(bld->base.type, 0.5); 1311 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1312 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1313 } 1314 break; 1315 1316 case TGSI_OPCODE_DP2A: 1317 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1318 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1319 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1320 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1321 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1322 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1323 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1324 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1325 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1326 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1327 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1328 } 1329 break; 1330 1331 case TGSI_OPCODE_FRC: 1332 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1333 src0 = emit_fetch( bld, inst, 0, chan_index ); 1334 tmp0 = lp_build_floor(&bld->base, src0); 1335 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1336 dst0[chan_index] = tmp0; 1337 } 1338 break; 1339 1340 case TGSI_OPCODE_CLAMP: 1341 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1342 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1343 src1 = emit_fetch( bld, inst, 1, chan_index ); 1344 src2 = emit_fetch( bld, inst, 2, chan_index ); 1345 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1346 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1347 dst0[chan_index] = tmp0; 1348 } 1349 break; 1350 1351 case TGSI_OPCODE_FLR: 1352 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1353 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1354 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1355 } 1356 break; 1357 1358 case TGSI_OPCODE_ROUND: 1359 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1360 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1361 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1362 } 1363 break; 1364 1365 case TGSI_OPCODE_EX2: { 1366 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1367 tmp0 = lp_build_exp2( &bld->base, tmp0); 1368 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1369 dst0[chan_index] = tmp0; 1370 } 1371 break; 1372 } 1373 1374 case TGSI_OPCODE_LG2: 1375 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1376 tmp0 = lp_build_log2( &bld->base, tmp0); 1377 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1378 dst0[chan_index] = tmp0; 1379 } 1380 break; 1381 1382 case TGSI_OPCODE_POW: 1383 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1384 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1385 res = lp_build_pow( &bld->base, src0, src1 ); 1386 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1387 dst0[chan_index] = res; 1388 } 1389 break; 1390 1391 case TGSI_OPCODE_XPD: 1392 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1393 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1394 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1395 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1396 } 1397 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1398 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1399 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1400 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1401 } 1402 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1403 tmp2 = tmp0; 1404 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1405 tmp5 = tmp3; 1406 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1407 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1408 dst0[CHAN_X] = tmp2; 1409 } 1410 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1411 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1412 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1413 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 1414 } 1415 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1416 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1417 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1418 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 1419 dst0[CHAN_Y] = tmp3; 1420 } 1421 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1422 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1423 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1424 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1425 dst0[CHAN_Z] = tmp5; 1426 } 1427 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1428 dst0[CHAN_W] = bld->base.one; 1429 } 1430 break; 1431 1432 case TGSI_OPCODE_ABS: 1433 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1434 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1435 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1436 } 1437 break; 1438 1439 case TGSI_OPCODE_RCC: 1440 /* deprecated? */ 1441 assert(0); 1442 return FALSE; 1443 1444 case TGSI_OPCODE_DPH: 1445 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1446 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1447 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1448 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1449 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1450 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1451 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1452 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1453 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1454 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1455 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1456 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1457 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1458 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1459 dst0[chan_index] = tmp0; 1460 } 1461 break; 1462 1463 case TGSI_OPCODE_COS: 1464 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1465 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1466 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1467 dst0[chan_index] = tmp0; 1468 } 1469 break; 1470 1471 case TGSI_OPCODE_DDX: 1472 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1473 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1474 } 1475 break; 1476 1477 case TGSI_OPCODE_DDY: 1478 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1479 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1480 } 1481 break; 1482 1483 case TGSI_OPCODE_KILP: 1484 /* predicated kill */ 1485 emit_kilp( bld, inst ); 1486 break; 1487 1488 case TGSI_OPCODE_KIL: 1489 /* conditional kill */ 1490 emit_kil( bld, inst ); 1491 break; 1492 1493 case TGSI_OPCODE_PK2H: 1494 return FALSE; 1495 break; 1496 1497 case TGSI_OPCODE_PK2US: 1498 return FALSE; 1499 break; 1500 1501 case TGSI_OPCODE_PK4B: 1502 return FALSE; 1503 break; 1504 1505 case TGSI_OPCODE_PK4UB: 1506 return FALSE; 1507 break; 1508 1509 case TGSI_OPCODE_RFL: 1510 return FALSE; 1511 break; 1512 1513 case TGSI_OPCODE_SEQ: 1514 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1515 src0 = emit_fetch( bld, inst, 0, chan_index ); 1516 src1 = emit_fetch( bld, inst, 1, chan_index ); 1517 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1518 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1519 } 1520 break; 1521 1522 case TGSI_OPCODE_SFL: 1523 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1524 dst0[chan_index] = bld->base.zero; 1525 } 1526 break; 1527 1528 case TGSI_OPCODE_SGT: 1529 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1530 src0 = emit_fetch( bld, inst, 0, chan_index ); 1531 src1 = emit_fetch( bld, inst, 1, chan_index ); 1532 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1533 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1534 } 1535 break; 1536 1537 case TGSI_OPCODE_SIN: 1538 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1539 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1540 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1541 dst0[chan_index] = tmp0; 1542 } 1543 break; 1544 1545 case TGSI_OPCODE_SLE: 1546 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1547 src0 = emit_fetch( bld, inst, 0, chan_index ); 1548 src1 = emit_fetch( bld, inst, 1, chan_index ); 1549 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 1550 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1551 } 1552 break; 1553 1554 case TGSI_OPCODE_SNE: 1555 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1556 src0 = emit_fetch( bld, inst, 0, chan_index ); 1557 src1 = emit_fetch( bld, inst, 1, chan_index ); 1558 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1559 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1560 } 1561 break; 1562 1563 case TGSI_OPCODE_STR: 1564 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1565 dst0[chan_index] = bld->base.one; 1566 } 1567 break; 1568 1569 case TGSI_OPCODE_TEX: 1570 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 ); 1571 break; 1572 1573 case TGSI_OPCODE_TXD: 1574 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1575 break; 1576 1577 case TGSI_OPCODE_UP2H: 1578 /* deprecated */ 1579 assert (0); 1580 return FALSE; 1581 break; 1582 1583 case TGSI_OPCODE_UP2US: 1584 /* deprecated */ 1585 assert(0); 1586 return FALSE; 1587 break; 1588 1589 case TGSI_OPCODE_UP4B: 1590 /* deprecated */ 1591 assert(0); 1592 return FALSE; 1593 break; 1594 1595 case TGSI_OPCODE_UP4UB: 1596 /* deprecated */ 1597 assert(0); 1598 return FALSE; 1599 break; 1600 1601 case TGSI_OPCODE_X2D: 1602 /* deprecated? */ 1603 assert(0); 1604 return FALSE; 1605 break; 1606 1607 case TGSI_OPCODE_ARA: 1608 /* deprecated */ 1609 assert(0); 1610 return FALSE; 1611 break; 1612 1613 case TGSI_OPCODE_ARR: 1614 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1615 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1616 tmp0 = lp_build_round(&bld->base, tmp0); 1617 dst0[chan_index] = tmp0; 1618 } 1619 break; 1620 1621 case TGSI_OPCODE_BRA: 1622 /* deprecated */ 1623 assert(0); 1624 return FALSE; 1625 break; 1626 1627 case TGSI_OPCODE_CAL: 1628 lp_exec_mask_call(&bld->exec_mask, 1629 inst->Label.Label, 1630 pc); 1631 1632 break; 1633 1634 case TGSI_OPCODE_RET: 1635 lp_exec_mask_ret(&bld->exec_mask, pc); 1636 break; 1637 1638 case TGSI_OPCODE_END: 1639 *pc = -1; 1640 break; 1641 1642 case TGSI_OPCODE_SSG: 1643 /* TGSI_OPCODE_SGN */ 1644 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1645 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1646 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1647 } 1648 break; 1649 1650 case TGSI_OPCODE_CMP: 1651 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1652 src0 = emit_fetch( bld, inst, 0, chan_index ); 1653 src1 = emit_fetch( bld, inst, 1, chan_index ); 1654 src2 = emit_fetch( bld, inst, 2, chan_index ); 1655 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 1656 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 1657 } 1658 break; 1659 1660 case TGSI_OPCODE_SCS: 1661 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1662 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1663 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 1664 } 1665 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1666 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1667 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 1668 } 1669 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1670 dst0[CHAN_Z] = bld->base.zero; 1671 } 1672 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1673 dst0[CHAN_W] = bld->base.one; 1674 } 1675 break; 1676 1677 case TGSI_OPCODE_TXB: 1678 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 ); 1679 break; 1680 1681 case TGSI_OPCODE_NRM: 1682 /* fall-through */ 1683 case TGSI_OPCODE_NRM4: 1684 /* 3 or 4-component normalization */ 1685 { 1686 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 1687 1688 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 1689 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 1690 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 1691 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 1692 1693 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1694 1695 /* xmm4 = src.x */ 1696 /* xmm0 = src.x * src.x */ 1697 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1698 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1699 tmp4 = tmp0; 1700 } 1701 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 1702 1703 /* xmm5 = src.y */ 1704 /* xmm0 = xmm0 + src.y * src.y */ 1705 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 1706 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1707 tmp5 = tmp1; 1708 } 1709 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1710 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1711 1712 /* xmm6 = src.z */ 1713 /* xmm0 = xmm0 + src.z * src.z */ 1714 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 1715 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1716 tmp6 = tmp1; 1717 } 1718 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1719 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1720 1721 if (dims == 4) { 1722 /* xmm7 = src.w */ 1723 /* xmm0 = xmm0 + src.w * src.w */ 1724 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 1725 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 1726 tmp7 = tmp1; 1727 } 1728 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1729 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1730 } 1731 1732 /* xmm1 = 1 / sqrt(xmm0) */ 1733 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 1734 1735 /* dst.x = xmm1 * src.x */ 1736 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1737 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 1738 } 1739 1740 /* dst.y = xmm1 * src.y */ 1741 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1742 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 1743 } 1744 1745 /* dst.z = xmm1 * src.z */ 1746 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1747 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 1748 } 1749 1750 /* dst.w = xmm1 * src.w */ 1751 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 1752 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 1753 } 1754 } 1755 1756 /* dst.w = 1.0 */ 1757 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 1758 dst0[CHAN_W] = bld->base.one; 1759 } 1760 } 1761 break; 1762 1763 case TGSI_OPCODE_DIV: 1764 /* deprecated */ 1765 assert( 0 ); 1766 return FALSE; 1767 break; 1768 1769 case TGSI_OPCODE_DP2: 1770 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1771 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1772 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1773 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1774 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1775 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1776 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1777 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1778 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1779 } 1780 break; 1781 1782 case TGSI_OPCODE_TXL: 1783 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 1784 break; 1785 1786 case TGSI_OPCODE_TXP: 1787 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 ); 1788 break; 1789 1790 case TGSI_OPCODE_BRK: 1791 lp_exec_break(&bld->exec_mask); 1792 break; 1793 1794 case TGSI_OPCODE_IF: 1795 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1796 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 1797 tmp0, bld->base.zero); 1798 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 1799 break; 1800 1801 case TGSI_OPCODE_BGNLOOP: 1802 lp_exec_bgnloop(&bld->exec_mask); 1803 break; 1804 1805 case TGSI_OPCODE_BGNSUB: 1806 lp_exec_mask_bgnsub(&bld->exec_mask); 1807 break; 1808 1809 case TGSI_OPCODE_ELSE: 1810 lp_exec_mask_cond_invert(&bld->exec_mask); 1811 break; 1812 1813 case TGSI_OPCODE_ENDIF: 1814 lp_exec_mask_cond_pop(&bld->exec_mask); 1815 break; 1816 1817 case TGSI_OPCODE_ENDLOOP: 1818 lp_exec_endloop(&bld->exec_mask); 1819 break; 1820 1821 case TGSI_OPCODE_ENDSUB: 1822 lp_exec_mask_endsub(&bld->exec_mask, pc); 1823 break; 1824 1825 case TGSI_OPCODE_PUSHA: 1826 /* deprecated? */ 1827 assert(0); 1828 return FALSE; 1829 break; 1830 1831 case TGSI_OPCODE_POPA: 1832 /* deprecated? */ 1833 assert(0); 1834 return FALSE; 1835 break; 1836 1837 case TGSI_OPCODE_CEIL: 1838 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1839 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1840 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 1841 } 1842 break; 1843 1844 case TGSI_OPCODE_I2F: 1845 /* deprecated? */ 1846 assert(0); 1847 return FALSE; 1848 break; 1849 1850 case TGSI_OPCODE_NOT: 1851 /* deprecated? */ 1852 assert(0); 1853 return FALSE; 1854 break; 1855 1856 case TGSI_OPCODE_TRUNC: 1857 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1858 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1859 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 1860 } 1861 break; 1862 1863 case TGSI_OPCODE_SHL: 1864 /* deprecated? */ 1865 assert(0); 1866 return FALSE; 1867 break; 1868 1869 case TGSI_OPCODE_ISHR: 1870 /* deprecated? */ 1871 assert(0); 1872 return FALSE; 1873 break; 1874 1875 case TGSI_OPCODE_AND: 1876 /* deprecated? */ 1877 assert(0); 1878 return FALSE; 1879 break; 1880 1881 case TGSI_OPCODE_OR: 1882 /* deprecated? */ 1883 assert(0); 1884 return FALSE; 1885 break; 1886 1887 case TGSI_OPCODE_MOD: 1888 /* deprecated? */ 1889 assert(0); 1890 return FALSE; 1891 break; 1892 1893 case TGSI_OPCODE_XOR: 1894 /* deprecated? */ 1895 assert(0); 1896 return FALSE; 1897 break; 1898 1899 case TGSI_OPCODE_SAD: 1900 /* deprecated? */ 1901 assert(0); 1902 return FALSE; 1903 break; 1904 1905 case TGSI_OPCODE_TXF: 1906 /* deprecated? */ 1907 assert(0); 1908 return FALSE; 1909 break; 1910 1911 case TGSI_OPCODE_TXQ: 1912 /* deprecated? */ 1913 assert(0); 1914 return FALSE; 1915 break; 1916 1917 case TGSI_OPCODE_CONT: 1918 lp_exec_continue(&bld->exec_mask); 1919 break; 1920 1921 case TGSI_OPCODE_EMIT: 1922 return FALSE; 1923 break; 1924 1925 case TGSI_OPCODE_ENDPRIM: 1926 return FALSE; 1927 break; 1928 1929 case TGSI_OPCODE_NOP: 1930 break; 1931 1932 default: 1933 return FALSE; 1934 } 1935 1936 if(info->num_dst) { 1937 LLVMValueRef pred[NUM_CHANNELS]; 1938 1939 emit_fetch_predicate( bld, inst, pred ); 1940 1941 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1942 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]); 1943 } 1944 } 1945 1946 return TRUE; 1947} 1948 1949 1950void 1951lp_build_tgsi_soa(LLVMBuilderRef builder, 1952 const struct tgsi_token *tokens, 1953 struct lp_type type, 1954 struct lp_build_mask_context *mask, 1955 LLVMValueRef consts_ptr, 1956 const LLVMValueRef *pos, 1957 const LLVMValueRef (*inputs)[NUM_CHANNELS], 1958 LLVMValueRef (*outputs)[NUM_CHANNELS], 1959 struct lp_build_sampler_soa *sampler, 1960 const struct tgsi_shader_info *info) 1961{ 1962 struct lp_build_tgsi_soa_context bld; 1963 struct tgsi_parse_context parse; 1964 uint num_immediates = 0; 1965 uint num_instructions = 0; 1966 unsigned i; 1967 int pc = 0; 1968 1969 /* Setup build context */ 1970 memset(&bld, 0, sizeof bld); 1971 lp_build_context_init(&bld.base, builder, type); 1972 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type)); 1973 bld.mask = mask; 1974 bld.pos = pos; 1975 bld.inputs = inputs; 1976 bld.outputs = outputs; 1977 bld.consts_ptr = consts_ptr; 1978 bld.sampler = sampler; 1979 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 || 1980 info->opcode_count[TGSI_OPCODE_ARL] > 0; 1981 bld.instructions = (struct tgsi_full_instruction *) 1982 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); 1983 bld.max_instructions = LP_MAX_INSTRUCTIONS; 1984 1985 if (!bld.instructions) { 1986 return; 1987 } 1988 1989 lp_exec_mask_init(&bld.exec_mask, &bld.base); 1990 1991 tgsi_parse_init( &parse, tokens ); 1992 1993 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1994 tgsi_parse_token( &parse ); 1995 1996 switch( parse.FullToken.Token.Type ) { 1997 case TGSI_TOKEN_TYPE_DECLARATION: 1998 /* Inputs already interpolated */ 1999 emit_declaration( &bld, &parse.FullToken.FullDeclaration ); 2000 break; 2001 2002 case TGSI_TOKEN_TYPE_INSTRUCTION: 2003 { 2004 /* save expanded instruction */ 2005 if (num_instructions == bld.max_instructions) { 2006 bld.instructions = REALLOC(bld.instructions, 2007 bld.max_instructions 2008 * sizeof(struct tgsi_full_instruction), 2009 (bld.max_instructions + LP_MAX_INSTRUCTIONS) 2010 * sizeof(struct tgsi_full_instruction)); 2011 bld.max_instructions += LP_MAX_INSTRUCTIONS; 2012 } 2013 2014 memcpy(bld.instructions + num_instructions, 2015 &parse.FullToken.FullInstruction, 2016 sizeof(bld.instructions[0])); 2017 2018 num_instructions++; 2019 } 2020 2021 break; 2022 2023 case TGSI_TOKEN_TYPE_IMMEDIATE: 2024 /* simply copy the immediate values into the next immediates[] slot */ 2025 { 2026 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 2027 assert(size <= 4); 2028 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 2029 for( i = 0; i < size; ++i ) 2030 bld.immediates[num_immediates][i] = 2031 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); 2032 for( i = size; i < 4; ++i ) 2033 bld.immediates[num_immediates][i] = bld.base.undef; 2034 num_immediates++; 2035 } 2036 break; 2037 2038 case TGSI_TOKEN_TYPE_PROPERTY: 2039 break; 2040 2041 default: 2042 assert( 0 ); 2043 } 2044 } 2045 2046 while (pc != -1) { 2047 struct tgsi_full_instruction *instr = bld.instructions + pc; 2048 const struct tgsi_opcode_info *opcode_info = 2049 tgsi_get_opcode_info(instr->Instruction.Opcode); 2050 if (!emit_instruction( &bld, instr, opcode_info, &pc )) 2051 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 2052 opcode_info->mnemonic); 2053 } 2054 2055 if (0) { 2056 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); 2057 LLVMValueRef function = LLVMGetBasicBlockParent(block); 2058 debug_printf("11111111111111111111111111111 \n"); 2059 tgsi_dump(tokens, 0); 2060 lp_debug_dump_value(function); 2061 debug_printf("2222222222222222222222222222 \n"); 2062 } 2063 tgsi_parse_free( &parse ); 2064 2065 if (0) { 2066 LLVMModuleRef module = LLVMGetGlobalParent( 2067 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder))); 2068 LLVMDumpModule(module); 2069 2070 } 2071 2072 FREE( bld.instructions ); 2073} 2074 2075