lp_bld_tgsi_soa.c revision 6c8c88f02f0dc9cf39ce51d068525a94fccd5dc7
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 
37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_info.h" 46#include "tgsi/tgsi_parse.h" 47#include "tgsi/tgsi_util.h" 48#include "tgsi/tgsi_exec.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_type.h" 51#include "lp_bld_const.h" 52#include "lp_bld_arit.h" 53#include "lp_bld_logic.h" 54#include "lp_bld_swizzle.h" 55#include "lp_bld_flow.h" 56#include "lp_bld_tgsi.h" 57#include "lp_bld_limits.h" 58#include "lp_bld_debug.h" 59 60 61#define FOR_EACH_CHANNEL( CHAN )\ 62 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 63 64#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 65 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 66 67#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 68 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 69 70#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 71 FOR_EACH_CHANNEL( CHAN )\ 72 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 73 74#define CHAN_X 0 75#define CHAN_Y 1 76#define CHAN_Z 2 77#define CHAN_W 3 78 79#define QUAD_TOP_LEFT 0 80#define QUAD_TOP_RIGHT 1 81#define QUAD_BOTTOM_LEFT 2 82#define QUAD_BOTTOM_RIGHT 3 83 84 85struct lp_exec_mask { 86 struct lp_build_context *bld; 87 88 boolean has_mask; 89 90 LLVMTypeRef int_vec_type; 91 92 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 93 int cond_stack_size; 94 LLVMValueRef cond_mask; 95 96 LLVMValueRef break_stack[LP_MAX_TGSI_NESTING]; 97 int break_stack_size; 98 LLVMValueRef break_mask; 99 100 LLVMValueRef cont_stack[LP_MAX_TGSI_NESTING]; 101 int cont_stack_size; 102 LLVMValueRef cont_mask; 103 104 LLVMBasicBlockRef loop_stack[LP_MAX_TGSI_NESTING]; 105 int loop_stack_size; 106 LLVMBasicBlockRef loop_block; 107 108 109 LLVMValueRef exec_mask; 110}; 111 112struct lp_build_tgsi_soa_context 113{ 114 struct lp_build_context base; 115 116 LLVMValueRef consts_ptr; 117 const LLVMValueRef *pos; 118 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 119 
LLVMValueRef (*outputs)[NUM_CHANNELS]; 120 121 struct lp_build_sampler_soa *sampler; 122 123 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; 124 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; 125 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; 126 127 /* we allocate an array of temps if we have indirect 128 * addressing and then the temps above is unused */ 129 LLVMValueRef temps_array; 130 boolean has_indirect_addressing; 131 132 struct lp_build_mask_context *mask; 133 struct lp_exec_mask exec_mask; 134}; 135 136static const unsigned char 137swizzle_left[4] = { 138 QUAD_TOP_LEFT, QUAD_TOP_LEFT, 139 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT 140}; 141 142static const unsigned char 143swizzle_right[4] = { 144 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, 145 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT 146}; 147 148static const unsigned char 149swizzle_top[4] = { 150 QUAD_TOP_LEFT, QUAD_TOP_RIGHT, 151 QUAD_TOP_LEFT, QUAD_TOP_RIGHT 152}; 153 154static const unsigned char 155swizzle_bottom[4] = { 156 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, 157 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT 158}; 159 160static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 161{ 162 mask->bld = bld; 163 mask->has_mask = FALSE; 164 mask->cond_stack_size = 0; 165 mask->loop_stack_size = 0; 166 mask->break_stack_size = 0; 167 mask->cont_stack_size = 0; 168 169 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); 170} 171 172static void lp_exec_mask_update(struct lp_exec_mask *mask) 173{ 174 if (mask->loop_stack_size) { 175 /*for loops we need to update the entire mask at runtime */ 176 LLVMValueRef tmp; 177 assert(mask->break_mask); 178 tmp = LLVMBuildAnd(mask->bld->builder, 179 mask->cont_mask, 180 mask->break_mask, 181 "maskcb"); 182 mask->exec_mask = LLVMBuildAnd(mask->bld->builder, 183 mask->cond_mask, 184 tmp, 185 "maskfull"); 186 } else 187 mask->exec_mask = mask->cond_mask; 188 189 190 mask->has_mask = (mask->cond_stack_size > 0 || 191 mask->loop_stack_size > 
0); 192} 193 194static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 195 LLVMValueRef val) 196{ 197 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 198 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 199 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val, 200 mask->int_vec_type, ""); 201 202 lp_exec_mask_update(mask); 203} 204 205static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 206{ 207 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 208 LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder, 209 mask->cond_mask, ""); 210 211 /* means that we didn't have any mask before and that 212 * we were fully enabled */ 213 if (mask->cond_stack_size <= 1) { 214 prev_mask = LLVMConstAllOnes(mask->int_vec_type); 215 } 216 217 mask->cond_mask = LLVMBuildAnd(mask->bld->builder, 218 inv_mask, 219 prev_mask, ""); 220 lp_exec_mask_update(mask); 221} 222 223static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 224{ 225 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 226 lp_exec_mask_update(mask); 227} 228 229static void lp_exec_bgnloop(struct lp_exec_mask *mask) 230{ 231 232 if (mask->cont_stack_size == 0) 233 mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type); 234 if (mask->break_stack_size == 0) 235 mask->break_mask = LLVMConstAllOnes(mask->int_vec_type); 236 if (mask->cond_stack_size == 0) 237 mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); 238 239 assert(mask->break_stack_size < LP_MAX_TGSI_NESTING); 240 assert(mask->cont_stack_size < LP_MAX_TGSI_NESTING); 241 assert(mask->break_stack_size < LP_MAX_TGSI_NESTING); 242 243 mask->break_stack[mask->break_stack_size++] = mask->break_mask; 244 mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; 245 mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; 246 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); 247 LLVMBuildBr(mask->bld->builder, mask->loop_block); 248 
LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); 249 250 lp_exec_mask_update(mask); 251} 252 253static void lp_exec_break(struct lp_exec_mask *mask) 254{ 255 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 256 mask->exec_mask, 257 "break"); 258 259 mask->break_mask = LLVMBuildAnd(mask->bld->builder, 260 mask->break_mask, 261 exec_mask, "break_full"); 262 263 lp_exec_mask_update(mask); 264} 265 266static void lp_exec_continue(struct lp_exec_mask *mask) 267{ 268 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 269 mask->exec_mask, 270 ""); 271 272 mask->cont_mask = LLVMBuildAnd(mask->bld->builder, 273 mask->cont_mask, 274 exec_mask, ""); 275 276 lp_exec_mask_update(mask); 277} 278 279 280static void lp_exec_endloop(struct lp_exec_mask *mask) 281{ 282 LLVMBasicBlockRef endloop; 283 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* 284 mask->bld->type.length); 285 LLVMValueRef i1cond; 286 287 assert(mask->break_mask); 288 289 /* i1cond = (mask == 0) */ 290 i1cond = LLVMBuildICmp( 291 mask->bld->builder, 292 LLVMIntNE, 293 LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""), 294 LLVMConstNull(reg_type), ""); 295 296 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); 297 298 LLVMBuildCondBr(mask->bld->builder, 299 i1cond, mask->loop_block, endloop); 300 301 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); 302 303 mask->loop_block = mask->loop_stack[--mask->loop_stack_size]; 304 /* pop the cont mask */ 305 if (mask->cont_stack_size) { 306 mask->cont_mask = mask->cont_stack[--mask->cont_stack_size]; 307 } 308 /* pop the break mask */ 309 if (mask->break_stack_size) { 310 mask->break_mask = mask->break_stack[--mask->break_stack_size]; 311 } 312 313 lp_exec_mask_update(mask); 314} 315 316/* stores val into an address pointed to by dst. 317 * mask->exec_mask is used to figure out which bits of val 318 * should be stored into the address 319 * (0 means don't store this bit, 1 means do store). 
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   if (mask->has_mask) {
      LLVMValueRef real_val, dst_val;

      /* read-modify-write: keep the old value in masked-off channels */
      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 mask->exec_mask,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      /* no active mask: plain unconditional store */
      LLVMBuildStore(mask->bld->builder, val, dst);
}


/**
 * Approximate ddx as a finite difference within the 2x2 quad:
 * right column minus left column.
 */
static LLVMValueRef
emit_ddx(struct lp_build_tgsi_soa_context *bld,
         LLVMValueRef src)
{
   LLVMValueRef src_left  = lp_build_swizzle1_aos(&bld->base, src, swizzle_left);
   LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right);
   return lp_build_sub(&bld->base, src_right, src_left);
}


/**
 * Approximate ddy as a finite difference within the 2x2 quad.
 * NOTE(review): computes top - bottom; confirm the sign convention
 * against the window origin used by the rasterizer.
 */
static LLVMValueRef
emit_ddy(struct lp_build_tgsi_soa_context *bld,
         LLVMValueRef src)
{
   LLVMValueRef src_top    = lp_build_swizzle1_aos(&bld->base, src, swizzle_top);
   LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom);
   return lp_build_sub(&bld->base, src_top, src_bottom);
}

/**
 * Return a pointer to temporary register TEMP[index].<swizzle>.
 * Without indirect addressing this is the per-channel alloca; with it,
 * a GEP into the flat temps_array (4 channels per register).  'addr'
 * is the (already scaled-by-4) runtime offset, used only when
 * is_indirect is set.
 */
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned index,
             unsigned swizzle,
             boolean is_indirect,
             LLVMValueRef addr)
{
   if (!bld->has_indirect_addressing) {
      return bld->temps[index][swizzle];
   } else {
      LLVMValueRef lindex =
         LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0);
      if (is_indirect)
         lindex = lp_build_add(&bld->base, lindex, addr);
      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
   }
}

/**
 * Register fetch.
378 */ 379static LLVMValueRef 380emit_fetch( 381 struct lp_build_tgsi_soa_context *bld, 382 const struct tgsi_full_instruction *inst, 383 unsigned index, 384 const unsigned chan_index ) 385{ 386 const struct tgsi_full_src_register *reg = &inst->Src[index]; 387 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 388 LLVMValueRef res; 389 LLVMValueRef addr; 390 391 switch (swizzle) { 392 case TGSI_SWIZZLE_X: 393 case TGSI_SWIZZLE_Y: 394 case TGSI_SWIZZLE_Z: 395 case TGSI_SWIZZLE_W: 396 397 if (reg->Register.Indirect) { 398 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); 399 unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); 400 addr = LLVMBuildLoad(bld->base.builder, 401 bld->addr[reg->Indirect.Index][swizzle], 402 ""); 403 /* for indexing we want integers */ 404 addr = LLVMBuildFPToSI(bld->base.builder, addr, 405 int_vec_type, ""); 406 addr = LLVMBuildExtractElement(bld->base.builder, 407 addr, LLVMConstInt(LLVMInt32Type(), 0, 0), 408 ""); 409 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); 410 } 411 412 switch (reg->Register.File) { 413 case TGSI_FILE_CONSTANT: { 414 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); 415 LLVMValueRef scalar, scalar_ptr; 416 417 if (reg->Register.Indirect) { 418 /*lp_build_printf(bld->base.builder, 419 "\taddr = %d\n", addr);*/ 420 index = lp_build_add(&bld->base, index, addr); 421 } 422 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); 423 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 424 425 res = lp_build_broadcast_scalar(&bld->base, scalar); 426 break; 427 } 428 429 case TGSI_FILE_IMMEDIATE: 430 res = bld->immediates[reg->Register.Index][swizzle]; 431 assert(res); 432 break; 433 434 case TGSI_FILE_INPUT: 435 res = bld->inputs[reg->Register.Index][swizzle]; 436 assert(res); 437 break; 438 439 case TGSI_FILE_TEMPORARY: { 440 LLVMValueRef temp_ptr = 
get_temp_ptr(bld, reg->Register.Index, 441 swizzle, 442 reg->Register.Indirect, 443 addr); 444 res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); 445 if(!res) 446 return bld->base.undef; 447 break; 448 } 449 450 default: 451 assert( 0 ); 452 return bld->base.undef; 453 } 454 break; 455 456 default: 457 assert( 0 ); 458 return bld->base.undef; 459 } 460 461 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { 462 case TGSI_UTIL_SIGN_CLEAR: 463 res = lp_build_abs( &bld->base, res ); 464 break; 465 466 case TGSI_UTIL_SIGN_SET: 467 /* TODO: Use bitwese OR for floating point */ 468 res = lp_build_abs( &bld->base, res ); 469 res = LLVMBuildNeg( bld->base.builder, res, "" ); 470 break; 471 472 case TGSI_UTIL_SIGN_TOGGLE: 473 res = LLVMBuildNeg( bld->base.builder, res, "" ); 474 break; 475 476 case TGSI_UTIL_SIGN_KEEP: 477 break; 478 } 479 480 return res; 481} 482 483 484/** 485 * Register fetch with derivatives. 486 */ 487static void 488emit_fetch_deriv( 489 struct lp_build_tgsi_soa_context *bld, 490 const struct tgsi_full_instruction *inst, 491 unsigned index, 492 const unsigned chan_index, 493 LLVMValueRef *res, 494 LLVMValueRef *ddx, 495 LLVMValueRef *ddy) 496{ 497 LLVMValueRef src; 498 499 src = emit_fetch(bld, inst, index, chan_index); 500 501 if(res) 502 *res = src; 503 504 /* TODO: use interpolation coeffs for inputs */ 505 506 if(ddx) 507 *ddx = emit_ddx(bld, src); 508 509 if(ddy) 510 *ddy = emit_ddy(bld, src); 511} 512 513 514/** 515 * Register store. 
516 */ 517static void 518emit_store( 519 struct lp_build_tgsi_soa_context *bld, 520 const struct tgsi_full_instruction *inst, 521 unsigned index, 522 unsigned chan_index, 523 LLVMValueRef value) 524{ 525 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 526 LLVMValueRef addr; 527 528 switch( inst->Instruction.Saturate ) { 529 case TGSI_SAT_NONE: 530 break; 531 532 case TGSI_SAT_ZERO_ONE: 533 value = lp_build_max(&bld->base, value, bld->base.zero); 534 value = lp_build_min(&bld->base, value, bld->base.one); 535 break; 536 537 case TGSI_SAT_MINUS_PLUS_ONE: 538 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); 539 value = lp_build_min(&bld->base, value, bld->base.one); 540 break; 541 542 default: 543 assert(0); 544 } 545 546 if (reg->Register.Indirect) { 547 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); 548 unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); 549 addr = LLVMBuildLoad(bld->base.builder, 550 bld->addr[reg->Indirect.Index][swizzle], 551 ""); 552 /* for indexing we want integers */ 553 addr = LLVMBuildFPToSI(bld->base.builder, addr, 554 int_vec_type, ""); 555 addr = LLVMBuildExtractElement(bld->base.builder, 556 addr, LLVMConstInt(LLVMInt32Type(), 0, 0), 557 ""); 558 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); 559 } 560 561 switch( reg->Register.File ) { 562 case TGSI_FILE_OUTPUT: 563 lp_exec_mask_store(&bld->exec_mask, value, 564 bld->outputs[reg->Register.Index][chan_index]); 565 break; 566 567 case TGSI_FILE_TEMPORARY: { 568 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 569 chan_index, 570 reg->Register.Indirect, 571 addr); 572 lp_exec_mask_store(&bld->exec_mask, value, temp_ptr); 573 break; 574 } 575 576 case TGSI_FILE_ADDRESS: 577 lp_exec_mask_store(&bld->exec_mask, value, 578 bld->addr[reg->Indirect.Index][chan_index]); 579 break; 580 581 case TGSI_FILE_PREDICATE: 582 /* FIXME */ 583 break; 584 585 default: 
586 assert( 0 ); 587 } 588} 589 590 591/** 592 * High-level instruction translators. 593 */ 594 595 596static void 597emit_tex( struct lp_build_tgsi_soa_context *bld, 598 const struct tgsi_full_instruction *inst, 599 boolean apply_lodbias, 600 boolean projected, 601 LLVMValueRef *texel) 602{ 603 const uint unit = inst->Src[1].Register.Index; 604 LLVMValueRef lodbias; 605 LLVMValueRef oow = NULL; 606 LLVMValueRef coords[3]; 607 unsigned num_coords; 608 unsigned i; 609 610 if (!bld->sampler) { 611 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 612 for (i = 0; i < 4; i++) { 613 texel[i] = bld->base.undef; 614 } 615 return; 616 } 617 618 switch (inst->Texture.Texture) { 619 case TGSI_TEXTURE_1D: 620 num_coords = 1; 621 break; 622 case TGSI_TEXTURE_2D: 623 case TGSI_TEXTURE_RECT: 624 num_coords = 2; 625 break; 626 case TGSI_TEXTURE_SHADOW1D: 627 case TGSI_TEXTURE_SHADOW2D: 628 case TGSI_TEXTURE_SHADOWRECT: 629 case TGSI_TEXTURE_3D: 630 case TGSI_TEXTURE_CUBE: 631 num_coords = 3; 632 break; 633 default: 634 assert(0); 635 return; 636 } 637 638 if(apply_lodbias) 639 lodbias = emit_fetch( bld, inst, 0, 3 ); 640 else 641 lodbias = bld->base.zero; 642 643 if (projected) { 644 oow = emit_fetch( bld, inst, 0, 3 ); 645 oow = lp_build_rcp(&bld->base, oow); 646 } 647 648 for (i = 0; i < num_coords; i++) { 649 coords[i] = emit_fetch( bld, inst, 0, i ); 650 if (projected) 651 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 652 } 653 for (i = num_coords; i < 3; i++) { 654 coords[i] = bld->base.undef; 655 } 656 657 bld->sampler->emit_fetch_texel(bld->sampler, 658 bld->base.builder, 659 bld->base.type, 660 unit, num_coords, coords, lodbias, 661 texel); 662} 663 664 665/** 666 * Kill fragment if any of the src register values are negative. 
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst )
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   /* AND together the per-channel "survives" masks */
   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if(mask)
      lp_build_mask_update(bld->mask, mask);
}


/**
 * Predicated fragment kill.
 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
 * The only predication is the execution mask which will apply if
 * we're inside a loop or conditional.
 */
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst)
{
   LLVMValueRef mask;

   /* For those channels which are "alive", disable fragment shader
    * execution.
    */
   if (bld->exec_mask.has_mask) {
      /* kill exactly the channels the exec mask says are running */
      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
   }
   else {
      /* no exec mask: kill everything (all-zero mask) */
      mask = bld->base.zero;
   }

   lp_build_mask_update(bld->mask, mask);
}

/**
 * Allocate storage (allocas) for the registers named by a TGSI
 * declaration: temporaries, outputs and address registers.
 */
static void
emit_declaration(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_declaration *decl)
{
   LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type);

   unsigned first = decl->Range.First;
   unsigned last = decl->Range.Last;
   unsigned idx, i;

   for (idx = first; idx <= last; ++idx) {
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_TGSI_TEMPS);
         if (bld->has_indirect_addressing) {
            /* one flat array covering registers 0..last, 4 channels each.
             * NOTE(review): this alloca is re-created for every idx in
             * the range (and sized assuming the range starts at 0) —
             * looks like it could be hoisted out of the loop; verify. */
            LLVMValueRef val = LLVMConstInt(LLVMInt32Type(),
                                            last*4 + 4, 0);
            bld->temps_array = lp_build_array_alloca(bld->base.builder,
                                                     vec_type, val, "");
         } else {
            for (i = 0; i < NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
                                                    vec_type, "");
         }
         break;

      case TGSI_FILE_OUTPUT:
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
                                                   vec_type, "");
         break;

      case TGSI_FILE_ADDRESS:
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
                                                vec_type, "");
         break;

      case TGSI_FILE_PREDICATE:
         _debug_printf("warning: predicate registers not yet implemented\n");
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}


/**
 * Emit LLVM for one TGSI instruction.
794 * \param return TRUE for success, FALSE otherwise 795 */ 796static boolean 797emit_instruction( 798 struct lp_build_tgsi_soa_context *bld, 799 const struct tgsi_full_instruction *inst, 800 const struct tgsi_opcode_info *info) 801{ 802 unsigned chan_index; 803 LLVMValueRef src0, src1, src2; 804 LLVMValueRef tmp0, tmp1, tmp2; 805 LLVMValueRef tmp3 = NULL; 806 LLVMValueRef tmp4 = NULL; 807 LLVMValueRef tmp5 = NULL; 808 LLVMValueRef tmp6 = NULL; 809 LLVMValueRef tmp7 = NULL; 810 LLVMValueRef res; 811 LLVMValueRef dst0[NUM_CHANNELS]; 812 813 /* 814 * Stores and write masks are handled in a general fashion after the long 815 * instruction opcode switch statement. 816 * 817 * Although not stricitly necessary, we avoid generating instructions for 818 * channels which won't be stored, in cases where's that easy. For some 819 * complex instructions, like texture sampling, it is more convenient to 820 * assume a full writemask and then let LLVM optimization passes eliminate 821 * redundant code. 
822 */ 823 824 assert(info->num_dst <= 1); 825 if(info->num_dst) { 826 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 827 dst0[chan_index] = bld->base.undef; 828 } 829 } 830 831 switch (inst->Instruction.Opcode) { 832 case TGSI_OPCODE_ARL: 833 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 834 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 835 tmp0 = lp_build_floor(&bld->base, tmp0); 836 dst0[chan_index] = tmp0; 837 } 838 break; 839 840 case TGSI_OPCODE_MOV: 841 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 842 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 843 } 844 break; 845 846 case TGSI_OPCODE_LIT: 847 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 848 dst0[CHAN_X] = bld->base.one; 849 } 850 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 851 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 852 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 853 } 854 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 855 /* XMM[1] = SrcReg[0].yyyy */ 856 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 857 /* XMM[1] = max(XMM[1], 0) */ 858 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 859 /* XMM[2] = SrcReg[0].wwww */ 860 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 861 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 862 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 863 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 864 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 865 } 866 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 867 dst0[CHAN_W] = bld->base.one; 868 } 869 break; 870 871 case TGSI_OPCODE_RCP: 872 /* TGSI_OPCODE_RECIP */ 873 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 874 res = lp_build_rcp(&bld->base, src0); 875 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 876 dst0[chan_index] = res; 877 } 878 break; 879 880 case TGSI_OPCODE_RSQ: 881 /* TGSI_OPCODE_RECIPSQRT */ 882 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 883 src0 = lp_build_abs(&bld->base, src0); 884 res = 
lp_build_rsqrt(&bld->base, src0); 885 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 886 dst0[chan_index] = res; 887 } 888 break; 889 890 case TGSI_OPCODE_EXP: 891 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 892 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 893 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 894 LLVMValueRef *p_exp2_int_part = NULL; 895 LLVMValueRef *p_frac_part = NULL; 896 LLVMValueRef *p_exp2 = NULL; 897 898 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 899 900 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 901 p_exp2_int_part = &tmp0; 902 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 903 p_frac_part = &tmp1; 904 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 905 p_exp2 = &tmp2; 906 907 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 908 909 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 910 dst0[CHAN_X] = tmp0; 911 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 912 dst0[CHAN_Y] = tmp1; 913 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 914 dst0[CHAN_Z] = tmp2; 915 } 916 /* dst.w = 1.0 */ 917 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 918 dst0[CHAN_W] = bld->base.one; 919 } 920 break; 921 922 case TGSI_OPCODE_LOG: 923 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 924 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 925 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 926 LLVMValueRef *p_floor_log2 = NULL; 927 LLVMValueRef *p_exp = NULL; 928 LLVMValueRef *p_log2 = NULL; 929 930 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 931 src0 = lp_build_abs( &bld->base, src0 ); 932 933 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 934 p_floor_log2 = &tmp0; 935 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 936 p_exp = &tmp1; 937 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 938 p_log2 = &tmp2; 939 940 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 941 942 /* dst.x = floor(lg2(abs(src.x))) */ 943 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 944 dst0[CHAN_X] = tmp0; 945 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 946 if 
(IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 947 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 948 } 949 /* dst.z = lg2(abs(src.x)) */ 950 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 951 dst0[CHAN_Z] = tmp2; 952 } 953 /* dst.w = 1.0 */ 954 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 955 dst0[CHAN_W] = bld->base.one; 956 } 957 break; 958 959 case TGSI_OPCODE_MUL: 960 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 961 src0 = emit_fetch( bld, inst, 0, chan_index ); 962 src1 = emit_fetch( bld, inst, 1, chan_index ); 963 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 964 } 965 break; 966 967 case TGSI_OPCODE_ADD: 968 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 969 src0 = emit_fetch( bld, inst, 0, chan_index ); 970 src1 = emit_fetch( bld, inst, 1, chan_index ); 971 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 972 } 973 break; 974 975 case TGSI_OPCODE_DP3: 976 /* TGSI_OPCODE_DOT3 */ 977 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 978 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 979 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 980 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 981 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 982 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 983 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 984 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 985 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 986 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 987 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 988 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 989 dst0[chan_index] = tmp0; 990 } 991 break; 992 993 case TGSI_OPCODE_DP4: 994 /* TGSI_OPCODE_DOT4 */ 995 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 996 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 997 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 998 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 999 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1000 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1001 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1002 tmp1 = emit_fetch( bld, 
inst, 0, CHAN_Z ); 1003 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1004 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1005 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1006 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1007 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1008 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1009 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1010 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1011 dst0[chan_index] = tmp0; 1012 } 1013 break; 1014 1015 case TGSI_OPCODE_DST: 1016 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1017 dst0[CHAN_X] = bld->base.one; 1018 } 1019 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1020 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1021 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1022 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1023 } 1024 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1025 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1026 } 1027 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1028 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1029 } 1030 break; 1031 1032 case TGSI_OPCODE_MIN: 1033 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1034 src0 = emit_fetch( bld, inst, 0, chan_index ); 1035 src1 = emit_fetch( bld, inst, 1, chan_index ); 1036 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1037 } 1038 break; 1039 1040 case TGSI_OPCODE_MAX: 1041 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1042 src0 = emit_fetch( bld, inst, 0, chan_index ); 1043 src1 = emit_fetch( bld, inst, 1, chan_index ); 1044 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1045 } 1046 break; 1047 1048 case TGSI_OPCODE_SLT: 1049 /* TGSI_OPCODE_SETLT */ 1050 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1051 src0 = emit_fetch( bld, inst, 0, chan_index ); 1052 src1 = emit_fetch( bld, inst, 1, chan_index ); 1053 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1054 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1055 } 1056 break; 
1057 1058 case TGSI_OPCODE_SGE: 1059 /* TGSI_OPCODE_SETGE */ 1060 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1061 src0 = emit_fetch( bld, inst, 0, chan_index ); 1062 src1 = emit_fetch( bld, inst, 1, chan_index ); 1063 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1064 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1065 } 1066 break; 1067 1068 case TGSI_OPCODE_MAD: 1069 /* TGSI_OPCODE_MADD */ 1070 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1071 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1072 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1073 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1074 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1075 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1076 dst0[chan_index] = tmp0; 1077 } 1078 break; 1079 1080 case TGSI_OPCODE_SUB: 1081 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1082 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1083 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1084 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1085 } 1086 break; 1087 1088 case TGSI_OPCODE_LRP: 1089 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1090 src0 = emit_fetch( bld, inst, 0, chan_index ); 1091 src1 = emit_fetch( bld, inst, 1, chan_index ); 1092 src2 = emit_fetch( bld, inst, 2, chan_index ); 1093 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1094 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1095 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1096 } 1097 break; 1098 1099 case TGSI_OPCODE_CND: 1100 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1101 src0 = emit_fetch( bld, inst, 0, chan_index ); 1102 src1 = emit_fetch( bld, inst, 1, chan_index ); 1103 src2 = emit_fetch( bld, inst, 2, chan_index ); 1104 tmp1 = lp_build_const_vec(bld->base.type, 0.5); 1105 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1106 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1107 } 1108 break; 1109 
1110 case TGSI_OPCODE_DP2A: 1111 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1112 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1113 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1114 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1115 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1116 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1117 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1118 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1119 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1120 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1121 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1122 } 1123 break; 1124 1125 case TGSI_OPCODE_FRC: 1126 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1127 src0 = emit_fetch( bld, inst, 0, chan_index ); 1128 tmp0 = lp_build_floor(&bld->base, src0); 1129 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1130 dst0[chan_index] = tmp0; 1131 } 1132 break; 1133 1134 case TGSI_OPCODE_CLAMP: 1135 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1136 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1137 src1 = emit_fetch( bld, inst, 1, chan_index ); 1138 src2 = emit_fetch( bld, inst, 2, chan_index ); 1139 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1140 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1141 dst0[chan_index] = tmp0; 1142 } 1143 break; 1144 1145 case TGSI_OPCODE_FLR: 1146 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1147 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1148 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1149 } 1150 break; 1151 1152 case TGSI_OPCODE_ROUND: 1153 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1154 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1155 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1156 } 1157 break; 1158 1159 case TGSI_OPCODE_EX2: { 1160 tmp0 = emit_fetch( bld, inst, 
0, CHAN_X ); 1161 tmp0 = lp_build_exp2( &bld->base, tmp0); 1162 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1163 dst0[chan_index] = tmp0; 1164 } 1165 break; 1166 } 1167 1168 case TGSI_OPCODE_LG2: 1169 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1170 tmp0 = lp_build_log2( &bld->base, tmp0); 1171 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1172 dst0[chan_index] = tmp0; 1173 } 1174 break; 1175 1176 case TGSI_OPCODE_POW: 1177 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1178 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1179 res = lp_build_pow( &bld->base, src0, src1 ); 1180 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1181 dst0[chan_index] = res; 1182 } 1183 break; 1184 1185 case TGSI_OPCODE_XPD: 1186 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1187 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1188 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1189 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1190 } 1191 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1192 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1193 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1194 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1195 } 1196 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1197 tmp2 = tmp0; 1198 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1199 tmp5 = tmp3; 1200 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1201 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1202 dst0[CHAN_X] = tmp2; 1203 } 1204 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1205 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1206 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1207 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 1208 } 1209 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1210 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1211 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1212 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 1213 dst0[CHAN_Y] = tmp3; 1214 } 1215 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1216 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1217 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1218 
tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1219 dst0[CHAN_Z] = tmp5; 1220 } 1221 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1222 dst0[CHAN_W] = bld->base.one; 1223 } 1224 break; 1225 1226 case TGSI_OPCODE_ABS: 1227 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1228 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1229 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1230 } 1231 break; 1232 1233 case TGSI_OPCODE_RCC: 1234 /* deprecated? */ 1235 assert(0); 1236 return FALSE; 1237 1238 case TGSI_OPCODE_DPH: 1239 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1240 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1241 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1242 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1243 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1244 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1245 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1246 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1247 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1248 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1249 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1250 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1251 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1252 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1253 dst0[chan_index] = tmp0; 1254 } 1255 break; 1256 1257 case TGSI_OPCODE_COS: 1258 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1259 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1260 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1261 dst0[chan_index] = tmp0; 1262 } 1263 break; 1264 1265 case TGSI_OPCODE_DDX: 1266 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1267 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1268 } 1269 break; 1270 1271 case TGSI_OPCODE_DDY: 1272 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1273 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1274 } 1275 break; 1276 1277 case TGSI_OPCODE_KILP: 1278 /* predicated kill */ 1279 emit_kilp( bld, inst ); 1280 break; 1281 1282 case 
TGSI_OPCODE_KIL: 1283 /* conditional kill */ 1284 emit_kil( bld, inst ); 1285 break; 1286 1287 case TGSI_OPCODE_PK2H: 1288 return FALSE; 1289 break; 1290 1291 case TGSI_OPCODE_PK2US: 1292 return FALSE; 1293 break; 1294 1295 case TGSI_OPCODE_PK4B: 1296 return FALSE; 1297 break; 1298 1299 case TGSI_OPCODE_PK4UB: 1300 return FALSE; 1301 break; 1302 1303 case TGSI_OPCODE_RFL: 1304 return FALSE; 1305 break; 1306 1307 case TGSI_OPCODE_SEQ: 1308 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1309 src0 = emit_fetch( bld, inst, 0, chan_index ); 1310 src1 = emit_fetch( bld, inst, 1, chan_index ); 1311 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1312 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1313 } 1314 break; 1315 1316 case TGSI_OPCODE_SFL: 1317 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1318 dst0[chan_index] = bld->base.zero; 1319 } 1320 break; 1321 1322 case TGSI_OPCODE_SGT: 1323 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1324 src0 = emit_fetch( bld, inst, 0, chan_index ); 1325 src1 = emit_fetch( bld, inst, 1, chan_index ); 1326 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1327 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1328 } 1329 break; 1330 1331 case TGSI_OPCODE_SIN: 1332 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1333 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1334 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1335 dst0[chan_index] = tmp0; 1336 } 1337 break; 1338 1339 case TGSI_OPCODE_SLE: 1340 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1341 src0 = emit_fetch( bld, inst, 0, chan_index ); 1342 src1 = emit_fetch( bld, inst, 1, chan_index ); 1343 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 1344 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1345 } 1346 break; 1347 1348 case TGSI_OPCODE_SNE: 1349 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 
1350 src0 = emit_fetch( bld, inst, 0, chan_index ); 1351 src1 = emit_fetch( bld, inst, 1, chan_index ); 1352 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1353 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1354 } 1355 break; 1356 1357 case TGSI_OPCODE_STR: 1358 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1359 dst0[chan_index] = bld->base.one; 1360 } 1361 break; 1362 1363 case TGSI_OPCODE_TEX: 1364 emit_tex( bld, inst, FALSE, FALSE, dst0 ); 1365 break; 1366 1367 case TGSI_OPCODE_TXD: 1368 /* FIXME */ 1369 return FALSE; 1370 break; 1371 1372 case TGSI_OPCODE_UP2H: 1373 /* deprecated */ 1374 assert (0); 1375 return FALSE; 1376 break; 1377 1378 case TGSI_OPCODE_UP2US: 1379 /* deprecated */ 1380 assert(0); 1381 return FALSE; 1382 break; 1383 1384 case TGSI_OPCODE_UP4B: 1385 /* deprecated */ 1386 assert(0); 1387 return FALSE; 1388 break; 1389 1390 case TGSI_OPCODE_UP4UB: 1391 /* deprecated */ 1392 assert(0); 1393 return FALSE; 1394 break; 1395 1396 case TGSI_OPCODE_X2D: 1397 /* deprecated? 
*/ 1398 assert(0); 1399 return FALSE; 1400 break; 1401 1402 case TGSI_OPCODE_ARA: 1403 /* deprecated */ 1404 assert(0); 1405 return FALSE; 1406 break; 1407 1408 case TGSI_OPCODE_ARR: 1409 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1410 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1411 tmp0 = lp_build_round(&bld->base, tmp0); 1412 dst0[chan_index] = tmp0; 1413 } 1414 break; 1415 1416 case TGSI_OPCODE_BRA: 1417 /* deprecated */ 1418 assert(0); 1419 return FALSE; 1420 break; 1421 1422 case TGSI_OPCODE_CAL: 1423 /* FIXME */ 1424 return FALSE; 1425 break; 1426 1427 case TGSI_OPCODE_RET: 1428 /* FIXME */ 1429 return FALSE; 1430 break; 1431 1432 case TGSI_OPCODE_END: 1433 break; 1434 1435 case TGSI_OPCODE_SSG: 1436 /* TGSI_OPCODE_SGN */ 1437 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1438 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1439 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1440 } 1441 break; 1442 1443 case TGSI_OPCODE_CMP: 1444 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1445 src0 = emit_fetch( bld, inst, 0, chan_index ); 1446 src1 = emit_fetch( bld, inst, 1, chan_index ); 1447 src2 = emit_fetch( bld, inst, 2, chan_index ); 1448 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 1449 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 1450 } 1451 break; 1452 1453 case TGSI_OPCODE_SCS: 1454 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1455 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1456 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 1457 } 1458 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1459 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1460 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 1461 } 1462 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1463 dst0[CHAN_Z] = bld->base.zero; 1464 } 1465 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1466 dst0[CHAN_W] = bld->base.one; 1467 } 1468 break; 1469 1470 case TGSI_OPCODE_TXB: 1471 emit_tex( bld, inst, TRUE, FALSE, dst0 ); 1472 break; 1473 1474 
case TGSI_OPCODE_NRM: 1475 /* fall-through */ 1476 case TGSI_OPCODE_NRM4: 1477 /* 3 or 4-component normalization */ 1478 { 1479 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 1480 1481 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 1482 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 1483 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 1484 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 1485 1486 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1487 1488 /* xmm4 = src.x */ 1489 /* xmm0 = src.x * src.x */ 1490 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1491 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1492 tmp4 = tmp0; 1493 } 1494 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 1495 1496 /* xmm5 = src.y */ 1497 /* xmm0 = xmm0 + src.y * src.y */ 1498 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 1499 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1500 tmp5 = tmp1; 1501 } 1502 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1503 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1504 1505 /* xmm6 = src.z */ 1506 /* xmm0 = xmm0 + src.z * src.z */ 1507 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 1508 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1509 tmp6 = tmp1; 1510 } 1511 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1512 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1513 1514 if (dims == 4) { 1515 /* xmm7 = src.w */ 1516 /* xmm0 = xmm0 + src.w * src.w */ 1517 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 1518 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 1519 tmp7 = tmp1; 1520 } 1521 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1522 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1523 } 1524 1525 /* xmm1 = 1 / sqrt(xmm0) */ 1526 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 1527 1528 /* dst.x = xmm1 * src.x */ 1529 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1530 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 1531 } 1532 1533 /* dst.y = xmm1 * src.y */ 1534 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1535 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 
1536 } 1537 1538 /* dst.z = xmm1 * src.z */ 1539 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1540 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 1541 } 1542 1543 /* dst.w = xmm1 * src.w */ 1544 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 1545 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 1546 } 1547 } 1548 1549 /* dst.w = 1.0 */ 1550 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 1551 dst0[CHAN_W] = bld->base.one; 1552 } 1553 } 1554 break; 1555 1556 case TGSI_OPCODE_DIV: 1557 /* deprecated */ 1558 assert( 0 ); 1559 return FALSE; 1560 break; 1561 1562 case TGSI_OPCODE_DP2: 1563 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1564 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1565 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1566 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1567 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1568 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1569 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1570 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1571 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1572 } 1573 break; 1574 1575 case TGSI_OPCODE_TXL: 1576 emit_tex( bld, inst, TRUE, FALSE, dst0 ); 1577 break; 1578 1579 case TGSI_OPCODE_TXP: 1580 emit_tex( bld, inst, FALSE, TRUE, dst0 ); 1581 break; 1582 1583 case TGSI_OPCODE_BRK: 1584 lp_exec_break(&bld->exec_mask); 1585 break; 1586 1587 case TGSI_OPCODE_IF: 1588 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1589 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 1590 tmp0, bld->base.zero); 1591 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 1592 break; 1593 1594 case TGSI_OPCODE_BGNLOOP: 1595 lp_exec_bgnloop(&bld->exec_mask); 1596 break; 1597 1598 case TGSI_OPCODE_ELSE: 1599 lp_exec_mask_cond_invert(&bld->exec_mask); 1600 break; 1601 1602 case TGSI_OPCODE_ENDIF: 1603 
lp_exec_mask_cond_pop(&bld->exec_mask); 1604 break; 1605 1606 case TGSI_OPCODE_ENDLOOP: 1607 lp_exec_endloop(&bld->exec_mask); 1608 break; 1609 1610 case TGSI_OPCODE_PUSHA: 1611 /* deprecated? */ 1612 assert(0); 1613 return FALSE; 1614 break; 1615 1616 case TGSI_OPCODE_POPA: 1617 /* deprecated? */ 1618 assert(0); 1619 return FALSE; 1620 break; 1621 1622 case TGSI_OPCODE_CEIL: 1623 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1624 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1625 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 1626 } 1627 break; 1628 1629 case TGSI_OPCODE_I2F: 1630 /* deprecated? */ 1631 assert(0); 1632 return FALSE; 1633 break; 1634 1635 case TGSI_OPCODE_NOT: 1636 /* deprecated? */ 1637 assert(0); 1638 return FALSE; 1639 break; 1640 1641 case TGSI_OPCODE_TRUNC: 1642 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1643 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1644 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 1645 } 1646 break; 1647 1648 case TGSI_OPCODE_SHL: 1649 /* deprecated? */ 1650 assert(0); 1651 return FALSE; 1652 break; 1653 1654 case TGSI_OPCODE_ISHR: 1655 /* deprecated? */ 1656 assert(0); 1657 return FALSE; 1658 break; 1659 1660 case TGSI_OPCODE_AND: 1661 /* deprecated? */ 1662 assert(0); 1663 return FALSE; 1664 break; 1665 1666 case TGSI_OPCODE_OR: 1667 /* deprecated? */ 1668 assert(0); 1669 return FALSE; 1670 break; 1671 1672 case TGSI_OPCODE_MOD: 1673 /* deprecated? */ 1674 assert(0); 1675 return FALSE; 1676 break; 1677 1678 case TGSI_OPCODE_XOR: 1679 /* deprecated? */ 1680 assert(0); 1681 return FALSE; 1682 break; 1683 1684 case TGSI_OPCODE_SAD: 1685 /* deprecated? */ 1686 assert(0); 1687 return FALSE; 1688 break; 1689 1690 case TGSI_OPCODE_TXF: 1691 /* deprecated? */ 1692 assert(0); 1693 return FALSE; 1694 break; 1695 1696 case TGSI_OPCODE_TXQ: 1697 /* deprecated? 
*/ 1698 assert(0); 1699 return FALSE; 1700 break; 1701 1702 case TGSI_OPCODE_CONT: 1703 lp_exec_continue(&bld->exec_mask); 1704 break; 1705 1706 case TGSI_OPCODE_EMIT: 1707 return FALSE; 1708 break; 1709 1710 case TGSI_OPCODE_ENDPRIM: 1711 return FALSE; 1712 break; 1713 1714 case TGSI_OPCODE_NOP: 1715 break; 1716 1717 default: 1718 return FALSE; 1719 } 1720 1721 if(info->num_dst) { 1722 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1723 emit_store( bld, inst, 0, chan_index, dst0[chan_index]); 1724 } 1725 } 1726 1727 return TRUE; 1728} 1729 1730 1731void 1732lp_build_tgsi_soa(LLVMBuilderRef builder, 1733 const struct tgsi_token *tokens, 1734 struct lp_type type, 1735 struct lp_build_mask_context *mask, 1736 LLVMValueRef consts_ptr, 1737 const LLVMValueRef *pos, 1738 const LLVMValueRef (*inputs)[NUM_CHANNELS], 1739 LLVMValueRef (*outputs)[NUM_CHANNELS], 1740 struct lp_build_sampler_soa *sampler, 1741 struct tgsi_shader_info *info) 1742{ 1743 struct lp_build_tgsi_soa_context bld; 1744 struct tgsi_parse_context parse; 1745 uint num_immediates = 0; 1746 unsigned i; 1747 1748 /* Setup build context */ 1749 memset(&bld, 0, sizeof bld); 1750 lp_build_context_init(&bld.base, builder, type); 1751 bld.mask = mask; 1752 bld.pos = pos; 1753 bld.inputs = inputs; 1754 bld.outputs = outputs; 1755 bld.consts_ptr = consts_ptr; 1756 bld.sampler = sampler; 1757 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 || 1758 info->opcode_count[TGSI_OPCODE_ARL] > 0; 1759 1760 lp_exec_mask_init(&bld.exec_mask, &bld.base); 1761 1762 tgsi_parse_init( &parse, tokens ); 1763 1764 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1765 tgsi_parse_token( &parse ); 1766 1767 switch( parse.FullToken.Token.Type ) { 1768 case TGSI_TOKEN_TYPE_DECLARATION: 1769 /* Inputs already interpolated */ 1770 emit_declaration( &bld, &parse.FullToken.FullDeclaration ); 1771 break; 1772 1773 case TGSI_TOKEN_TYPE_INSTRUCTION: 1774 { 1775 unsigned opcode = 
parse.FullToken.FullInstruction.Instruction.Opcode; 1776 const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode); 1777 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info )) 1778 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 1779 opcode_info->mnemonic); 1780 } 1781 1782 break; 1783 1784 case TGSI_TOKEN_TYPE_IMMEDIATE: 1785 /* simply copy the immediate values into the next immediates[] slot */ 1786 { 1787 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1788 assert(size <= 4); 1789 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 1790 for( i = 0; i < size; ++i ) 1791 bld.immediates[num_immediates][i] = 1792 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); 1793 for( i = size; i < 4; ++i ) 1794 bld.immediates[num_immediates][i] = bld.base.undef; 1795 num_immediates++; 1796 } 1797 break; 1798 1799 case TGSI_TOKEN_TYPE_PROPERTY: 1800 break; 1801 1802 default: 1803 assert( 0 ); 1804 } 1805 } 1806 if (0) { 1807 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); 1808 LLVMValueRef function = LLVMGetBasicBlockParent(block); 1809 debug_printf("11111111111111111111111111111 \n"); 1810 tgsi_dump(tokens, 0); 1811 LLVMDumpValue(function); 1812 debug_printf("2222222222222222222222222222 \n"); 1813 } 1814 tgsi_parse_free( &parse ); 1815} 1816 1817