lp_bld_tgsi_soa.c revision 962558daaed43b0111cd062e32821aad106869d7
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_info.h" 46#include "tgsi/tgsi_parse.h" 47#include "tgsi/tgsi_util.h" 48#include "tgsi/tgsi_exec.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_type.h" 51#include "lp_bld_const.h" 52#include "lp_bld_arit.h" 53#include "lp_bld_logic.h" 54#include "lp_bld_swizzle.h" 55#include "lp_bld_flow.h" 56#include "lp_bld_tgsi.h" 57#include "lp_bld_limits.h" 58#include "lp_bld_debug.h" 59 60 61#define FOR_EACH_CHANNEL( CHAN )\ 62 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 63 64#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 65 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 66 67#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 68 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 69 70#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 71 FOR_EACH_CHANNEL( CHAN )\ 72 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 73 74#define CHAN_X 0 75#define CHAN_Y 1 76#define CHAN_Z 2 77#define CHAN_W 3 78 79#define QUAD_TOP_LEFT 0 80#define QUAD_TOP_RIGHT 1 81#define QUAD_BOTTOM_LEFT 2 82#define QUAD_BOTTOM_RIGHT 3 83 84 85struct lp_exec_mask { 86 struct lp_build_context *bld; 87 88 boolean has_mask; 89 90 LLVMTypeRef int_vec_type; 91 92 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 93 int cond_stack_size; 94 LLVMValueRef cond_mask; 95 96 LLVMValueRef break_stack[LP_MAX_TGSI_NESTING]; 97 int break_stack_size; 98 LLVMValueRef break_mask; 99 100 LLVMValueRef cont_stack[LP_MAX_TGSI_NESTING]; 101 int cont_stack_size; 102 LLVMValueRef cont_mask; 103 104 LLVMBasicBlockRef loop_stack[LP_MAX_TGSI_NESTING]; 105 int loop_stack_size; 106 LLVMBasicBlockRef loop_block; 107 108 109 LLVMValueRef exec_mask; 110}; 111 112struct lp_build_tgsi_soa_context 113{ 114 struct lp_build_context base; 115 116 LLVMValueRef consts_ptr; 117 const LLVMValueRef *pos; 118 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 119 LLVMValueRef (*outputs)[NUM_CHANNELS]; 120 121 struct lp_build_sampler_soa *sampler; 122 123 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; 124 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; 125 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; 126 127 /* we allocate an array of temps if we have indirect 128 * addressing and then the temps above is unused */ 129 LLVMValueRef temps_array; 130 boolean has_indirect_addressing; 131 132 struct lp_build_mask_context *mask; 133 struct lp_exec_mask exec_mask; 134}; 135 136static const unsigned char 137swizzle_left[4] = { 138 QUAD_TOP_LEFT, QUAD_TOP_LEFT, 139 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_LEFT 140}; 141 142static const unsigned char 143swizzle_right[4] = { 144 QUAD_TOP_RIGHT, QUAD_TOP_RIGHT, 145 QUAD_BOTTOM_RIGHT, QUAD_BOTTOM_RIGHT 146}; 147 148static const unsigned char 149swizzle_top[4] = { 150 QUAD_TOP_LEFT, QUAD_TOP_RIGHT, 151 QUAD_TOP_LEFT, QUAD_TOP_RIGHT 152}; 153 154static const unsigned char 155swizzle_bottom[4] = { 156 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT, 157 QUAD_BOTTOM_LEFT, QUAD_BOTTOM_RIGHT 158}; 159 160static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 161{ 162 mask->bld = bld; 163 mask->has_mask = FALSE; 164 mask->cond_stack_size = 0; 165 mask->loop_stack_size = 0; 166 mask->break_stack_size = 0; 167 mask->cont_stack_size = 0; 168 169 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); 170} 171 172static void lp_exec_mask_update(struct lp_exec_mask *mask) 173{ 174 if (mask->loop_stack_size) { 175 /*for loops we need to update the entire mask at runtime */ 176 LLVMValueRef tmp; 177 assert(mask->break_mask); 178 tmp = LLVMBuildAnd(mask->bld->builder, 179 mask->cont_mask, 180 mask->break_mask, 181 "maskcb"); 182 mask->exec_mask = LLVMBuildAnd(mask->bld->builder, 183 mask->cond_mask, 184 tmp, 185 "maskfull"); 186 } else 187 mask->exec_mask = mask->cond_mask; 188 189 190 mask->has_mask = (mask->cond_stack_size > 0 || 191 mask->loop_stack_size > 0); 192} 193 194static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 195 LLVMValueRef val) 196{ 197 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 198 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 199 mask->cond_mask = LLVMBuildBitCast(mask->bld->builder, val, 200 mask->int_vec_type, ""); 201 202 lp_exec_mask_update(mask); 203} 204 205static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 206{ 207 LLVMValueRef prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 208 LLVMValueRef inv_mask = LLVMBuildNot(mask->bld->builder, 209 mask->cond_mask, ""); 210 211 /* means that we didn't have any mask before and that 212 * we were fully enabled */ 213 if (mask->cond_stack_size <= 1) { 214 prev_mask = LLVMConstAllOnes(mask->int_vec_type); 215 } 216 217 mask->cond_mask = LLVMBuildAnd(mask->bld->builder, 218 inv_mask, 219 prev_mask, ""); 220 lp_exec_mask_update(mask); 221} 222 223static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 224{ 225 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 226 lp_exec_mask_update(mask); 227} 228 229static void lp_exec_bgnloop(struct lp_exec_mask *mask) 230{ 231 232 if (mask->cont_stack_size == 0) 233 mask->cont_mask = LLVMConstAllOnes(mask->int_vec_type); 234 if (mask->break_stack_size == 0) 235 mask->break_mask = LLVMConstAllOnes(mask->int_vec_type); 236 if (mask->cond_stack_size == 0) 237 mask->cond_mask = LLVMConstAllOnes(mask->int_vec_type); 238 239 assert(mask->break_stack_size < LP_MAX_TGSI_NESTING); 240 assert(mask->cont_stack_size < LP_MAX_TGSI_NESTING); 241 assert(mask->break_stack_size < LP_MAX_TGSI_NESTING); 242 243 mask->break_stack[mask->break_stack_size++] = mask->break_mask; 244 mask->cont_stack[mask->cont_stack_size++] = mask->cont_mask; 245 mask->loop_stack[mask->loop_stack_size++] = mask->loop_block; 246 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); 247 LLVMBuildBr(mask->bld->builder, mask->loop_block); 248 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); 249 250 lp_exec_mask_update(mask); 251} 252 253static void lp_exec_break(struct lp_exec_mask *mask) 254{ 255 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 256 mask->exec_mask, 257 "break"); 258 259 mask->break_mask = LLVMBuildAnd(mask->bld->builder, 260 mask->break_mask, 261 exec_mask, "break_full"); 262 263 lp_exec_mask_update(mask); 264} 265 266static void lp_exec_continue(struct lp_exec_mask *mask) 267{ 268 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 269 mask->exec_mask, 270 ""); 271 272 mask->cont_mask = LLVMBuildAnd(mask->bld->builder, 273 mask->cont_mask, 274 exec_mask, ""); 275 276 lp_exec_mask_update(mask); 277} 278 279 280static void lp_exec_endloop(struct lp_exec_mask *mask) 281{ 282 LLVMBasicBlockRef endloop; 283 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* 284 mask->bld->type.length); 285 LLVMValueRef i1cond; 286 287 assert(mask->break_mask); 288 289 /* i1cond = (mask == 0) */ 290 i1cond = LLVMBuildICmp( 291 mask->bld->builder, 292 LLVMIntNE, 293 LLVMBuildBitCast(mask->bld->builder, mask->break_mask, reg_type, ""), 294 LLVMConstNull(reg_type), ""); 295 296 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); 297 298 LLVMBuildCondBr(mask->bld->builder, 299 i1cond, mask->loop_block, endloop); 300 301 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); 302 303 mask->loop_block = mask->loop_stack[--mask->loop_stack_size]; 304 /* pop the cont mask */ 305 if (mask->cont_stack_size) { 306 mask->cont_mask = mask->cont_stack[--mask->cont_stack_size]; 307 } 308 /* pop the break mask */ 309 if (mask->break_stack_size) { 310 mask->break_mask = mask->break_stack[--mask->break_stack_size]; 311 } 312 313 lp_exec_mask_update(mask); 314} 315 316/* stores val into an address pointed to by dst. 317 * mask->exec_mask is used to figure out which bits of val 318 * should be stored into the address 319 * (0 means don't store this bit, 1 means do store). 320 */ 321static void lp_exec_mask_store(struct lp_exec_mask *mask, 322 LLVMValueRef val, 323 LLVMValueRef dst) 324{ 325 if (mask->has_mask) { 326 LLVMValueRef real_val, dst_val; 327 328 dst_val = LLVMBuildLoad(mask->bld->builder, dst, ""); 329 real_val = lp_build_select(mask->bld, 330 mask->exec_mask, 331 val, dst_val); 332 333 LLVMBuildStore(mask->bld->builder, real_val, dst); 334 } else 335 LLVMBuildStore(mask->bld->builder, val, dst); 336} 337 338 339static LLVMValueRef 340emit_ddx(struct lp_build_tgsi_soa_context *bld, 341 LLVMValueRef src) 342{ 343 LLVMValueRef src_left = lp_build_swizzle1_aos(&bld->base, src, swizzle_left); 344 LLVMValueRef src_right = lp_build_swizzle1_aos(&bld->base, src, swizzle_right); 345 return lp_build_sub(&bld->base, src_right, src_left); 346} 347 348 349static LLVMValueRef 350emit_ddy(struct lp_build_tgsi_soa_context *bld, 351 LLVMValueRef src) 352{ 353 LLVMValueRef src_top = lp_build_swizzle1_aos(&bld->base, src, swizzle_top); 354 LLVMValueRef src_bottom = lp_build_swizzle1_aos(&bld->base, src, swizzle_bottom); 355 return lp_build_sub(&bld->base, src_top, src_bottom); 356} 357 358static LLVMValueRef 359get_temp_ptr(struct lp_build_tgsi_soa_context *bld, 360 unsigned index, 361 unsigned swizzle, 362 boolean is_indirect, 363 LLVMValueRef addr) 364{ 365 if (!bld->has_indirect_addressing) { 366 return bld->temps[index][swizzle]; 367 } else { 368 LLVMValueRef lindex = 369 LLVMConstInt(LLVMInt32Type(), index*4 + swizzle, 0); 370 if (is_indirect) 371 lindex = lp_build_add(&bld->base, lindex, addr); 372 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); 373 } 374} 375 376/** 377 * Register fetch. 378 */ 379static LLVMValueRef 380emit_fetch( 381 struct lp_build_tgsi_soa_context *bld, 382 const struct tgsi_full_instruction *inst, 383 unsigned index, 384 const unsigned chan_index ) 385{ 386 const struct tgsi_full_src_register *reg = &inst->Src[index]; 387 unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 388 LLVMValueRef res; 389 LLVMValueRef addr; 390 391 switch (swizzle) { 392 case TGSI_SWIZZLE_X: 393 case TGSI_SWIZZLE_Y: 394 case TGSI_SWIZZLE_Z: 395 case TGSI_SWIZZLE_W: 396 397 if (reg->Register.Indirect) { 398 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); 399 unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); 400 addr = LLVMBuildLoad(bld->base.builder, 401 bld->addr[reg->Indirect.Index][swizzle], 402 ""); 403 /* for indexing we want integers */ 404 addr = LLVMBuildFPToSI(bld->base.builder, addr, 405 int_vec_type, ""); 406 addr = LLVMBuildExtractElement(bld->base.builder, 407 addr, LLVMConstInt(LLVMInt32Type(), 0, 0), 408 ""); 409 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); 410 } 411 412 switch (reg->Register.File) { 413 case TGSI_FILE_CONSTANT: { 414 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0); 415 LLVMValueRef scalar, scalar_ptr; 416 417 if (reg->Register.Indirect) { 418 /*lp_build_printf(bld->base.builder, 419 "\taddr = %d\n", addr);*/ 420 index = lp_build_add(&bld->base, index, addr); 421 } 422 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, ""); 423 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 424 425 res = lp_build_broadcast_scalar(&bld->base, scalar); 426 break; 427 } 428 429 case TGSI_FILE_IMMEDIATE: 430 res = bld->immediates[reg->Register.Index][swizzle]; 431 assert(res); 432 break; 433 434 case TGSI_FILE_INPUT: 435 res = bld->inputs[reg->Register.Index][swizzle]; 436 assert(res); 437 break; 438 439 case TGSI_FILE_TEMPORARY: { 440 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 441 swizzle, 442 reg->Register.Indirect, 443 addr); 444 res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); 445 if(!res) 446 return bld->base.undef; 447 break; 448 } 449 450 default: 451 assert( 0 ); 452 return bld->base.undef; 453 } 454 break; 455 456 default: 457 assert( 0 ); 458 return bld->base.undef; 459 } 460 461 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { 462 case TGSI_UTIL_SIGN_CLEAR: 463 res = lp_build_abs( &bld->base, res ); 464 break; 465 466 case TGSI_UTIL_SIGN_SET: 467 /* TODO: Use bitwese OR for floating point */ 468 res = lp_build_abs( &bld->base, res ); 469 res = LLVMBuildNeg( bld->base.builder, res, "" ); 470 break; 471 472 case TGSI_UTIL_SIGN_TOGGLE: 473 res = LLVMBuildNeg( bld->base.builder, res, "" ); 474 break; 475 476 case TGSI_UTIL_SIGN_KEEP: 477 break; 478 } 479 480 return res; 481} 482 483 484/** 485 * Register fetch with derivatives. 486 */ 487static void 488emit_fetch_deriv( 489 struct lp_build_tgsi_soa_context *bld, 490 const struct tgsi_full_instruction *inst, 491 unsigned index, 492 const unsigned chan_index, 493 LLVMValueRef *res, 494 LLVMValueRef *ddx, 495 LLVMValueRef *ddy) 496{ 497 LLVMValueRef src; 498 499 src = emit_fetch(bld, inst, index, chan_index); 500 501 if(res) 502 *res = src; 503 504 /* TODO: use interpolation coeffs for inputs */ 505 506 if(ddx) 507 *ddx = emit_ddx(bld, src); 508 509 if(ddy) 510 *ddy = emit_ddy(bld, src); 511} 512 513 514/** 515 * Register store. 516 */ 517static void 518emit_store( 519 struct lp_build_tgsi_soa_context *bld, 520 const struct tgsi_full_instruction *inst, 521 unsigned index, 522 unsigned chan_index, 523 LLVMValueRef value) 524{ 525 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 526 LLVMValueRef addr; 527 528 switch( inst->Instruction.Saturate ) { 529 case TGSI_SAT_NONE: 530 break; 531 532 case TGSI_SAT_ZERO_ONE: 533 value = lp_build_max(&bld->base, value, bld->base.zero); 534 value = lp_build_min(&bld->base, value, bld->base.one); 535 break; 536 537 case TGSI_SAT_MINUS_PLUS_ONE: 538 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); 539 value = lp_build_min(&bld->base, value, bld->base.one); 540 break; 541 542 default: 543 assert(0); 544 } 545 546 if (reg->Register.Indirect) { 547 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); 548 unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); 549 addr = LLVMBuildLoad(bld->base.builder, 550 bld->addr[reg->Indirect.Index][swizzle], 551 ""); 552 /* for indexing we want integers */ 553 addr = LLVMBuildFPToSI(bld->base.builder, addr, 554 int_vec_type, ""); 555 addr = LLVMBuildExtractElement(bld->base.builder, 556 addr, LLVMConstInt(LLVMInt32Type(), 0, 0), 557 ""); 558 addr = lp_build_mul(&bld->base, addr, LLVMConstInt(LLVMInt32Type(), 4, 0)); 559 } 560 561 switch( reg->Register.File ) { 562 case TGSI_FILE_OUTPUT: 563 lp_exec_mask_store(&bld->exec_mask, value, 564 bld->outputs[reg->Register.Index][chan_index]); 565 break; 566 567 case TGSI_FILE_TEMPORARY: { 568 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 569 chan_index, 570 reg->Register.Indirect, 571 addr); 572 lp_exec_mask_store(&bld->exec_mask, value, temp_ptr); 573 break; 574 } 575 576 case TGSI_FILE_ADDRESS: 577 lp_exec_mask_store(&bld->exec_mask, value, 578 bld->addr[reg->Indirect.Index][chan_index]); 579 break; 580 581 case TGSI_FILE_PREDICATE: 582 /* FIXME */ 583 break; 584 585 default: 586 assert( 0 ); 587 } 588} 589 590 591/** 592 * High-level instruction translators. 593 */ 594 595enum tex_modifier { 596 TEX_MODIFIER_NONE = 0, 597 TEX_MODIFIER_PROJECTED, 598 TEX_MODIFIER_LOD_BIAS, 599 TEX_MODIFIER_EXPLICIT_LOD, 600 TEX_MODIFIER_EXPLICIT_DERIV 601}; 602 603static void 604emit_tex( struct lp_build_tgsi_soa_context *bld, 605 const struct tgsi_full_instruction *inst, 606 enum tex_modifier modifier, 607 LLVMValueRef *texel) 608{ 609 unsigned unit; 610 LLVMValueRef lodbias; 611 LLVMValueRef oow = NULL; 612 LLVMValueRef coords[3]; 613 LLVMValueRef ddx[3]; 614 LLVMValueRef ddy[3]; 615 unsigned num_coords; 616 unsigned i; 617 618 if (!bld->sampler) { 619 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 620 for (i = 0; i < 4; i++) { 621 texel[i] = bld->base.undef; 622 } 623 return; 624 } 625 626 switch (inst->Texture.Texture) { 627 case TGSI_TEXTURE_1D: 628 num_coords = 1; 629 break; 630 case TGSI_TEXTURE_2D: 631 case TGSI_TEXTURE_RECT: 632 num_coords = 2; 633 break; 634 case TGSI_TEXTURE_SHADOW1D: 635 case TGSI_TEXTURE_SHADOW2D: 636 case TGSI_TEXTURE_SHADOWRECT: 637 case TGSI_TEXTURE_3D: 638 case TGSI_TEXTURE_CUBE: 639 num_coords = 3; 640 break; 641 default: 642 assert(0); 643 return; 644 } 645 646 /* FIXME: Treat TEX_MODIFIER_EXPLICIT_LOD correctly */ 647 if (modifier == TEX_MODIFIER_LOD_BIAS || TEX_MODIFIER_EXPLICIT_LOD) 648 lodbias = emit_fetch( bld, inst, 0, 3 ); 649 else 650 lodbias = bld->base.zero; 651 652 if (modifier == TEX_MODIFIER_PROJECTED) { 653 oow = emit_fetch( bld, inst, 0, 3 ); 654 oow = lp_build_rcp(&bld->base, oow); 655 } 656 657 for (i = 0; i < num_coords; i++) { 658 coords[i] = emit_fetch( bld, inst, 0, i ); 659 if (modifier == TEX_MODIFIER_PROJECTED) 660 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 661 } 662 for (i = num_coords; i < 3; i++) { 663 coords[i] = bld->base.undef; 664 } 665 666 if (modifier == TEX_MODIFIER_EXPLICIT_DERIV) { 667 for (i = 0; i < num_coords; i++) { 668 ddx[i] = emit_fetch( bld, inst, 1, i ); 669 ddy[i] = emit_fetch( bld, inst, 2, i ); 670 } 671 unit = inst->Src[3].Register.Index; 672 } else { 673 for (i = 0; i < num_coords; i++) { 674 ddx[i] = emit_ddx( bld, coords[i] ); 675 ddy[i] = emit_ddy( bld, coords[i] ); 676 } 677 unit = inst->Src[1].Register.Index; 678 } 679 680 bld->sampler->emit_fetch_texel(bld->sampler, 681 bld->base.builder, 682 bld->base.type, 683 unit, num_coords, coords, 684 ddx, ddy, lodbias, 685 texel); 686} 687 688 689/** 690 * Kill fragment if any of the src register values are negative. 691 */ 692static void 693emit_kil( 694 struct lp_build_tgsi_soa_context *bld, 695 const struct tgsi_full_instruction *inst ) 696{ 697 const struct tgsi_full_src_register *reg = &inst->Src[0]; 698 LLVMValueRef terms[NUM_CHANNELS]; 699 LLVMValueRef mask; 700 unsigned chan_index; 701 702 memset(&terms, 0, sizeof terms); 703 704 FOR_EACH_CHANNEL( chan_index ) { 705 unsigned swizzle; 706 707 /* Unswizzle channel */ 708 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 709 710 /* Check if the component has not been already tested. */ 711 assert(swizzle < NUM_CHANNELS); 712 if( !terms[swizzle] ) 713 /* TODO: change the comparison operator instead of setting the sign */ 714 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 715 } 716 717 mask = NULL; 718 FOR_EACH_CHANNEL( chan_index ) { 719 if(terms[chan_index]) { 720 LLVMValueRef chan_mask; 721 722 /* 723 * If term < 0 then mask = 0 else mask = ~0. 724 */ 725 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 726 727 if(mask) 728 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); 729 else 730 mask = chan_mask; 731 } 732 } 733 734 if(mask) 735 lp_build_mask_update(bld->mask, mask); 736} 737 738 739/** 740 * Predicated fragment kill. 741 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 742 * The only predication is the execution mask which will apply if 743 * we're inside a loop or conditional. 744 */ 745static void 746emit_kilp(struct lp_build_tgsi_soa_context *bld, 747 const struct tgsi_full_instruction *inst) 748{ 749 LLVMValueRef mask; 750 751 /* For those channels which are "alive", disable fragment shader 752 * execution. 753 */ 754 if (bld->exec_mask.has_mask) { 755 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); 756 } 757 else { 758 mask = bld->base.zero; 759 } 760 761 lp_build_mask_update(bld->mask, mask); 762} 763 764static void 765emit_declaration( 766 struct lp_build_tgsi_soa_context *bld, 767 const struct tgsi_full_declaration *decl) 768{ 769 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); 770 771 unsigned first = decl->Range.First; 772 unsigned last = decl->Range.Last; 773 unsigned idx, i; 774 775 for (idx = first; idx <= last; ++idx) { 776 switch (decl->Declaration.File) { 777 case TGSI_FILE_TEMPORARY: 778 assert(idx < LP_MAX_TGSI_TEMPS); 779 if (bld->has_indirect_addressing) { 780 LLVMValueRef val = LLVMConstInt(LLVMInt32Type(), 781 last*4 + 4, 0); 782 bld->temps_array = lp_build_array_alloca(bld->base.builder, 783 vec_type, val, ""); 784 } else { 785 for (i = 0; i < NUM_CHANNELS; i++) 786 bld->temps[idx][i] = lp_build_alloca(bld->base.builder, 787 vec_type, ""); 788 } 789 break; 790 791 case TGSI_FILE_OUTPUT: 792 for (i = 0; i < NUM_CHANNELS; i++) 793 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, 794 vec_type, ""); 795 break; 796 797 case TGSI_FILE_ADDRESS: 798 assert(idx < LP_MAX_TGSI_ADDRS); 799 for (i = 0; i < NUM_CHANNELS; i++) 800 bld->addr[idx][i] = lp_build_alloca(bld->base.builder, 801 vec_type, ""); 802 break; 803 804 case TGSI_FILE_PREDICATE: 805 _debug_printf("warning: predicate registers not yet implemented\n"); 806 break; 807 808 default: 809 /* don't need to declare other vars */ 810 break; 811 } 812 } 813} 814 815 816/** 817 * Emit LLVM for one TGSI instruction. 818 * \param return TRUE for success, FALSE otherwise 819 */ 820static boolean 821emit_instruction( 822 struct lp_build_tgsi_soa_context *bld, 823 const struct tgsi_full_instruction *inst, 824 const struct tgsi_opcode_info *info) 825{ 826 unsigned chan_index; 827 LLVMValueRef src0, src1, src2; 828 LLVMValueRef tmp0, tmp1, tmp2; 829 LLVMValueRef tmp3 = NULL; 830 LLVMValueRef tmp4 = NULL; 831 LLVMValueRef tmp5 = NULL; 832 LLVMValueRef tmp6 = NULL; 833 LLVMValueRef tmp7 = NULL; 834 LLVMValueRef res; 835 LLVMValueRef dst0[NUM_CHANNELS]; 836 837 /* 838 * Stores and write masks are handled in a general fashion after the long 839 * instruction opcode switch statement. 840 * 841 * Although not stricitly necessary, we avoid generating instructions for 842 * channels which won't be stored, in cases where's that easy. For some 843 * complex instructions, like texture sampling, it is more convenient to 844 * assume a full writemask and then let LLVM optimization passes eliminate 845 * redundant code. 846 */ 847 848 assert(info->num_dst <= 1); 849 if(info->num_dst) { 850 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 851 dst0[chan_index] = bld->base.undef; 852 } 853 } 854 855 switch (inst->Instruction.Opcode) { 856 case TGSI_OPCODE_ARL: 857 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 858 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 859 tmp0 = lp_build_floor(&bld->base, tmp0); 860 dst0[chan_index] = tmp0; 861 } 862 break; 863 864 case TGSI_OPCODE_MOV: 865 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 866 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 867 } 868 break; 869 870 case TGSI_OPCODE_LIT: 871 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 872 dst0[CHAN_X] = bld->base.one; 873 } 874 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 875 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 876 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 877 } 878 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 879 /* XMM[1] = SrcReg[0].yyyy */ 880 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 881 /* XMM[1] = max(XMM[1], 0) */ 882 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 883 /* XMM[2] = SrcReg[0].wwww */ 884 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 885 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 886 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 887 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 888 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 889 } 890 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 891 dst0[CHAN_W] = bld->base.one; 892 } 893 break; 894 895 case TGSI_OPCODE_RCP: 896 /* TGSI_OPCODE_RECIP */ 897 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 898 res = lp_build_rcp(&bld->base, src0); 899 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 900 dst0[chan_index] = res; 901 } 902 break; 903 904 case TGSI_OPCODE_RSQ: 905 /* TGSI_OPCODE_RECIPSQRT */ 906 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 907 src0 = lp_build_abs(&bld->base, src0); 908 res = lp_build_rsqrt(&bld->base, src0); 909 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 910 dst0[chan_index] = res; 911 } 912 break; 913 914 case TGSI_OPCODE_EXP: 915 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 916 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 917 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 918 LLVMValueRef *p_exp2_int_part = NULL; 919 LLVMValueRef *p_frac_part = NULL; 920 LLVMValueRef *p_exp2 = NULL; 921 922 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 923 924 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 925 p_exp2_int_part = &tmp0; 926 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 927 p_frac_part = &tmp1; 928 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 929 p_exp2 = &tmp2; 930 931 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 932 933 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 934 dst0[CHAN_X] = tmp0; 935 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 936 dst0[CHAN_Y] = tmp1; 937 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 938 dst0[CHAN_Z] = tmp2; 939 } 940 /* dst.w = 1.0 */ 941 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 942 dst0[CHAN_W] = bld->base.one; 943 } 944 break; 945 946 case TGSI_OPCODE_LOG: 947 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 948 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 949 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 950 LLVMValueRef *p_floor_log2 = NULL; 951 LLVMValueRef *p_exp = NULL; 952 LLVMValueRef *p_log2 = NULL; 953 954 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 955 src0 = lp_build_abs( &bld->base, src0 ); 956 957 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 958 p_floor_log2 = &tmp0; 959 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 960 p_exp = &tmp1; 961 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 962 p_log2 = &tmp2; 963 964 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 965 966 /* dst.x = floor(lg2(abs(src.x))) */ 967 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 968 dst0[CHAN_X] = tmp0; 969 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 970 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 971 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 972 } 973 /* dst.z = lg2(abs(src.x)) */ 974 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 975 dst0[CHAN_Z] = tmp2; 976 } 977 /* dst.w = 1.0 */ 978 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 979 dst0[CHAN_W] = bld->base.one; 980 } 981 break; 982 983 case TGSI_OPCODE_MUL: 984 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 985 src0 = emit_fetch( bld, inst, 0, chan_index ); 986 src1 = emit_fetch( bld, inst, 1, chan_index ); 987 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 988 } 989 break; 990 991 case TGSI_OPCODE_ADD: 992 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 993 src0 = emit_fetch( bld, inst, 0, chan_index ); 994 src1 = emit_fetch( bld, inst, 1, chan_index ); 995 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 996 } 997 break; 998 999 case TGSI_OPCODE_DP3: 1000 /* TGSI_OPCODE_DOT3 */ 1001 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1002 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1003 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1004 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1005 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1006 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1007 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1008 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1009 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1010 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1011 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1012 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1013 dst0[chan_index] = tmp0; 1014 } 1015 break; 1016 1017 case TGSI_OPCODE_DP4: 1018 /* TGSI_OPCODE_DOT4 */ 1019 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1020 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1021 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1022 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1023 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1024 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1025 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1026 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1027 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1028 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1029 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1030 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1031 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1032 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1033 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1034 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1035 dst0[chan_index] = tmp0; 1036 } 1037 break; 1038 1039 case TGSI_OPCODE_DST: 1040 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1041 dst0[CHAN_X] = bld->base.one; 1042 } 1043 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1044 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1045 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1046 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1047 } 1048 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1049 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1050 } 1051 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1052 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1053 } 1054 break; 1055 1056 case TGSI_OPCODE_MIN: 1057 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1058 src0 = emit_fetch( bld, inst, 0, chan_index ); 1059 src1 = emit_fetch( bld, inst, 1, chan_index ); 1060 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1061 } 1062 break; 1063 1064 case TGSI_OPCODE_MAX: 1065 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1066 src0 = emit_fetch( bld, inst, 0, chan_index ); 1067 src1 = emit_fetch( bld, inst, 1, chan_index ); 1068 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1069 } 1070 break; 1071 1072 case TGSI_OPCODE_SLT: 1073 /* TGSI_OPCODE_SETLT */ 1074 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1075 src0 = emit_fetch( bld, inst, 0, chan_index ); 1076 src1 = emit_fetch( bld, inst, 1, chan_index ); 1077 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1078 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1079 } 1080 break; 1081 1082 case TGSI_OPCODE_SGE: 1083 /* TGSI_OPCODE_SETGE */ 1084 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1085 src0 = emit_fetch( bld, inst, 0, chan_index ); 1086 src1 = emit_fetch( bld, inst, 1, chan_index ); 1087 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1088 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1089 } 1090 break; 1091 1092 case TGSI_OPCODE_MAD: 1093 /* TGSI_OPCODE_MADD */ 1094 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1095 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1096 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1097 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1098 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1099 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1100 dst0[chan_index] = tmp0; 1101 } 1102 break; 1103 1104 case TGSI_OPCODE_SUB: 1105 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1106 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1107 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1108 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1109 } 1110 break; 1111 1112 case TGSI_OPCODE_LRP: 1113 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1114 src0 = emit_fetch( bld, inst, 0, chan_index ); 1115 src1 = emit_fetch( bld, inst, 1, chan_index ); 1116 src2 = emit_fetch( bld, inst, 2, chan_index ); 1117 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1118 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1119 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1120 } 1121 break; 1122 1123 case TGSI_OPCODE_CND: 1124 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1125 src0 = emit_fetch( bld, inst, 0, chan_index ); 1126 src1 = emit_fetch( bld, inst, 1, chan_index ); 1127 src2 = emit_fetch( bld, inst, 2, chan_index ); 1128 tmp1 = lp_build_const_vec(bld->base.type, 0.5); 1129 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1130 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1131 } 1132 break; 1133 1134 case TGSI_OPCODE_DP2A: 1135 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1136 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1137 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1138 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1139 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1140 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1141 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1142 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1143 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1144 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1145 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1146 } 1147 break; 1148 1149 case TGSI_OPCODE_FRC: 1150 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1151 src0 = emit_fetch( bld, inst, 0, chan_index ); 1152 tmp0 = lp_build_floor(&bld->base, src0); 1153 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1154 dst0[chan_index] = tmp0; 1155 } 1156 break; 1157 1158 case TGSI_OPCODE_CLAMP: 1159 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1160 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1161 src1 = emit_fetch( bld, inst, 1, chan_index ); 1162 src2 = emit_fetch( bld, inst, 2, chan_index ); 1163 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1164 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1165 dst0[chan_index] = tmp0; 1166 } 1167 break; 1168 1169 case TGSI_OPCODE_FLR: 1170 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1171 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1172 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1173 } 1174 break; 1175 1176 case TGSI_OPCODE_ROUND: 1177 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1178 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1179 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1180 } 1181 break; 1182 1183 case TGSI_OPCODE_EX2: { 1184 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1185 tmp0 = lp_build_exp2( &bld->base, tmp0); 1186 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1187 dst0[chan_index] = tmp0; 1188 } 1189 break; 1190 } 1191 1192 case TGSI_OPCODE_LG2: 1193 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1194 tmp0 = lp_build_log2( &bld->base, tmp0); 1195 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1196 dst0[chan_index] = tmp0; 1197 } 1198 break; 1199 1200 case TGSI_OPCODE_POW: 1201 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1202 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1203 res = lp_build_pow( &bld->base, src0, src1 ); 1204 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1205 dst0[chan_index] = res; 1206 } 1207 break; 1208 1209 case TGSI_OPCODE_XPD: 1210 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1211 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1212 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1213 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1214 } 1215 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1216 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1217 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1218 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1219 } 1220 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1221 tmp2 = tmp0; 1222 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1223 tmp5 = tmp3; 1224 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1225 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1226 dst0[CHAN_X] = tmp2; 1227 } 1228 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1229 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1230 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1231 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 1232 } 1233 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1234 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1235 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1236 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 1237 dst0[CHAN_Y] = tmp3; 1238 } 1239 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1240 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1241 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1242 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1243 dst0[CHAN_Z] = tmp5; 1244 } 1245 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1246 dst0[CHAN_W] = bld->base.one; 1247 } 1248 break; 1249 1250 case TGSI_OPCODE_ABS: 1251 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1252 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1253 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1254 } 1255 break; 1256 1257 case TGSI_OPCODE_RCC: 1258 /* deprecated? */ 1259 assert(0); 1260 return FALSE; 1261 1262 case TGSI_OPCODE_DPH: 1263 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1264 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1265 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1266 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1267 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1268 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1269 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1270 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1271 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1272 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1273 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1274 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1275 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1276 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1277 dst0[chan_index] = tmp0; 1278 } 1279 break; 1280 1281 case TGSI_OPCODE_COS: 1282 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1283 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1284 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1285 dst0[chan_index] = tmp0; 1286 } 1287 break; 1288 1289 case TGSI_OPCODE_DDX: 1290 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1291 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1292 } 1293 break; 1294 1295 case TGSI_OPCODE_DDY: 1296 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1297 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1298 } 1299 break; 1300 1301 case TGSI_OPCODE_KILP: 1302 /* predicated kill */ 1303 emit_kilp( bld, inst ); 1304 break; 1305 1306 case TGSI_OPCODE_KIL: 1307 /* conditional kill */ 1308 emit_kil( bld, inst ); 1309 break; 1310 1311 case TGSI_OPCODE_PK2H: 1312 return FALSE; 1313 break; 1314 1315 case TGSI_OPCODE_PK2US: 1316 return FALSE; 1317 break; 1318 1319 case TGSI_OPCODE_PK4B: 1320 return FALSE; 1321 break; 1322 1323 case TGSI_OPCODE_PK4UB: 1324 return FALSE; 1325 break; 1326 1327 case TGSI_OPCODE_RFL: 1328 return FALSE; 1329 break; 1330 1331 case TGSI_OPCODE_SEQ: 1332 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1333 src0 = emit_fetch( bld, inst, 0, chan_index ); 1334 src1 = emit_fetch( bld, inst, 1, chan_index ); 1335 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1336 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1337 } 1338 break; 1339 1340 case TGSI_OPCODE_SFL: 1341 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1342 dst0[chan_index] = bld->base.zero; 1343 } 1344 break; 1345 1346 case TGSI_OPCODE_SGT: 1347 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1348 src0 = emit_fetch( bld, inst, 0, chan_index ); 1349 src1 = emit_fetch( bld, inst, 1, chan_index ); 1350 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1351 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1352 } 1353 break; 1354 1355 case TGSI_OPCODE_SIN: 1356 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1357 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1358 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1359 dst0[chan_index] = tmp0; 1360 } 1361 break; 1362 1363 case TGSI_OPCODE_SLE: 1364 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1365 src0 = emit_fetch( bld, inst, 0, chan_index ); 1366 src1 = emit_fetch( bld, inst, 1, chan_index ); 1367 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 1368 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1369 } 1370 break; 1371 1372 case TGSI_OPCODE_SNE: 1373 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1374 src0 = emit_fetch( bld, inst, 0, chan_index ); 1375 src1 = emit_fetch( bld, inst, 1, chan_index ); 1376 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1377 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1378 } 1379 break; 1380 1381 case TGSI_OPCODE_STR: 1382 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1383 dst0[chan_index] = bld->base.one; 1384 } 1385 break; 1386 1387 case TGSI_OPCODE_TEX: 1388 emit_tex( bld, inst, TEX_MODIFIER_NONE, dst0 ); 1389 break; 1390 1391 case TGSI_OPCODE_TXD: 1392 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1393 break; 1394 1395 case TGSI_OPCODE_UP2H: 1396 /* deprecated */ 1397 assert (0); 1398 return FALSE; 1399 break; 1400 1401 case TGSI_OPCODE_UP2US: 1402 /* deprecated */ 1403 assert(0); 1404 return FALSE; 1405 break; 1406 1407 case TGSI_OPCODE_UP4B: 1408 /* deprecated */ 1409 assert(0); 1410 return FALSE; 1411 break; 1412 1413 case TGSI_OPCODE_UP4UB: 1414 /* deprecated */ 1415 assert(0); 1416 return FALSE; 1417 break; 1418 1419 case TGSI_OPCODE_X2D: 1420 /* deprecated? */ 1421 assert(0); 1422 return FALSE; 1423 break; 1424 1425 case TGSI_OPCODE_ARA: 1426 /* deprecated */ 1427 assert(0); 1428 return FALSE; 1429 break; 1430 1431 case TGSI_OPCODE_ARR: 1432 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1433 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1434 tmp0 = lp_build_round(&bld->base, tmp0); 1435 dst0[chan_index] = tmp0; 1436 } 1437 break; 1438 1439 case TGSI_OPCODE_BRA: 1440 /* deprecated */ 1441 assert(0); 1442 return FALSE; 1443 break; 1444 1445 case TGSI_OPCODE_CAL: 1446 /* FIXME */ 1447 return FALSE; 1448 break; 1449 1450 case TGSI_OPCODE_RET: 1451 /* FIXME */ 1452 return FALSE; 1453 break; 1454 1455 case TGSI_OPCODE_END: 1456 break; 1457 1458 case TGSI_OPCODE_SSG: 1459 /* TGSI_OPCODE_SGN */ 1460 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1461 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1462 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1463 } 1464 break; 1465 1466 case TGSI_OPCODE_CMP: 1467 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1468 src0 = emit_fetch( bld, inst, 0, chan_index ); 1469 src1 = emit_fetch( bld, inst, 1, chan_index ); 1470 src2 = emit_fetch( bld, inst, 2, chan_index ); 1471 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 1472 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 1473 } 1474 break; 1475 1476 case TGSI_OPCODE_SCS: 1477 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1478 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1479 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 1480 } 1481 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1482 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1483 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 1484 } 1485 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1486 dst0[CHAN_Z] = bld->base.zero; 1487 } 1488 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1489 dst0[CHAN_W] = bld->base.one; 1490 } 1491 break; 1492 1493 case TGSI_OPCODE_TXB: 1494 emit_tex( bld, inst, TEX_MODIFIER_LOD_BIAS, dst0 ); 1495 break; 1496 1497 case TGSI_OPCODE_NRM: 1498 /* fall-through */ 1499 case TGSI_OPCODE_NRM4: 1500 /* 3 or 4-component normalization */ 1501 { 1502 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 1503 1504 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 1505 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 1506 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 1507 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 1508 1509 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1510 1511 /* xmm4 = src.x */ 1512 /* xmm0 = src.x * src.x */ 1513 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1514 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1515 tmp4 = tmp0; 1516 } 1517 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 1518 1519 /* xmm5 = src.y */ 1520 /* xmm0 = xmm0 + src.y * src.y */ 1521 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 1522 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1523 tmp5 = tmp1; 1524 } 1525 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1526 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1527 1528 /* xmm6 = src.z */ 1529 /* xmm0 = xmm0 + src.z * src.z */ 1530 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 1531 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1532 tmp6 = tmp1; 1533 } 1534 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1535 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1536 1537 if (dims == 4) { 1538 /* xmm7 = src.w */ 1539 /* xmm0 = xmm0 + src.w * src.w */ 1540 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 1541 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 1542 tmp7 = tmp1; 1543 } 1544 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1545 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1546 } 1547 1548 /* xmm1 = 1 / sqrt(xmm0) */ 1549 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 1550 1551 /* dst.x = xmm1 * src.x */ 1552 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1553 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 1554 } 1555 1556 /* dst.y = xmm1 * src.y */ 1557 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1558 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 1559 } 1560 1561 /* dst.z = xmm1 * src.z */ 1562 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1563 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 1564 } 1565 1566 /* dst.w = xmm1 * src.w */ 1567 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 1568 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 1569 } 1570 } 1571 1572 /* dst.w = 1.0 */ 1573 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 1574 dst0[CHAN_W] = bld->base.one; 1575 } 1576 } 1577 break; 1578 1579 case TGSI_OPCODE_DIV: 1580 /* deprecated */ 1581 assert( 0 ); 1582 return FALSE; 1583 break; 1584 1585 case TGSI_OPCODE_DP2: 1586 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1587 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1588 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1589 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1590 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1591 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1592 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1593 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1594 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1595 } 1596 break; 1597 1598 case TGSI_OPCODE_TXL: 1599 emit_tex( bld, inst, TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 1600 break; 1601 1602 case TGSI_OPCODE_TXP: 1603 emit_tex( bld, inst, TEX_MODIFIER_PROJECTED, dst0 ); 1604 break; 1605 1606 case TGSI_OPCODE_BRK: 1607 lp_exec_break(&bld->exec_mask); 1608 break; 1609 1610 case TGSI_OPCODE_IF: 1611 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1612 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 1613 tmp0, bld->base.zero); 1614 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 1615 break; 1616 1617 case TGSI_OPCODE_BGNLOOP: 1618 lp_exec_bgnloop(&bld->exec_mask); 1619 break; 1620 1621 case TGSI_OPCODE_ELSE: 1622 lp_exec_mask_cond_invert(&bld->exec_mask); 1623 break; 1624 1625 case TGSI_OPCODE_ENDIF: 1626 lp_exec_mask_cond_pop(&bld->exec_mask); 1627 break; 1628 1629 case TGSI_OPCODE_ENDLOOP: 1630 lp_exec_endloop(&bld->exec_mask); 1631 break; 1632 1633 case TGSI_OPCODE_PUSHA: 1634 /* deprecated? */ 1635 assert(0); 1636 return FALSE; 1637 break; 1638 1639 case TGSI_OPCODE_POPA: 1640 /* deprecated? */ 1641 assert(0); 1642 return FALSE; 1643 break; 1644 1645 case TGSI_OPCODE_CEIL: 1646 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1647 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1648 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 1649 } 1650 break; 1651 1652 case TGSI_OPCODE_I2F: 1653 /* deprecated? */ 1654 assert(0); 1655 return FALSE; 1656 break; 1657 1658 case TGSI_OPCODE_NOT: 1659 /* deprecated? */ 1660 assert(0); 1661 return FALSE; 1662 break; 1663 1664 case TGSI_OPCODE_TRUNC: 1665 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1666 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1667 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 1668 } 1669 break; 1670 1671 case TGSI_OPCODE_SHL: 1672 /* deprecated? */ 1673 assert(0); 1674 return FALSE; 1675 break; 1676 1677 case TGSI_OPCODE_ISHR: 1678 /* deprecated? */ 1679 assert(0); 1680 return FALSE; 1681 break; 1682 1683 case TGSI_OPCODE_AND: 1684 /* deprecated? */ 1685 assert(0); 1686 return FALSE; 1687 break; 1688 1689 case TGSI_OPCODE_OR: 1690 /* deprecated? */ 1691 assert(0); 1692 return FALSE; 1693 break; 1694 1695 case TGSI_OPCODE_MOD: 1696 /* deprecated? */ 1697 assert(0); 1698 return FALSE; 1699 break; 1700 1701 case TGSI_OPCODE_XOR: 1702 /* deprecated? */ 1703 assert(0); 1704 return FALSE; 1705 break; 1706 1707 case TGSI_OPCODE_SAD: 1708 /* deprecated? */ 1709 assert(0); 1710 return FALSE; 1711 break; 1712 1713 case TGSI_OPCODE_TXF: 1714 /* deprecated? */ 1715 assert(0); 1716 return FALSE; 1717 break; 1718 1719 case TGSI_OPCODE_TXQ: 1720 /* deprecated? */ 1721 assert(0); 1722 return FALSE; 1723 break; 1724 1725 case TGSI_OPCODE_CONT: 1726 lp_exec_continue(&bld->exec_mask); 1727 break; 1728 1729 case TGSI_OPCODE_EMIT: 1730 return FALSE; 1731 break; 1732 1733 case TGSI_OPCODE_ENDPRIM: 1734 return FALSE; 1735 break; 1736 1737 case TGSI_OPCODE_NOP: 1738 break; 1739 1740 default: 1741 return FALSE; 1742 } 1743 1744 if(info->num_dst) { 1745 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1746 emit_store( bld, inst, 0, chan_index, dst0[chan_index]); 1747 } 1748 } 1749 1750 return TRUE; 1751} 1752 1753 1754void 1755lp_build_tgsi_soa(LLVMBuilderRef builder, 1756 const struct tgsi_token *tokens, 1757 struct lp_type type, 1758 struct lp_build_mask_context *mask, 1759 LLVMValueRef consts_ptr, 1760 const LLVMValueRef *pos, 1761 const LLVMValueRef (*inputs)[NUM_CHANNELS], 1762 LLVMValueRef (*outputs)[NUM_CHANNELS], 1763 struct lp_build_sampler_soa *sampler, 1764 struct tgsi_shader_info *info) 1765{ 1766 struct lp_build_tgsi_soa_context bld; 1767 struct tgsi_parse_context parse; 1768 uint num_immediates = 0; 1769 unsigned i; 1770 1771 /* Setup build context */ 1772 memset(&bld, 0, sizeof bld); 1773 lp_build_context_init(&bld.base, builder, type); 1774 bld.mask = mask; 1775 bld.pos = pos; 1776 bld.inputs = inputs; 1777 bld.outputs = outputs; 1778 bld.consts_ptr = consts_ptr; 1779 bld.sampler = sampler; 1780 bld.has_indirect_addressing = info->opcode_count[TGSI_OPCODE_ARR] > 0 || 1781 info->opcode_count[TGSI_OPCODE_ARL] > 0; 1782 1783 lp_exec_mask_init(&bld.exec_mask, &bld.base); 1784 1785 tgsi_parse_init( &parse, tokens ); 1786 1787 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1788 tgsi_parse_token( &parse ); 1789 1790 switch( parse.FullToken.Token.Type ) { 1791 case TGSI_TOKEN_TYPE_DECLARATION: 1792 /* Inputs already interpolated */ 1793 emit_declaration( &bld, &parse.FullToken.FullDeclaration ); 1794 break; 1795 1796 case TGSI_TOKEN_TYPE_INSTRUCTION: 1797 { 1798 unsigned opcode = parse.FullToken.FullInstruction.Instruction.Opcode; 1799 const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(opcode); 1800 if (!emit_instruction( &bld, &parse.FullToken.FullInstruction, opcode_info )) 1801 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 1802 opcode_info->mnemonic); 1803 } 1804 1805 break; 1806 1807 case TGSI_TOKEN_TYPE_IMMEDIATE: 1808 /* simply copy the immediate values into the next immediates[] slot */ 1809 { 1810 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1811 assert(size <= 4); 1812 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 1813 for( i = 0; i < size; ++i ) 1814 bld.immediates[num_immediates][i] = 1815 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); 1816 for( i = size; i < 4; ++i ) 1817 bld.immediates[num_immediates][i] = bld.base.undef; 1818 num_immediates++; 1819 } 1820 break; 1821 1822 case TGSI_TOKEN_TYPE_PROPERTY: 1823 break; 1824 1825 default: 1826 assert( 0 ); 1827 } 1828 } 1829 if (0) { 1830 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); 1831 LLVMValueRef function = LLVMGetBasicBlockParent(block); 1832 debug_printf("11111111111111111111111111111 \n"); 1833 tgsi_dump(tokens, 0); 1834 LLVMDumpValue(function); 1835 debug_printf("2222222222222222222222222222 \n"); 1836 } 1837 tgsi_parse_free( &parse ); 1838} 1839 1840