lp_bld_tgsi_soa.c revision 17dbd41cf23e7e7de2f27e5e9252d7f792d932f3
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 
37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_info.h" 46#include "tgsi/tgsi_parse.h" 47#include "tgsi/tgsi_util.h" 48#include "tgsi/tgsi_scan.h" 49#include "lp_bld_type.h" 50#include "lp_bld_const.h" 51#include "lp_bld_arit.h" 52#include "lp_bld_bitarit.h" 53#include "lp_bld_gather.h" 54#include "lp_bld_logic.h" 55#include "lp_bld_swizzle.h" 56#include "lp_bld_flow.h" 57#include "lp_bld_quad.h" 58#include "lp_bld_tgsi.h" 59#include "lp_bld_limits.h" 60#include "lp_bld_debug.h" 61 62 63#define FOR_EACH_CHANNEL( CHAN )\ 64 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 65 66#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 67 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 68 69#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 70 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 71 72#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 73 FOR_EACH_CHANNEL( CHAN )\ 74 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 75 76#define CHAN_X 0 77#define CHAN_Y 1 78#define CHAN_Z 2 79#define CHAN_W 3 80#define NUM_CHANNELS 4 81 82#define LP_MAX_INSTRUCTIONS 256 83 84 85struct lp_exec_mask { 86 struct lp_build_context *bld; 87 88 boolean has_mask; 89 90 LLVMTypeRef int_vec_type; 91 92 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 93 int cond_stack_size; 94 LLVMValueRef cond_mask; 95 96 LLVMBasicBlockRef loop_block; 97 LLVMValueRef cont_mask; 98 LLVMValueRef break_mask; 99 LLVMValueRef break_var; 100 struct { 101 LLVMBasicBlockRef loop_block; 102 LLVMValueRef cont_mask; 103 LLVMValueRef break_mask; 104 LLVMValueRef break_var; 105 } loop_stack[LP_MAX_TGSI_NESTING]; 106 int loop_stack_size; 107 108 LLVMValueRef ret_mask; 109 struct { 110 int pc; 111 LLVMValueRef ret_mask; 112 } call_stack[LP_MAX_TGSI_NESTING]; 113 int call_stack_size; 114 115 LLVMValueRef exec_mask; 116}; 117 118struct lp_build_tgsi_soa_context 119{ 120 struct 
lp_build_context base; 121 122 /* Builder for integer masks and indices */ 123 struct lp_build_context uint_bld; 124 125 LLVMValueRef consts_ptr; 126 const LLVMValueRef *pos; 127 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 128 LLVMValueRef (*outputs)[NUM_CHANNELS]; 129 130 const struct lp_build_sampler_soa *sampler; 131 132 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; 133 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; 134 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; 135 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; 136 137 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 138 * set in the indirect_files field. 139 * The temps[] array above is unused then. 140 */ 141 LLVMValueRef temps_array; 142 143 const struct tgsi_shader_info *info; 144 /** bitmask indicating which register files are accessed indirectly */ 145 unsigned indirect_files; 146 147 struct lp_build_mask_context *mask; 148 struct lp_exec_mask exec_mask; 149 150 struct tgsi_full_instruction *instructions; 151 uint max_instructions; 152}; 153 154static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 155{ 156 mask->bld = bld; 157 mask->has_mask = FALSE; 158 mask->cond_stack_size = 0; 159 mask->loop_stack_size = 0; 160 mask->call_stack_size = 0; 161 162 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); 163 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = 164 LLVMConstAllOnes(mask->int_vec_type); 165} 166 167static void lp_exec_mask_update(struct lp_exec_mask *mask) 168{ 169 if (mask->loop_stack_size) { 170 /*for loops we need to update the entire mask at runtime */ 171 LLVMValueRef tmp; 172 assert(mask->break_mask); 173 tmp = LLVMBuildAnd(mask->bld->builder, 174 mask->cont_mask, 175 mask->break_mask, 176 "maskcb"); 177 mask->exec_mask = LLVMBuildAnd(mask->bld->builder, 178 mask->cond_mask, 179 tmp, 180 "maskfull"); 181 } else 182 mask->exec_mask = mask->cond_mask; 
183 184 if (mask->call_stack_size) { 185 mask->exec_mask = LLVMBuildAnd(mask->bld->builder, 186 mask->exec_mask, 187 mask->ret_mask, 188 "callmask"); 189 } 190 191 mask->has_mask = (mask->cond_stack_size > 0 || 192 mask->loop_stack_size > 0 || 193 mask->call_stack_size > 0); 194} 195 196static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 197 LLVMValueRef val) 198{ 199 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 200 if (mask->cond_stack_size == 0) { 201 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); 202 } 203 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 204 assert(LLVMTypeOf(val) == mask->int_vec_type); 205 mask->cond_mask = LLVMBuildAnd(mask->bld->builder, 206 mask->cond_mask, 207 val, 208 ""); 209 lp_exec_mask_update(mask); 210} 211 212static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 213{ 214 LLVMValueRef prev_mask; 215 LLVMValueRef inv_mask; 216 217 assert(mask->cond_stack_size); 218 prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 219 if (mask->cond_stack_size == 1) { 220 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); 221 } 222 223 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, ""); 224 225 mask->cond_mask = LLVMBuildAnd(mask->bld->builder, 226 inv_mask, 227 prev_mask, ""); 228 lp_exec_mask_update(mask); 229} 230 231static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 232{ 233 assert(mask->cond_stack_size); 234 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 235 lp_exec_mask_update(mask); 236} 237 238static void lp_exec_bgnloop(struct lp_exec_mask *mask) 239{ 240 if (mask->loop_stack_size == 0) { 241 assert(mask->loop_block == NULL); 242 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); 243 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type)); 244 assert(mask->break_var == NULL); 245 } 246 247 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING); 248 249 mask->loop_stack[mask->loop_stack_size].loop_block = 
mask->loop_block; 250 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask; 251 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask; 252 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; 253 ++mask->loop_stack_size; 254 255 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, ""); 256 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); 257 258 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); 259 LLVMBuildBr(mask->bld->builder, mask->loop_block); 260 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); 261 262 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, ""); 263 264 lp_exec_mask_update(mask); 265} 266 267static void lp_exec_break(struct lp_exec_mask *mask) 268{ 269 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 270 mask->exec_mask, 271 "break"); 272 273 mask->break_mask = LLVMBuildAnd(mask->bld->builder, 274 mask->break_mask, 275 exec_mask, "break_full"); 276 277 lp_exec_mask_update(mask); 278} 279 280static void lp_exec_continue(struct lp_exec_mask *mask) 281{ 282 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 283 mask->exec_mask, 284 ""); 285 286 mask->cont_mask = LLVMBuildAnd(mask->bld->builder, 287 mask->cont_mask, 288 exec_mask, ""); 289 290 lp_exec_mask_update(mask); 291} 292 293 294static void lp_exec_endloop(struct lp_exec_mask *mask) 295{ 296 LLVMBasicBlockRef endloop; 297 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* 298 mask->bld->type.length); 299 LLVMValueRef i1cond; 300 301 assert(mask->break_mask); 302 303 /* 304 * Restore the cont_mask, but don't pop 305 */ 306 assert(mask->loop_stack_size); 307 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask; 308 lp_exec_mask_update(mask); 309 310 /* 311 * Unlike the continue mask, the break_mask must be preserved across loop 312 * iterations 313 */ 314 LLVMBuildStore(mask->bld->builder, mask->break_mask, 
mask->break_var); 315 316 /* i1cond = (mask == 0) */ 317 i1cond = LLVMBuildICmp( 318 mask->bld->builder, 319 LLVMIntNE, 320 LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""), 321 LLVMConstNull(reg_type), ""); 322 323 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); 324 325 LLVMBuildCondBr(mask->bld->builder, 326 i1cond, mask->loop_block, endloop); 327 328 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); 329 330 assert(mask->loop_stack_size); 331 --mask->loop_stack_size; 332 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block; 333 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask; 334 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask; 335 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var; 336 337 lp_exec_mask_update(mask); 338} 339 340/* stores val into an address pointed to by dst. 341 * mask->exec_mask is used to figure out which bits of val 342 * should be stored into the address 343 * (0 means don't store this bit, 1 means do store). 
344 */ 345static void lp_exec_mask_store(struct lp_exec_mask *mask, 346 LLVMValueRef pred, 347 LLVMValueRef val, 348 LLVMValueRef dst) 349{ 350 /* Mix the predicate and execution mask */ 351 if (mask->has_mask) { 352 if (pred) { 353 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, ""); 354 } else { 355 pred = mask->exec_mask; 356 } 357 } 358 359 if (pred) { 360 LLVMValueRef real_val, dst_val; 361 362 dst_val = LLVMBuildLoad(mask->bld->builder, dst, ""); 363 real_val = lp_build_select(mask->bld, 364 pred, 365 val, dst_val); 366 367 LLVMBuildStore(mask->bld->builder, real_val, dst); 368 } else 369 LLVMBuildStore(mask->bld->builder, val, dst); 370} 371 372static void lp_exec_mask_call(struct lp_exec_mask *mask, 373 int func, 374 int *pc) 375{ 376 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING); 377 mask->call_stack[mask->call_stack_size].pc = *pc; 378 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask; 379 mask->call_stack_size++; 380 *pc = func; 381} 382 383static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) 384{ 385 LLVMValueRef exec_mask; 386 387 if (mask->call_stack_size == 0) { 388 /* returning from main() */ 389 *pc = -1; 390 return; 391 } 392 exec_mask = LLVMBuildNot(mask->bld->builder, 393 mask->exec_mask, 394 "ret"); 395 396 mask->ret_mask = LLVMBuildAnd(mask->bld->builder, 397 mask->ret_mask, 398 exec_mask, "ret_full"); 399 400 lp_exec_mask_update(mask); 401} 402 403static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) 404{ 405} 406 407static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) 408{ 409 assert(mask->call_stack_size); 410 mask->call_stack_size--; 411 *pc = mask->call_stack[mask->call_stack_size].pc; 412 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask; 413 lp_exec_mask_update(mask); 414} 415 416 417/** 418 * Return pointer to a temporary register channel (src or dest). 419 * Note that indirect addressing cannot be handled here. 
420 * \param index which temporary register 421 * \param chan which channel of the temp register. 422 */ 423static LLVMValueRef 424get_temp_ptr(struct lp_build_tgsi_soa_context *bld, 425 unsigned index, 426 unsigned chan) 427{ 428 assert(chan < 4); 429 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 430 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); 431 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); 432 } 433 else { 434 return bld->temps[index][chan]; 435 } 436} 437 438 439/** 440 * Gather vector. 441 * XXX the lp_build_gather() function should be capable of doing this 442 * with a little work. 443 */ 444static LLVMValueRef 445build_gather(struct lp_build_tgsi_soa_context *bld, 446 LLVMValueRef base_ptr, 447 LLVMValueRef indexes) 448{ 449 LLVMValueRef res = bld->base.undef; 450 unsigned i; 451 452 /* 453 * Loop over elements of index_vec, load scalar value, insert it into 'res'. 454 */ 455 for (i = 0; i < bld->base.type.length; i++) { 456 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0); 457 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder, 458 indexes, ii, ""); 459 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr, 460 &index, 1, ""); 461 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 462 463 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, ""); 464 } 465 466 return res; 467} 468 469 470/** 471 * Read the current value of the ADDR register, convert the floats to 472 * ints, multiply by four and return the vector of offsets. 473 * The offsets will be used to index into the constant buffer or 474 * temporary register file. 
475 */ 476static LLVMValueRef 477get_indirect_index(struct lp_build_tgsi_soa_context *bld, 478 unsigned reg_file, unsigned reg_index, 479 const struct tgsi_src_register *indirect_reg) 480{ 481 struct lp_build_context *uint_bld = &bld->uint_bld; 482 /* always use X component of address register */ 483 unsigned swizzle = indirect_reg->SwizzleX; 484 LLVMValueRef base; 485 LLVMValueRef rel; 486 LLVMValueRef max_index; 487 LLVMValueRef index; 488 489 assert(bld->indirect_files & (1 << reg_file)); 490 491 base = lp_build_const_int_vec(uint_bld->type, reg_index); 492 493 assert(swizzle < 4); 494 rel = LLVMBuildLoad(bld->base.builder, 495 bld->addr[indirect_reg->Index][swizzle], 496 "load addr reg"); 497 498 /* for indexing we want integers */ 499 rel = LLVMBuildFPToSI(bld->base.builder, 500 rel, 501 uint_bld->vec_type, ""); 502 503 index = lp_build_add(uint_bld, base, rel); 504 505 max_index = lp_build_const_int_vec(uint_bld->type, 506 bld->info->file_max[reg_file]); 507 508 assert(!uint_bld->type.sign); 509 index = lp_build_min(uint_bld, index, max_index); 510 511 return index; 512} 513 514 515/** 516 * Register fetch. 
517 */ 518static LLVMValueRef 519emit_fetch( 520 struct lp_build_tgsi_soa_context *bld, 521 const struct tgsi_full_instruction *inst, 522 unsigned src_op, 523 const unsigned chan_index ) 524{ 525 struct lp_build_context *uint_bld = &bld->uint_bld; 526 const struct tgsi_full_src_register *reg = &inst->Src[src_op]; 527 const unsigned swizzle = 528 tgsi_util_get_full_src_register_swizzle(reg, chan_index); 529 LLVMValueRef res; 530 LLVMValueRef indirect_index = NULL; 531 532 if (swizzle > 3) { 533 assert(0 && "invalid swizzle in emit_fetch()"); 534 return bld->base.undef; 535 } 536 537 if (reg->Register.Indirect) { 538 indirect_index = get_indirect_index(bld, 539 reg->Register.File, 540 reg->Register.Index, 541 ®->Indirect); 542 } else { 543 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); 544 } 545 546 switch (reg->Register.File) { 547 case TGSI_FILE_CONSTANT: 548 if (reg->Register.Indirect) { 549 LLVMValueRef swizzle_vec = 550 lp_build_const_int_vec(uint_bld->type, swizzle); 551 LLVMValueRef index_vec; /* index into the const buffer */ 552 553 /* index_vec = indirect_index * 4 + swizzle */ 554 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 555 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 556 557 /* Gather values from the constant buffer */ 558 res = build_gather(bld, bld->consts_ptr, index_vec); 559 } 560 else { 561 LLVMValueRef index; /* index into the const buffer */ 562 LLVMValueRef scalar, scalar_ptr; 563 564 index = lp_build_const_int32(reg->Register.Index*4 + swizzle); 565 566 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, 567 &index, 1, ""); 568 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 569 570 res = lp_build_broadcast_scalar(&bld->base, scalar); 571 } 572 break; 573 574 case TGSI_FILE_IMMEDIATE: 575 res = bld->immediates[reg->Register.Index][swizzle]; 576 assert(res); 577 break; 578 579 case TGSI_FILE_INPUT: 580 res = bld->inputs[reg->Register.Index][swizzle]; 581 assert(res); 
582 break; 583 584 case TGSI_FILE_TEMPORARY: 585 if (reg->Register.Indirect) { 586 LLVMValueRef swizzle_vec = 587 lp_build_const_int_vec(uint_bld->type, swizzle); 588 LLVMValueRef length_vec = 589 lp_build_const_int_vec(uint_bld->type, bld->base.type.length); 590 LLVMValueRef index_vec; /* index into the const buffer */ 591 LLVMValueRef temps_array; 592 LLVMTypeRef float4_ptr_type; 593 594 /* index_vec = (indirect_index * 4 + swizzle) * length */ 595 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 596 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 597 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 598 599 /* cast temps_array pointer to float* */ 600 float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0); 601 temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array, 602 float4_ptr_type, ""); 603 604 /* Gather values from the temporary register array */ 605 res = build_gather(bld, temps_array, index_vec); 606 } 607 else { 608 LLVMValueRef temp_ptr; 609 temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle); 610 res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); 611 if (!res) 612 return bld->base.undef; 613 } 614 break; 615 616 default: 617 assert(0 && "invalid src register in emit_fetch()"); 618 return bld->base.undef; 619 } 620 621 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { 622 case TGSI_UTIL_SIGN_CLEAR: 623 res = lp_build_abs( &bld->base, res ); 624 break; 625 626 case TGSI_UTIL_SIGN_SET: 627 res = lp_build_abs( &bld->base, res ); 628 /* fall through */ 629 case TGSI_UTIL_SIGN_TOGGLE: 630 res = lp_build_negate( &bld->base, res ); 631 break; 632 633 case TGSI_UTIL_SIGN_KEEP: 634 break; 635 } 636 637 return res; 638} 639 640 641/** 642 * Register fetch with derivatives. 
643 */ 644static void 645emit_fetch_deriv( 646 struct lp_build_tgsi_soa_context *bld, 647 const struct tgsi_full_instruction *inst, 648 unsigned index, 649 const unsigned chan_index, 650 LLVMValueRef *res, 651 LLVMValueRef *ddx, 652 LLVMValueRef *ddy) 653{ 654 LLVMValueRef src; 655 656 src = emit_fetch(bld, inst, index, chan_index); 657 658 if(res) 659 *res = src; 660 661 /* TODO: use interpolation coeffs for inputs */ 662 663 if(ddx) 664 *ddx = lp_build_ddx(&bld->base, src); 665 666 if(ddy) 667 *ddy = lp_build_ddy(&bld->base, src); 668} 669 670 671/** 672 * Predicate. 673 */ 674static void 675emit_fetch_predicate( 676 struct lp_build_tgsi_soa_context *bld, 677 const struct tgsi_full_instruction *inst, 678 LLVMValueRef *pred) 679{ 680 unsigned index; 681 unsigned char swizzles[4]; 682 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; 683 LLVMValueRef value; 684 unsigned chan; 685 686 if (!inst->Instruction.Predicate) { 687 FOR_EACH_CHANNEL( chan ) { 688 pred[chan] = NULL; 689 } 690 return; 691 } 692 693 swizzles[0] = inst->Predicate.SwizzleX; 694 swizzles[1] = inst->Predicate.SwizzleY; 695 swizzles[2] = inst->Predicate.SwizzleZ; 696 swizzles[3] = inst->Predicate.SwizzleW; 697 698 index = inst->Predicate.Index; 699 assert(index < LP_MAX_TGSI_PREDS); 700 701 FOR_EACH_CHANNEL( chan ) { 702 unsigned swizzle = swizzles[chan]; 703 704 /* 705 * Only fetch the predicate register channels that are actually listed 706 * in the swizzles 707 */ 708 if (!unswizzled[swizzle]) { 709 value = LLVMBuildLoad(bld->base.builder, 710 bld->preds[index][swizzle], ""); 711 712 /* 713 * Convert the value to an integer mask. 714 * 715 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions 716 * is needlessly causing two comparisons due to storing the intermediate 717 * result as float vector instead of an integer mask vector. 
718 */ 719 value = lp_build_compare(bld->base.builder, 720 bld->base.type, 721 PIPE_FUNC_NOTEQUAL, 722 value, 723 bld->base.zero); 724 if (inst->Predicate.Negate) { 725 value = LLVMBuildNot(bld->base.builder, value, ""); 726 } 727 728 unswizzled[swizzle] = value; 729 } else { 730 value = unswizzled[swizzle]; 731 } 732 733 pred[chan] = value; 734 } 735} 736 737 738/** 739 * Register store. 740 */ 741static void 742emit_store( 743 struct lp_build_tgsi_soa_context *bld, 744 const struct tgsi_full_instruction *inst, 745 unsigned index, 746 unsigned chan_index, 747 LLVMValueRef pred, 748 LLVMValueRef value) 749{ 750 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 751 LLVMValueRef indirect_index = NULL; 752 753 switch( inst->Instruction.Saturate ) { 754 case TGSI_SAT_NONE: 755 break; 756 757 case TGSI_SAT_ZERO_ONE: 758 value = lp_build_max(&bld->base, value, bld->base.zero); 759 value = lp_build_min(&bld->base, value, bld->base.one); 760 break; 761 762 case TGSI_SAT_MINUS_PLUS_ONE: 763 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); 764 value = lp_build_min(&bld->base, value, bld->base.one); 765 break; 766 767 default: 768 assert(0); 769 } 770 771 if (reg->Register.Indirect) { 772 indirect_index = get_indirect_index(bld, 773 reg->Register.File, 774 reg->Register.Index, 775 ®->Indirect); 776 } else { 777 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); 778 } 779 780 switch( reg->Register.File ) { 781 case TGSI_FILE_OUTPUT: 782 lp_exec_mask_store(&bld->exec_mask, pred, value, 783 bld->outputs[reg->Register.Index][chan_index]); 784 break; 785 786 case TGSI_FILE_TEMPORARY: 787 if (reg->Register.Indirect) { 788 /* XXX not done yet */ 789 debug_printf("WARNING: LLVM scatter store of temp regs" 790 " not implemented\n"); 791 } 792 else { 793 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 794 chan_index); 795 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); 796 } 797 break; 798 799 
case TGSI_FILE_ADDRESS: 800 lp_exec_mask_store(&bld->exec_mask, pred, value, 801 bld->addr[reg->Indirect.Index][chan_index]); 802 break; 803 804 case TGSI_FILE_PREDICATE: 805 lp_exec_mask_store(&bld->exec_mask, pred, value, 806 bld->preds[reg->Register.Index][chan_index]); 807 break; 808 809 default: 810 assert( 0 ); 811 } 812} 813 814 815/** 816 * High-level instruction translators. 817 */ 818 819static void 820emit_tex( struct lp_build_tgsi_soa_context *bld, 821 const struct tgsi_full_instruction *inst, 822 enum lp_build_tex_modifier modifier, 823 LLVMValueRef *texel) 824{ 825 unsigned unit; 826 LLVMValueRef lod_bias, explicit_lod; 827 LLVMValueRef oow = NULL; 828 LLVMValueRef coords[3]; 829 LLVMValueRef ddx[3]; 830 LLVMValueRef ddy[3]; 831 unsigned num_coords; 832 unsigned i; 833 834 if (!bld->sampler) { 835 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 836 for (i = 0; i < 4; i++) { 837 texel[i] = bld->base.undef; 838 } 839 return; 840 } 841 842 switch (inst->Texture.Texture) { 843 case TGSI_TEXTURE_1D: 844 num_coords = 1; 845 break; 846 case TGSI_TEXTURE_2D: 847 case TGSI_TEXTURE_RECT: 848 num_coords = 2; 849 break; 850 case TGSI_TEXTURE_SHADOW1D: 851 case TGSI_TEXTURE_SHADOW2D: 852 case TGSI_TEXTURE_SHADOWRECT: 853 case TGSI_TEXTURE_3D: 854 case TGSI_TEXTURE_CUBE: 855 num_coords = 3; 856 break; 857 default: 858 assert(0); 859 return; 860 } 861 862 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 863 lod_bias = emit_fetch( bld, inst, 0, 3 ); 864 explicit_lod = NULL; 865 } 866 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 867 lod_bias = NULL; 868 explicit_lod = emit_fetch( bld, inst, 0, 3 ); 869 } 870 else { 871 lod_bias = NULL; 872 explicit_lod = NULL; 873 } 874 875 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 876 oow = emit_fetch( bld, inst, 0, 3 ); 877 oow = lp_build_rcp(&bld->base, oow); 878 } 879 880 for (i = 0; i < num_coords; i++) { 881 coords[i] = emit_fetch( bld, inst, 0, i ); 882 if (modifier == 
LP_BLD_TEX_MODIFIER_PROJECTED) 883 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 884 } 885 for (i = num_coords; i < 3; i++) { 886 coords[i] = bld->base.undef; 887 } 888 889 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 890 LLVMTypeRef i32t = LLVMInt32Type(); 891 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); 892 for (i = 0; i < num_coords; i++) { 893 LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); 894 LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); 895 ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, ""); 896 ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, ""); 897 } 898 unit = inst->Src[3].Register.Index; 899 } else { 900 for (i = 0; i < num_coords; i++) { 901 ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); 902 ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); 903 } 904 unit = inst->Src[1].Register.Index; 905 } 906 for (i = num_coords; i < 3; i++) { 907 ddx[i] = LLVMGetUndef(bld->base.elem_type); 908 ddy[i] = LLVMGetUndef(bld->base.elem_type); 909 } 910 911 bld->sampler->emit_fetch_texel(bld->sampler, 912 bld->base.builder, 913 bld->base.type, 914 unit, num_coords, coords, 915 ddx, ddy, 916 lod_bias, explicit_lod, 917 texel); 918} 919 920 921/** 922 * Kill fragment if any of the src register values are negative. 923 */ 924static void 925emit_kil( 926 struct lp_build_tgsi_soa_context *bld, 927 const struct tgsi_full_instruction *inst ) 928{ 929 const struct tgsi_full_src_register *reg = &inst->Src[0]; 930 LLVMValueRef terms[NUM_CHANNELS]; 931 LLVMValueRef mask; 932 unsigned chan_index; 933 934 memset(&terms, 0, sizeof terms); 935 936 FOR_EACH_CHANNEL( chan_index ) { 937 unsigned swizzle; 938 939 /* Unswizzle channel */ 940 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 941 942 /* Check if the component has not been already tested. 
*/ 943 assert(swizzle < NUM_CHANNELS); 944 if( !terms[swizzle] ) 945 /* TODO: change the comparison operator instead of setting the sign */ 946 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 947 } 948 949 mask = NULL; 950 FOR_EACH_CHANNEL( chan_index ) { 951 if(terms[chan_index]) { 952 LLVMValueRef chan_mask; 953 954 /* 955 * If term < 0 then mask = 0 else mask = ~0. 956 */ 957 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 958 959 if(mask) 960 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); 961 else 962 mask = chan_mask; 963 } 964 } 965 966 if(mask) { 967 lp_build_mask_update(bld->mask, mask); 968 969 /* XXX: figure out if we are at the end of the shader and skip this: 970 */ 971 lp_build_mask_check(bld->mask); 972 } 973} 974 975 976/** 977 * Predicated fragment kill. 978 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 979 * The only predication is the execution mask which will apply if 980 * we're inside a loop or conditional. 981 */ 982static void 983emit_kilp(struct lp_build_tgsi_soa_context *bld, 984 const struct tgsi_full_instruction *inst) 985{ 986 LLVMValueRef mask; 987 988 /* For those channels which are "alive", disable fragment shader 989 * execution. 
990 */ 991 if (bld->exec_mask.has_mask) { 992 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); 993 } 994 else { 995 mask = bld->base.zero; 996 } 997 998 lp_build_mask_update(bld->mask, mask); 999 1000 /* XXX: figure out if we are at the end of the shader and skip this: 1001 */ 1002 lp_build_mask_check(bld->mask); 1003} 1004 1005static void 1006emit_declaration( 1007 struct lp_build_tgsi_soa_context *bld, 1008 const struct tgsi_full_declaration *decl) 1009{ 1010 LLVMTypeRef vec_type = bld->base.vec_type; 1011 1012 unsigned first = decl->Range.First; 1013 unsigned last = decl->Range.Last; 1014 unsigned idx, i; 1015 1016 for (idx = first; idx <= last; ++idx) { 1017 assert(last <= bld->info->file_max[decl->Declaration.File]); 1018 switch (decl->Declaration.File) { 1019 case TGSI_FILE_TEMPORARY: 1020 assert(idx < LP_MAX_TGSI_TEMPS); 1021 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 1022 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), 1023 last*4 + 4, 0); 1024 bld->temps_array = lp_build_array_alloca(bld->base.builder, 1025 vec_type, array_size, ""); 1026 } else { 1027 for (i = 0; i < NUM_CHANNELS; i++) 1028 bld->temps[idx][i] = lp_build_alloca(bld->base.builder, 1029 vec_type, ""); 1030 } 1031 break; 1032 1033 case TGSI_FILE_OUTPUT: 1034 for (i = 0; i < NUM_CHANNELS; i++) 1035 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, 1036 vec_type, ""); 1037 break; 1038 1039 case TGSI_FILE_ADDRESS: 1040 assert(idx < LP_MAX_TGSI_ADDRS); 1041 for (i = 0; i < NUM_CHANNELS; i++) 1042 bld->addr[idx][i] = lp_build_alloca(bld->base.builder, 1043 vec_type, ""); 1044 break; 1045 1046 case TGSI_FILE_PREDICATE: 1047 assert(idx < LP_MAX_TGSI_PREDS); 1048 for (i = 0; i < NUM_CHANNELS; i++) 1049 bld->preds[idx][i] = lp_build_alloca(bld->base.builder, 1050 vec_type, ""); 1051 break; 1052 1053 default: 1054 /* don't need to declare other vars */ 1055 break; 1056 } 1057 } 1058} 1059 1060 1061/** 1062 * Emit LLVM for one TGSI instruction. 
1063 * \param return TRUE for success, FALSE otherwise 1064 */ 1065static boolean 1066emit_instruction( 1067 struct lp_build_tgsi_soa_context *bld, 1068 const struct tgsi_full_instruction *inst, 1069 const struct tgsi_opcode_info *info, 1070 int *pc) 1071{ 1072 unsigned chan_index; 1073 LLVMValueRef src0, src1, src2; 1074 LLVMValueRef tmp0, tmp1, tmp2; 1075 LLVMValueRef tmp3 = NULL; 1076 LLVMValueRef tmp4 = NULL; 1077 LLVMValueRef tmp5 = NULL; 1078 LLVMValueRef tmp6 = NULL; 1079 LLVMValueRef tmp7 = NULL; 1080 LLVMValueRef res; 1081 LLVMValueRef dst0[NUM_CHANNELS]; 1082 1083 /* 1084 * Stores and write masks are handled in a general fashion after the long 1085 * instruction opcode switch statement. 1086 * 1087 * Although not stricitly necessary, we avoid generating instructions for 1088 * channels which won't be stored, in cases where's that easy. For some 1089 * complex instructions, like texture sampling, it is more convenient to 1090 * assume a full writemask and then let LLVM optimization passes eliminate 1091 * redundant code. 
1092 */ 1093 1094 (*pc)++; 1095 1096 assert(info->num_dst <= 1); 1097 if (info->num_dst) { 1098 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1099 dst0[chan_index] = bld->base.undef; 1100 } 1101 } 1102 1103 switch (inst->Instruction.Opcode) { 1104 case TGSI_OPCODE_ARL: 1105 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1106 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1107 tmp0 = lp_build_floor(&bld->base, tmp0); 1108 dst0[chan_index] = tmp0; 1109 } 1110 break; 1111 1112 case TGSI_OPCODE_MOV: 1113 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1114 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1115 } 1116 break; 1117 1118 case TGSI_OPCODE_LIT: 1119 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 1120 dst0[CHAN_X] = bld->base.one; 1121 } 1122 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1123 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1124 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1125 } 1126 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1127 /* XMM[1] = SrcReg[0].yyyy */ 1128 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1129 /* XMM[1] = max(XMM[1], 0) */ 1130 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1131 /* XMM[2] = SrcReg[0].wwww */ 1132 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1133 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1134 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1135 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1136 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1137 } 1138 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1139 dst0[CHAN_W] = bld->base.one; 1140 } 1141 break; 1142 1143 case TGSI_OPCODE_RCP: 1144 /* TGSI_OPCODE_RECIP */ 1145 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1146 res = lp_build_rcp(&bld->base, src0); 1147 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1148 dst0[chan_index] = res; 1149 } 1150 break; 1151 1152 case TGSI_OPCODE_RSQ: 1153 /* TGSI_OPCODE_RECIPSQRT */ 1154 src0 = emit_fetch( bld, inst, 0, 
CHAN_X ); 1155 src0 = lp_build_abs(&bld->base, src0); 1156 res = lp_build_rsqrt(&bld->base, src0); 1157 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1158 dst0[chan_index] = res; 1159 } 1160 break; 1161 1162 case TGSI_OPCODE_EXP: 1163 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1164 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1165 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1166 LLVMValueRef *p_exp2_int_part = NULL; 1167 LLVMValueRef *p_frac_part = NULL; 1168 LLVMValueRef *p_exp2 = NULL; 1169 1170 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1171 1172 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1173 p_exp2_int_part = &tmp0; 1174 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1175 p_frac_part = &tmp1; 1176 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1177 p_exp2 = &tmp2; 1178 1179 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1180 1181 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1182 dst0[CHAN_X] = tmp0; 1183 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1184 dst0[CHAN_Y] = tmp1; 1185 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1186 dst0[CHAN_Z] = tmp2; 1187 } 1188 /* dst.w = 1.0 */ 1189 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1190 dst0[CHAN_W] = bld->base.one; 1191 } 1192 break; 1193 1194 case TGSI_OPCODE_LOG: 1195 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1196 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1197 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1198 LLVMValueRef *p_floor_log2 = NULL; 1199 LLVMValueRef *p_exp = NULL; 1200 LLVMValueRef *p_log2 = NULL; 1201 1202 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1203 src0 = lp_build_abs( &bld->base, src0 ); 1204 1205 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1206 p_floor_log2 = &tmp0; 1207 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1208 p_exp = &tmp1; 1209 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1210 p_log2 = &tmp2; 1211 1212 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1213 1214 /* dst.x = floor(lg2(abs(src.x))) */ 1215 if 
(IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1216 dst0[CHAN_X] = tmp0; 1217 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1218 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1219 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1220 } 1221 /* dst.z = lg2(abs(src.x)) */ 1222 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1223 dst0[CHAN_Z] = tmp2; 1224 } 1225 /* dst.w = 1.0 */ 1226 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1227 dst0[CHAN_W] = bld->base.one; 1228 } 1229 break; 1230 1231 case TGSI_OPCODE_MUL: 1232 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1233 src0 = emit_fetch( bld, inst, 0, chan_index ); 1234 src1 = emit_fetch( bld, inst, 1, chan_index ); 1235 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1236 } 1237 break; 1238 1239 case TGSI_OPCODE_ADD: 1240 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1241 src0 = emit_fetch( bld, inst, 0, chan_index ); 1242 src1 = emit_fetch( bld, inst, 1, chan_index ); 1243 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1244 } 1245 break; 1246 1247 case TGSI_OPCODE_DP3: 1248 /* TGSI_OPCODE_DOT3 */ 1249 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1250 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1251 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1252 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1253 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1254 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1255 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1256 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1257 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1258 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1259 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1260 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1261 dst0[chan_index] = tmp0; 1262 } 1263 break; 1264 1265 case TGSI_OPCODE_DP4: 1266 /* TGSI_OPCODE_DOT4 */ 1267 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1268 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1269 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1270 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y 
); 1271 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1272 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1273 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1274 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1275 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1276 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1277 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1278 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1279 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1280 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1281 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1282 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1283 dst0[chan_index] = tmp0; 1284 } 1285 break; 1286 1287 case TGSI_OPCODE_DST: 1288 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1289 dst0[CHAN_X] = bld->base.one; 1290 } 1291 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1292 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1293 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1294 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1295 } 1296 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1297 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1298 } 1299 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1300 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1301 } 1302 break; 1303 1304 case TGSI_OPCODE_MIN: 1305 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1306 src0 = emit_fetch( bld, inst, 0, chan_index ); 1307 src1 = emit_fetch( bld, inst, 1, chan_index ); 1308 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1309 } 1310 break; 1311 1312 case TGSI_OPCODE_MAX: 1313 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1314 src0 = emit_fetch( bld, inst, 0, chan_index ); 1315 src1 = emit_fetch( bld, inst, 1, chan_index ); 1316 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1317 } 1318 break; 1319 1320 case TGSI_OPCODE_SLT: 1321 /* TGSI_OPCODE_SETLT */ 1322 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1323 src0 = emit_fetch( bld, inst, 0, chan_index ); 1324 src1 = emit_fetch( bld, inst, 1, chan_index 
); 1325 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1326 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1327 } 1328 break; 1329 1330 case TGSI_OPCODE_SGE: 1331 /* TGSI_OPCODE_SETGE */ 1332 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1333 src0 = emit_fetch( bld, inst, 0, chan_index ); 1334 src1 = emit_fetch( bld, inst, 1, chan_index ); 1335 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1336 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1337 } 1338 break; 1339 1340 case TGSI_OPCODE_MAD: 1341 /* TGSI_OPCODE_MADD */ 1342 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1343 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1344 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1345 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1346 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1347 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1348 dst0[chan_index] = tmp0; 1349 } 1350 break; 1351 1352 case TGSI_OPCODE_SUB: 1353 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1354 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1355 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1356 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1357 } 1358 break; 1359 1360 case TGSI_OPCODE_LRP: 1361 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1362 src0 = emit_fetch( bld, inst, 0, chan_index ); 1363 src1 = emit_fetch( bld, inst, 1, chan_index ); 1364 src2 = emit_fetch( bld, inst, 2, chan_index ); 1365 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1366 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1367 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1368 } 1369 break; 1370 1371 case TGSI_OPCODE_CND: 1372 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1373 src0 = emit_fetch( bld, inst, 0, chan_index ); 1374 src1 = emit_fetch( bld, inst, 1, chan_index ); 1375 src2 = emit_fetch( bld, inst, 2, chan_index ); 1376 tmp1 = 
lp_build_const_vec(bld->base.type, 0.5); 1377 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1378 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1379 } 1380 break; 1381 1382 case TGSI_OPCODE_DP2A: 1383 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1384 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1385 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1386 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1387 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1388 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1389 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1390 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1391 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1392 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1393 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1394 } 1395 break; 1396 1397 case TGSI_OPCODE_FRC: 1398 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1399 src0 = emit_fetch( bld, inst, 0, chan_index ); 1400 tmp0 = lp_build_floor(&bld->base, src0); 1401 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1402 dst0[chan_index] = tmp0; 1403 } 1404 break; 1405 1406 case TGSI_OPCODE_CLAMP: 1407 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1408 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1409 src1 = emit_fetch( bld, inst, 1, chan_index ); 1410 src2 = emit_fetch( bld, inst, 2, chan_index ); 1411 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1412 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1413 dst0[chan_index] = tmp0; 1414 } 1415 break; 1416 1417 case TGSI_OPCODE_FLR: 1418 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1419 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1420 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1421 } 1422 break; 1423 1424 case TGSI_OPCODE_ROUND: 1425 FOR_EACH_DST0_ENABLED_CHANNEL( inst, 
chan_index ) { 1426 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1427 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1428 } 1429 break; 1430 1431 case TGSI_OPCODE_EX2: { 1432 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1433 tmp0 = lp_build_exp2( &bld->base, tmp0); 1434 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1435 dst0[chan_index] = tmp0; 1436 } 1437 break; 1438 } 1439 1440 case TGSI_OPCODE_LG2: 1441 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1442 tmp0 = lp_build_log2( &bld->base, tmp0); 1443 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1444 dst0[chan_index] = tmp0; 1445 } 1446 break; 1447 1448 case TGSI_OPCODE_POW: 1449 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1450 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1451 res = lp_build_pow( &bld->base, src0, src1 ); 1452 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1453 dst0[chan_index] = res; 1454 } 1455 break; 1456 1457 case TGSI_OPCODE_XPD: 1458 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1459 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1460 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1461 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1462 } 1463 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1464 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1465 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1466 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1467 } 1468 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1469 tmp2 = tmp0; 1470 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1471 tmp5 = tmp3; 1472 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1473 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1474 dst0[CHAN_X] = tmp2; 1475 } 1476 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1477 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1478 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1479 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 1480 } 1481 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1482 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1483 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1484 tmp3 = lp_build_sub( 
&bld->base, tmp3, tmp1); 1485 dst0[CHAN_Y] = tmp3; 1486 } 1487 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1488 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1489 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1490 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1491 dst0[CHAN_Z] = tmp5; 1492 } 1493 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1494 dst0[CHAN_W] = bld->base.one; 1495 } 1496 break; 1497 1498 case TGSI_OPCODE_ABS: 1499 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1500 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1501 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1502 } 1503 break; 1504 1505 case TGSI_OPCODE_RCC: 1506 /* deprecated? */ 1507 assert(0); 1508 return FALSE; 1509 1510 case TGSI_OPCODE_DPH: 1511 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1512 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1513 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1514 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1515 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1516 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1517 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1518 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1519 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1520 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1521 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1522 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1523 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1524 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1525 dst0[chan_index] = tmp0; 1526 } 1527 break; 1528 1529 case TGSI_OPCODE_COS: 1530 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1531 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1532 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1533 dst0[chan_index] = tmp0; 1534 } 1535 break; 1536 1537 case TGSI_OPCODE_DDX: 1538 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1539 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1540 } 1541 break; 1542 1543 case TGSI_OPCODE_DDY: 1544 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 
1545 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1546 } 1547 break; 1548 1549 case TGSI_OPCODE_KILP: 1550 /* predicated kill */ 1551 emit_kilp( bld, inst ); 1552 break; 1553 1554 case TGSI_OPCODE_KIL: 1555 /* conditional kill */ 1556 emit_kil( bld, inst ); 1557 break; 1558 1559 case TGSI_OPCODE_PK2H: 1560 return FALSE; 1561 break; 1562 1563 case TGSI_OPCODE_PK2US: 1564 return FALSE; 1565 break; 1566 1567 case TGSI_OPCODE_PK4B: 1568 return FALSE; 1569 break; 1570 1571 case TGSI_OPCODE_PK4UB: 1572 return FALSE; 1573 break; 1574 1575 case TGSI_OPCODE_RFL: 1576 return FALSE; 1577 break; 1578 1579 case TGSI_OPCODE_SEQ: 1580 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1581 src0 = emit_fetch( bld, inst, 0, chan_index ); 1582 src1 = emit_fetch( bld, inst, 1, chan_index ); 1583 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1584 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1585 } 1586 break; 1587 1588 case TGSI_OPCODE_SFL: 1589 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1590 dst0[chan_index] = bld->base.zero; 1591 } 1592 break; 1593 1594 case TGSI_OPCODE_SGT: 1595 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1596 src0 = emit_fetch( bld, inst, 0, chan_index ); 1597 src1 = emit_fetch( bld, inst, 1, chan_index ); 1598 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1599 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1600 } 1601 break; 1602 1603 case TGSI_OPCODE_SIN: 1604 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1605 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1606 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1607 dst0[chan_index] = tmp0; 1608 } 1609 break; 1610 1611 case TGSI_OPCODE_SLE: 1612 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1613 src0 = emit_fetch( bld, inst, 0, chan_index ); 1614 src1 = emit_fetch( bld, inst, 1, chan_index ); 1615 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, 
src0, src1 ); 1616 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1617 } 1618 break; 1619 1620 case TGSI_OPCODE_SNE: 1621 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1622 src0 = emit_fetch( bld, inst, 0, chan_index ); 1623 src1 = emit_fetch( bld, inst, 1, chan_index ); 1624 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1625 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1626 } 1627 break; 1628 1629 case TGSI_OPCODE_STR: 1630 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1631 dst0[chan_index] = bld->base.one; 1632 } 1633 break; 1634 1635 case TGSI_OPCODE_TEX: 1636 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); 1637 break; 1638 1639 case TGSI_OPCODE_TXD: 1640 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1641 break; 1642 1643 case TGSI_OPCODE_UP2H: 1644 /* deprecated */ 1645 assert (0); 1646 return FALSE; 1647 break; 1648 1649 case TGSI_OPCODE_UP2US: 1650 /* deprecated */ 1651 assert(0); 1652 return FALSE; 1653 break; 1654 1655 case TGSI_OPCODE_UP4B: 1656 /* deprecated */ 1657 assert(0); 1658 return FALSE; 1659 break; 1660 1661 case TGSI_OPCODE_UP4UB: 1662 /* deprecated */ 1663 assert(0); 1664 return FALSE; 1665 break; 1666 1667 case TGSI_OPCODE_X2D: 1668 /* deprecated? 
*/ 1669 assert(0); 1670 return FALSE; 1671 break; 1672 1673 case TGSI_OPCODE_ARA: 1674 /* deprecated */ 1675 assert(0); 1676 return FALSE; 1677 break; 1678 1679 case TGSI_OPCODE_ARR: 1680 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1681 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1682 tmp0 = lp_build_round(&bld->base, tmp0); 1683 dst0[chan_index] = tmp0; 1684 } 1685 break; 1686 1687 case TGSI_OPCODE_BRA: 1688 /* deprecated */ 1689 assert(0); 1690 return FALSE; 1691 break; 1692 1693 case TGSI_OPCODE_CAL: 1694 lp_exec_mask_call(&bld->exec_mask, 1695 inst->Label.Label, 1696 pc); 1697 1698 break; 1699 1700 case TGSI_OPCODE_RET: 1701 lp_exec_mask_ret(&bld->exec_mask, pc); 1702 break; 1703 1704 case TGSI_OPCODE_END: 1705 *pc = -1; 1706 break; 1707 1708 case TGSI_OPCODE_SSG: 1709 /* TGSI_OPCODE_SGN */ 1710 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1711 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1712 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1713 } 1714 break; 1715 1716 case TGSI_OPCODE_CMP: 1717 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1718 src0 = emit_fetch( bld, inst, 0, chan_index ); 1719 src1 = emit_fetch( bld, inst, 1, chan_index ); 1720 src2 = emit_fetch( bld, inst, 2, chan_index ); 1721 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 1722 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 1723 } 1724 break; 1725 1726 case TGSI_OPCODE_SCS: 1727 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1728 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1729 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 1730 } 1731 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1732 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1733 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 1734 } 1735 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1736 dst0[CHAN_Z] = bld->base.zero; 1737 } 1738 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1739 dst0[CHAN_W] = bld->base.one; 1740 } 1741 break; 1742 1743 case TGSI_OPCODE_TXB: 
1744 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); 1745 break; 1746 1747 case TGSI_OPCODE_NRM: 1748 /* fall-through */ 1749 case TGSI_OPCODE_NRM4: 1750 /* 3 or 4-component normalization */ 1751 { 1752 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 1753 1754 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 1755 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 1756 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 1757 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 1758 1759 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1760 1761 /* xmm4 = src.x */ 1762 /* xmm0 = src.x * src.x */ 1763 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1764 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1765 tmp4 = tmp0; 1766 } 1767 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 1768 1769 /* xmm5 = src.y */ 1770 /* xmm0 = xmm0 + src.y * src.y */ 1771 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 1772 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1773 tmp5 = tmp1; 1774 } 1775 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1776 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1777 1778 /* xmm6 = src.z */ 1779 /* xmm0 = xmm0 + src.z * src.z */ 1780 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 1781 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1782 tmp6 = tmp1; 1783 } 1784 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1785 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1786 1787 if (dims == 4) { 1788 /* xmm7 = src.w */ 1789 /* xmm0 = xmm0 + src.w * src.w */ 1790 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 1791 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 1792 tmp7 = tmp1; 1793 } 1794 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1795 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1796 } 1797 1798 /* xmm1 = 1 / sqrt(xmm0) */ 1799 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 1800 1801 /* dst.x = xmm1 * src.x */ 1802 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1803 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 1804 } 1805 1806 /* dst.y = xmm1 * src.y */ 1807 if 
(IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1808 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 1809 } 1810 1811 /* dst.z = xmm1 * src.z */ 1812 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1813 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 1814 } 1815 1816 /* dst.w = xmm1 * src.w */ 1817 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 1818 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 1819 } 1820 } 1821 1822 /* dst.w = 1.0 */ 1823 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 1824 dst0[CHAN_W] = bld->base.one; 1825 } 1826 } 1827 break; 1828 1829 case TGSI_OPCODE_DIV: 1830 /* deprecated */ 1831 assert( 0 ); 1832 return FALSE; 1833 break; 1834 1835 case TGSI_OPCODE_DP2: 1836 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1837 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1838 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1839 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1840 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1841 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1842 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1843 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1844 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1845 } 1846 break; 1847 1848 case TGSI_OPCODE_TXL: 1849 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 1850 break; 1851 1852 case TGSI_OPCODE_TXP: 1853 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); 1854 break; 1855 1856 case TGSI_OPCODE_BRK: 1857 lp_exec_break(&bld->exec_mask); 1858 break; 1859 1860 case TGSI_OPCODE_IF: 1861 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1862 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 1863 tmp0, bld->base.zero); 1864 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 1865 break; 1866 1867 case TGSI_OPCODE_BGNLOOP: 1868 lp_exec_bgnloop(&bld->exec_mask); 1869 break; 1870 1871 case 
TGSI_OPCODE_BGNSUB: 1872 lp_exec_mask_bgnsub(&bld->exec_mask); 1873 break; 1874 1875 case TGSI_OPCODE_ELSE: 1876 lp_exec_mask_cond_invert(&bld->exec_mask); 1877 break; 1878 1879 case TGSI_OPCODE_ENDIF: 1880 lp_exec_mask_cond_pop(&bld->exec_mask); 1881 break; 1882 1883 case TGSI_OPCODE_ENDLOOP: 1884 lp_exec_endloop(&bld->exec_mask); 1885 break; 1886 1887 case TGSI_OPCODE_ENDSUB: 1888 lp_exec_mask_endsub(&bld->exec_mask, pc); 1889 break; 1890 1891 case TGSI_OPCODE_PUSHA: 1892 /* deprecated? */ 1893 assert(0); 1894 return FALSE; 1895 break; 1896 1897 case TGSI_OPCODE_POPA: 1898 /* deprecated? */ 1899 assert(0); 1900 return FALSE; 1901 break; 1902 1903 case TGSI_OPCODE_CEIL: 1904 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1905 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1906 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 1907 } 1908 break; 1909 1910 case TGSI_OPCODE_I2F: 1911 /* deprecated? */ 1912 assert(0); 1913 return FALSE; 1914 break; 1915 1916 case TGSI_OPCODE_NOT: 1917 /* deprecated? */ 1918 assert(0); 1919 return FALSE; 1920 break; 1921 1922 case TGSI_OPCODE_TRUNC: 1923 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1924 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1925 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 1926 } 1927 break; 1928 1929 case TGSI_OPCODE_SHL: 1930 /* deprecated? */ 1931 assert(0); 1932 return FALSE; 1933 break; 1934 1935 case TGSI_OPCODE_ISHR: 1936 /* deprecated? */ 1937 assert(0); 1938 return FALSE; 1939 break; 1940 1941 case TGSI_OPCODE_AND: 1942 /* deprecated? */ 1943 assert(0); 1944 return FALSE; 1945 break; 1946 1947 case TGSI_OPCODE_OR: 1948 /* deprecated? */ 1949 assert(0); 1950 return FALSE; 1951 break; 1952 1953 case TGSI_OPCODE_MOD: 1954 /* deprecated? */ 1955 assert(0); 1956 return FALSE; 1957 break; 1958 1959 case TGSI_OPCODE_XOR: 1960 /* deprecated? */ 1961 assert(0); 1962 return FALSE; 1963 break; 1964 1965 case TGSI_OPCODE_SAD: 1966 /* deprecated? 
*/ 1967 assert(0); 1968 return FALSE; 1969 break; 1970 1971 case TGSI_OPCODE_TXF: 1972 /* deprecated? */ 1973 assert(0); 1974 return FALSE; 1975 break; 1976 1977 case TGSI_OPCODE_TXQ: 1978 /* deprecated? */ 1979 assert(0); 1980 return FALSE; 1981 break; 1982 1983 case TGSI_OPCODE_CONT: 1984 lp_exec_continue(&bld->exec_mask); 1985 break; 1986 1987 case TGSI_OPCODE_EMIT: 1988 return FALSE; 1989 break; 1990 1991 case TGSI_OPCODE_ENDPRIM: 1992 return FALSE; 1993 break; 1994 1995 case TGSI_OPCODE_NOP: 1996 break; 1997 1998 default: 1999 return FALSE; 2000 } 2001 2002 if(info->num_dst) { 2003 LLVMValueRef pred[NUM_CHANNELS]; 2004 2005 emit_fetch_predicate( bld, inst, pred ); 2006 2007 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2008 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]); 2009 } 2010 } 2011 2012 return TRUE; 2013}


/**
 * Translate a TGSI token stream into LLVM IR (SoA layout).
 *
 * The work is done in two passes:
 *
 *  1. The token stream is parsed once.  Declarations are forwarded to
 *     emit_declaration(), immediates are turned into constant vectors stored
 *     in bld.immediates[], and every instruction is copied into a growable
 *     array (bld.instructions) so that it can be addressed by index.
 *
 *  2. The saved instructions are translated one at a time by
 *     emit_instruction(), which advances the program counter itself and sets
 *     it to -1 when it translates TGSI_OPCODE_END.  The CAL/RET/ENDSUB
 *     handlers also receive the program counter.
 *
 * \param builder     LLVM builder used to initialise the build contexts
 * \param tokens      TGSI token stream to translate
 * \param type        vector type of the float build context; the unsigned
 *                    integer context is derived from it with lp_uint_type()
 * \param mask        stored in the translation context (bld.mask)
 * \param consts_ptr  stored in the translation context (bld.consts_ptr)
 * \param pos         stored in the translation context (bld.pos)
 * \param inputs      stored in the translation context (bld.inputs)
 * \param outputs     stored in the translation context (bld.outputs)
 * \param sampler     stored in the translation context (bld.sampler)
 * \param info        shader info; info->indirect_files is cached in the
 *                    context
 *
 * The function returns nothing.  If the instruction array cannot be
 * allocated or grown, translation is abandoned and the function returns
 * after releasing what it acquired.
 */
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_soa_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   uint num_instructions = 0;
   unsigned i;
   int pc = 0;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.info = info;
   bld.indirect_files = info->indirect_files;
   bld.instructions = (struct tgsi_full_instruction *)
      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
   bld.max_instructions = LP_MAX_INSTRUCTIONS;

   if (!bld.instructions) {
      return;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.base);

   tgsi_parse_init( &parse, tokens );

   /* Pass 1: consume the token stream and save the instructions. */
   while( !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            /* save expanded instruction */
            if (num_instructions == bld.max_instructions) {
               struct tgsi_full_instruction *instructions;
               instructions = REALLOC(bld.instructions,
                                      bld.max_instructions
                                      * sizeof(struct tgsi_full_instruction),
                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
                                      * sizeof(struct tgsi_full_instruction));
               if (!instructions) {
                  /*
                   * Dropping the instruction and carrying on would translate
                   * a truncated program (possibly without its END), so give
                   * up instead, like the initial allocation failure does.
                   */
                  tgsi_parse_free( &parse );
                  FREE( bld.instructions );
                  return;
               }
               bld.instructions = instructions;
               bld.max_instructions += LP_MAX_INSTRUCTIONS;
            }

            memcpy(bld.instructions + num_instructions,
                   &parse.FullToken.FullInstruction,
                   sizeof(bld.instructions[0]));

            num_instructions++;
         }

         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
            for( i = 0; i < size; ++i )
               bld.immediates[num_immediates][i] =
                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
            /* channels the immediate does not provide are left undefined */
            for( i = size; i < 4; ++i )
               bld.immediates[num_immediates][i] = bld.base.undef;
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert( 0 );
      }
   }

   /*
    * Pass 2: translate the saved instructions.  emit_instruction() updates
    * pc and sets it to -1 on END.  The upper bound stops translation if pc
    * moves past the last saved instruction (e.g. a program without a
    * reachable END) rather than reading beyond the array.
    */
   while (pc >= 0 && (uint) pc < num_instructions) {
      struct tgsi_full_instruction *instr = bld.instructions + pc;
      const struct tgsi_opcode_info *opcode_info =
         tgsi_get_opcode_info(instr->Instruction.Opcode);
      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                       opcode_info->mnemonic);
   }

   /* Debugging aid, disabled by default: dump the TGSI and the function. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }
   tgsi_parse_free( &parse );

   /* Debugging aid, disabled by default: dump the whole LLVM module. */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
      LLVMDumpModule(module);

   }

   FREE( bld.instructions );
}