lp_bld_tgsi_soa.c revision 82b71db03ddaf0eed504412c9169db37cf9bdadc
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_exec.h" 46#include "tgsi/tgsi_info.h" 47#include "tgsi/tgsi_parse.h" 48#include "tgsi/tgsi_util.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_type.h" 51#include "lp_bld_const.h" 52#include "lp_bld_arit.h" 53#include "lp_bld_bitarit.h" 54#include "lp_bld_gather.h" 55#include "lp_bld_init.h" 56#include "lp_bld_logic.h" 57#include "lp_bld_swizzle.h" 58#include "lp_bld_flow.h" 59#include "lp_bld_quad.h" 60#include "lp_bld_tgsi.h" 61#include "lp_bld_limits.h" 62#include "lp_bld_debug.h" 63#include "lp_bld_printf.h" 64 65 66#define NUM_CHANNELS 4 67 68#define LP_MAX_INSTRUCTIONS 256 69 70 71struct lp_exec_mask { 72 struct lp_build_context *bld; 73 74 boolean has_mask; 75 76 LLVMTypeRef int_vec_type; 77 78 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 79 int cond_stack_size; 80 LLVMValueRef cond_mask; 81 82 LLVMBasicBlockRef loop_block; 83 LLVMValueRef cont_mask; 84 LLVMValueRef break_mask; 85 LLVMValueRef break_var; 86 struct { 87 LLVMBasicBlockRef loop_block; 88 LLVMValueRef cont_mask; 89 LLVMValueRef break_mask; 90 LLVMValueRef break_var; 91 } loop_stack[LP_MAX_TGSI_NESTING]; 92 int loop_stack_size; 93 94 LLVMValueRef ret_mask; 95 struct { 96 int pc; 97 LLVMValueRef ret_mask; 98 } call_stack[LP_MAX_TGSI_NESTING]; 99 int call_stack_size; 100 101 LLVMValueRef exec_mask; 102}; 103 104struct lp_build_tgsi_soa_context 105{ 106 struct lp_build_context base; 107 108 /* Builder for vector integer masks and indices */ 109 struct lp_build_context uint_bld; 110 111 /* Builder for scalar elements of shader's data type (float) */ 112 struct lp_build_context elem_bld; 113 114 LLVMValueRef consts_ptr; 115 const LLVMValueRef *pos; 116 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 117 LLVMValueRef (*outputs)[NUM_CHANNELS]; 118 119 const struct lp_build_sampler_soa *sampler; 120 121 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; 122 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; 123 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; 124 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; 125 126 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 127 * set in the indirect_files field. 128 * The temps[] array above is unused then. 129 */ 130 LLVMValueRef temps_array; 131 132 /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is 133 * set in the indirect_files field. 134 * The outputs[] array above is unused then. 135 */ 136 LLVMValueRef outputs_array; 137 138 /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is 139 * set in the indirect_files field. 140 * The inputs[] array above is unused then. 141 */ 142 LLVMValueRef inputs_array; 143 144 LLVMValueRef system_values_array; 145 146 const struct tgsi_shader_info *info; 147 /** bitmask indicating which register files are accessed indirectly */ 148 unsigned indirect_files; 149 150 struct lp_build_mask_context *mask; 151 struct lp_exec_mask exec_mask; 152 153 struct tgsi_full_instruction *instructions; 154 uint max_instructions; 155}; 156 157static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 158{ 159 mask->bld = bld; 160 mask->has_mask = FALSE; 161 mask->cond_stack_size = 0; 162 mask->loop_stack_size = 0; 163 mask->call_stack_size = 0; 164 165 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type); 166 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = 167 LLVMConstAllOnes(mask->int_vec_type); 168} 169 170static void lp_exec_mask_update(struct lp_exec_mask *mask) 171{ 172 LLVMBuilderRef builder = mask->bld->gallivm->builder; 173 174 if (mask->loop_stack_size) { 175 /*for loops we need to update the entire mask at runtime */ 176 LLVMValueRef tmp; 177 assert(mask->break_mask); 178 tmp = LLVMBuildAnd(builder, 179 mask->cont_mask, 180 mask->break_mask, 181 "maskcb"); 182 mask->exec_mask = LLVMBuildAnd(builder, 183 mask->cond_mask, 184 tmp, 185 "maskfull"); 186 } else 187 mask->exec_mask = mask->cond_mask; 188 189 if (mask->call_stack_size) { 190 mask->exec_mask = LLVMBuildAnd(builder, 191 mask->exec_mask, 192 mask->ret_mask, 193 "callmask"); 194 } 195 196 mask->has_mask = (mask->cond_stack_size > 0 || 197 mask->loop_stack_size > 0 || 198 mask->call_stack_size > 0); 199} 200 201static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 202 LLVMValueRef val) 203{ 204 LLVMBuilderRef builder = mask->bld->gallivm->builder; 205 206 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 207 if (mask->cond_stack_size == 0) { 208 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); 209 } 210 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 211 assert(LLVMTypeOf(val) == mask->int_vec_type); 212 mask->cond_mask = LLVMBuildAnd(builder, 213 mask->cond_mask, 214 val, 215 ""); 216 lp_exec_mask_update(mask); 217} 218 219static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 220{ 221 LLVMBuilderRef builder = mask->bld->gallivm->builder; 222 LLVMValueRef prev_mask; 223 LLVMValueRef inv_mask; 224 225 assert(mask->cond_stack_size); 226 prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 227 if (mask->cond_stack_size == 1) { 228 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); 229 } 230 231 inv_mask = LLVMBuildNot(builder, mask->cond_mask, ""); 232 233 mask->cond_mask = LLVMBuildAnd(builder, 234 inv_mask, 235 prev_mask, ""); 236 lp_exec_mask_update(mask); 237} 238 239static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 240{ 241 assert(mask->cond_stack_size); 242 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 243 lp_exec_mask_update(mask); 244} 245 246static void lp_exec_bgnloop(struct lp_exec_mask *mask) 247{ 248 LLVMBuilderRef builder = mask->bld->gallivm->builder; 249 250 if (mask->loop_stack_size == 0) { 251 assert(mask->loop_block == NULL); 252 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); 253 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type)); 254 assert(mask->break_var == NULL); 255 } 256 257 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING); 258 259 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block; 260 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask; 261 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask; 262 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; 263 ++mask->loop_stack_size; 264 265 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, ""); 266 LLVMBuildStore(builder, mask->break_mask, mask->break_var); 267 268 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop"); 269 LLVMBuildBr(builder, mask->loop_block); 270 LLVMPositionBuilderAtEnd(builder, mask->loop_block); 271 272 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, ""); 273 274 lp_exec_mask_update(mask); 275} 276 277static void lp_exec_break(struct lp_exec_mask *mask) 278{ 279 LLVMBuilderRef builder = mask->bld->gallivm->builder; 280 LLVMValueRef exec_mask = LLVMBuildNot(builder, 281 mask->exec_mask, 282 "break"); 283 284 mask->break_mask = LLVMBuildAnd(builder, 285 mask->break_mask, 286 exec_mask, "break_full"); 287 288 lp_exec_mask_update(mask); 289} 290 291static void lp_exec_continue(struct lp_exec_mask *mask) 292{ 293 LLVMBuilderRef builder = mask->bld->gallivm->builder; 294 LLVMValueRef exec_mask = LLVMBuildNot(builder, 295 mask->exec_mask, 296 ""); 297 298 mask->cont_mask = LLVMBuildAnd(builder, 299 mask->cont_mask, 300 exec_mask, ""); 301 302 lp_exec_mask_update(mask); 303} 304 305 306static void lp_exec_endloop(struct gallivm_state *gallivm, 307 struct lp_exec_mask *mask) 308{ 309 LLVMBuilderRef builder = mask->bld->gallivm->builder; 310 LLVMBasicBlockRef endloop; 311 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context, 312 mask->bld->type.width * 313 mask->bld->type.length); 314 LLVMValueRef i1cond; 315 316 assert(mask->break_mask); 317 318 /* 319 * Restore the cont_mask, but don't pop 320 */ 321 assert(mask->loop_stack_size); 322 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask; 323 lp_exec_mask_update(mask); 324 325 /* 326 * Unlike the continue mask, the break_mask must be preserved across loop 327 * iterations 328 */ 329 LLVMBuildStore(builder, mask->break_mask, mask->break_var); 330 331 /* i1cond = (mask == 0) */ 332 i1cond = LLVMBuildICmp( 333 builder, 334 LLVMIntNE, 335 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""), 336 LLVMConstNull(reg_type), ""); 337 338 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop"); 339 340 LLVMBuildCondBr(builder, 341 i1cond, mask->loop_block, endloop); 342 343 LLVMPositionBuilderAtEnd(builder, endloop); 344 345 assert(mask->loop_stack_size); 346 --mask->loop_stack_size; 347 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block; 348 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask; 349 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask; 350 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var; 351 352 lp_exec_mask_update(mask); 353} 354 355/* stores val into an address pointed to by dst. 356 * mask->exec_mask is used to figure out which bits of val 357 * should be stored into the address 358 * (0 means don't store this bit, 1 means do store). 359 */ 360static void lp_exec_mask_store(struct lp_exec_mask *mask, 361 LLVMValueRef pred, 362 LLVMValueRef val, 363 LLVMValueRef dst) 364{ 365 LLVMBuilderRef builder = mask->bld->gallivm->builder; 366 367 /* Mix the predicate and execution mask */ 368 if (mask->has_mask) { 369 if (pred) { 370 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); 371 } else { 372 pred = mask->exec_mask; 373 } 374 } 375 376 if (pred) { 377 LLVMValueRef real_val, dst_val; 378 379 dst_val = LLVMBuildLoad(builder, dst, ""); 380 real_val = lp_build_select(mask->bld, 381 pred, 382 val, dst_val); 383 384 LLVMBuildStore(builder, real_val, dst); 385 } else 386 LLVMBuildStore(builder, val, dst); 387} 388 389static void lp_exec_mask_call(struct lp_exec_mask *mask, 390 int func, 391 int *pc) 392{ 393 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING); 394 mask->call_stack[mask->call_stack_size].pc = *pc; 395 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask; 396 mask->call_stack_size++; 397 *pc = func; 398} 399 400static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) 401{ 402 LLVMBuilderRef builder = mask->bld->gallivm->builder; 403 LLVMValueRef exec_mask; 404 405 if (mask->call_stack_size == 0) { 406 /* returning from main() */ 407 *pc = -1; 408 return; 409 } 410 exec_mask = LLVMBuildNot(builder, 411 mask->exec_mask, 412 "ret"); 413 414 mask->ret_mask = LLVMBuildAnd(builder, 415 mask->ret_mask, 416 exec_mask, "ret_full"); 417 418 lp_exec_mask_update(mask); 419} 420 421static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) 422{ 423} 424 425static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) 426{ 427 assert(mask->call_stack_size); 428 mask->call_stack_size--; 429 *pc = mask->call_stack[mask->call_stack_size].pc; 430 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask; 431 lp_exec_mask_update(mask); 432} 433 434 435/** 436 * Return pointer to a temporary register channel (src or dest). 437 * Note that indirect addressing cannot be handled here. 438 * \param index which temporary register 439 * \param chan which channel of the temp register. 440 */ 441static LLVMValueRef 442get_temp_ptr(struct lp_build_tgsi_soa_context *bld, 443 unsigned index, 444 unsigned chan) 445{ 446 LLVMBuilderRef builder = bld->base.gallivm->builder; 447 assert(chan < 4); 448 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 449 LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan); 450 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, ""); 451 } 452 else { 453 return bld->temps[index][chan]; 454 } 455} 456 457/** 458 * Return pointer to a output register channel (src or dest). 459 * Note that indirect addressing cannot be handled here. 460 * \param index which output register 461 * \param chan which channel of the output register. 462 */ 463static LLVMValueRef 464get_output_ptr(struct lp_build_tgsi_soa_context *bld, 465 unsigned index, 466 unsigned chan) 467{ 468 LLVMBuilderRef builder = bld->base.gallivm->builder; 469 assert(chan < 4); 470 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 471 LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, 472 index * 4 + chan); 473 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, ""); 474 } 475 else { 476 return bld->outputs[index][chan]; 477 } 478} 479 480/** 481 * Gather vector. 482 * XXX the lp_build_gather() function should be capable of doing this 483 * with a little work. 484 */ 485static LLVMValueRef 486build_gather(struct lp_build_tgsi_soa_context *bld, 487 LLVMValueRef base_ptr, 488 LLVMValueRef indexes) 489{ 490 LLVMBuilderRef builder = bld->base.gallivm->builder; 491 LLVMValueRef res = bld->base.undef; 492 unsigned i; 493 494 /* 495 * Loop over elements of index_vec, load scalar value, insert it into 'res'. 496 */ 497 for (i = 0; i < bld->base.type.length; i++) { 498 LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i); 499 LLVMValueRef index = LLVMBuildExtractElement(builder, 500 indexes, ii, ""); 501 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, 502 &index, 1, "gather_ptr"); 503 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 504 505 res = LLVMBuildInsertElement(builder, res, scalar, ii, ""); 506 } 507 508 return res; 509} 510 511 512/** 513 * Scatter/store vector. 514 */ 515static void 516emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, 517 LLVMValueRef base_ptr, 518 LLVMValueRef indexes, 519 LLVMValueRef values, 520 struct lp_exec_mask *mask, 521 LLVMValueRef pred) 522{ 523 struct gallivm_state *gallivm = bld->base.gallivm; 524 LLVMBuilderRef builder = gallivm->builder; 525 unsigned i; 526 527 /* Mix the predicate and execution mask */ 528 if (mask->has_mask) { 529 if (pred) { 530 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); 531 } 532 else { 533 pred = mask->exec_mask; 534 } 535 } 536 537 /* 538 * Loop over elements of index_vec, store scalar value. 539 */ 540 for (i = 0; i < bld->base.type.length; i++) { 541 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 542 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); 543 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); 544 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); 545 LLVMValueRef scalar_pred = pred ? 546 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; 547 548 if (0) 549 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n", 550 ii, val, index, scalar_ptr); 551 552 if (scalar_pred) { 553 LLVMValueRef real_val, dst_val; 554 dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); 555 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); 556 LLVMBuildStore(builder, real_val, scalar_ptr); 557 } 558 else { 559 LLVMBuildStore(builder, val, scalar_ptr); 560 } 561 } 562} 563 564 565/** 566 * Read the current value of the ADDR register, convert the floats to 567 * ints, add the base index and return the vector of offsets. 568 * The offsets will be used to index into the constant buffer or 569 * temporary register file. 570 */ 571static LLVMValueRef 572get_indirect_index(struct lp_build_tgsi_soa_context *bld, 573 unsigned reg_file, unsigned reg_index, 574 const struct tgsi_src_register *indirect_reg) 575{ 576 LLVMBuilderRef builder = bld->base.gallivm->builder; 577 struct lp_build_context *uint_bld = &bld->uint_bld; 578 /* always use X component of address register */ 579 unsigned swizzle = indirect_reg->SwizzleX; 580 LLVMValueRef base; 581 LLVMValueRef rel; 582 LLVMValueRef max_index; 583 LLVMValueRef index; 584 585 assert(bld->indirect_files & (1 << reg_file)); 586 587 base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index); 588 589 assert(swizzle < 4); 590 rel = LLVMBuildLoad(builder, 591 bld->addr[indirect_reg->Index][swizzle], 592 "load addr reg"); 593 594 /* for indexing we want integers */ 595 rel = LLVMBuildFPToSI(builder, 596 rel, 597 uint_bld->vec_type, ""); 598 599 index = lp_build_add(uint_bld, base, rel); 600 601 max_index = lp_build_const_int_vec(bld->base.gallivm, 602 uint_bld->type, 603 bld->info->file_max[reg_file]); 604 605 assert(!uint_bld->type.sign); 606 index = lp_build_min(uint_bld, index, max_index); 607 608 return index; 609} 610 611 612/** 613 * Register fetch. 614 */ 615static LLVMValueRef 616emit_fetch( 617 struct lp_build_tgsi_soa_context *bld, 618 const struct tgsi_full_instruction *inst, 619 unsigned src_op, 620 const unsigned chan_index ) 621{ 622 struct gallivm_state *gallivm = bld->base.gallivm; 623 LLVMBuilderRef builder = gallivm->builder; 624 struct lp_build_context *uint_bld = &bld->uint_bld; 625 const struct tgsi_full_src_register *reg = &inst->Src[src_op]; 626 const unsigned swizzle = 627 tgsi_util_get_full_src_register_swizzle(reg, chan_index); 628 LLVMValueRef res; 629 LLVMValueRef indirect_index = NULL; 630 631 if (swizzle > 3) { 632 assert(0 && "invalid swizzle in emit_fetch()"); 633 return bld->base.undef; 634 } 635 636 if (reg->Register.Indirect) { 637 indirect_index = get_indirect_index(bld, 638 reg->Register.File, 639 reg->Register.Index, 640 ®->Indirect); 641 } else { 642 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); 643 } 644 645 switch (reg->Register.File) { 646 case TGSI_FILE_CONSTANT: 647 if (reg->Register.Indirect) { 648 LLVMValueRef swizzle_vec = 649 lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle); 650 LLVMValueRef index_vec; /* index into the const buffer */ 651 652 /* index_vec = indirect_index * 4 + swizzle */ 653 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 654 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 655 656 /* Gather values from the constant buffer */ 657 res = build_gather(bld, bld->consts_ptr, index_vec); 658 } 659 else { 660 LLVMValueRef index; /* index into the const buffer */ 661 LLVMValueRef scalar, scalar_ptr; 662 663 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle); 664 665 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, 666 &index, 1, ""); 667 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 668 669 res = lp_build_broadcast_scalar(&bld->base, scalar); 670 } 671 break; 672 673 case TGSI_FILE_IMMEDIATE: 674 res = bld->immediates[reg->Register.Index][swizzle]; 675 assert(res); 676 break; 677 678 case TGSI_FILE_INPUT: 679 if (reg->Register.Indirect) { 680 LLVMValueRef swizzle_vec = 681 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); 682 LLVMValueRef length_vec = 683 lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length); 684 LLVMValueRef index_vec; /* index into the const buffer */ 685 LLVMValueRef inputs_array; 686 LLVMTypeRef float4_ptr_type; 687 688 /* index_vec = (indirect_index * 4 + swizzle) * length */ 689 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 690 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 691 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 692 693 /* cast inputs_array pointer to float* */ 694 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 695 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, 696 float4_ptr_type, ""); 697 698 /* Gather values from the temporary register array */ 699 res = build_gather(bld, inputs_array, index_vec); 700 } else { 701 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { 702 LLVMValueRef lindex = lp_build_const_int32(gallivm, 703 reg->Register.Index * 4 + swizzle); 704 LLVMValueRef input_ptr = LLVMBuildGEP(builder, 705 bld->inputs_array, &lindex, 1, ""); 706 res = LLVMBuildLoad(builder, input_ptr, ""); 707 } 708 else { 709 res = bld->inputs[reg->Register.Index][swizzle]; 710 } 711 } 712 assert(res); 713 break; 714 715 case TGSI_FILE_TEMPORARY: 716 if (reg->Register.Indirect) { 717 LLVMValueRef swizzle_vec = 718 lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle); 719 LLVMValueRef length_vec = 720 lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, 721 bld->base.type.length); 722 LLVMValueRef index_vec; /* index into the const buffer */ 723 LLVMValueRef temps_array; 724 LLVMTypeRef float4_ptr_type; 725 726 /* index_vec = (indirect_index * 4 + swizzle) * length */ 727 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 728 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 729 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 730 731 /* cast temps_array pointer to float* */ 732 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0); 733 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 734 float4_ptr_type, ""); 735 736 /* Gather values from the temporary register array */ 737 res = build_gather(bld, temps_array, index_vec); 738 } 739 else { 740 LLVMValueRef temp_ptr; 741 temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle); 742 res = LLVMBuildLoad(builder, temp_ptr, ""); 743 if (!res) 744 return bld->base.undef; 745 } 746 break; 747 748 case TGSI_FILE_SYSTEM_VALUE: 749 assert(!reg->Register.Indirect); 750 { 751 LLVMValueRef index; /* index into the system value array */ 752 LLVMValueRef scalar, scalar_ptr; 753 754 index = lp_build_const_int32(gallivm, 755 reg->Register.Index * 4 + swizzle); 756 757 scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, 758 &index, 1, ""); 759 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 760 761 res = lp_build_broadcast_scalar(&bld->base, scalar); 762 } 763 break; 764 765 default: 766 assert(0 && "invalid src register in emit_fetch()"); 767 return bld->base.undef; 768 } 769 770 if (reg->Register.Absolute) { 771 res = lp_build_abs( &bld->base, res ); 772 } 773 774 if (reg->Register.Negate) { 775 res = lp_build_negate( &bld->base, res ); 776 } 777 778 return res; 779} 780 781 782/** 783 * Register fetch with derivatives. 784 */ 785static void 786emit_fetch_deriv( 787 struct lp_build_tgsi_soa_context *bld, 788 const struct tgsi_full_instruction *inst, 789 unsigned index, 790 const unsigned chan_index, 791 LLVMValueRef *res, 792 LLVMValueRef *ddx, 793 LLVMValueRef *ddy) 794{ 795 LLVMValueRef src; 796 797 src = emit_fetch(bld, inst, index, chan_index); 798 799 if(res) 800 *res = src; 801 802 /* TODO: use interpolation coeffs for inputs */ 803 804 if(ddx) 805 *ddx = lp_build_ddx(&bld->base, src); 806 807 if(ddy) 808 *ddy = lp_build_ddy(&bld->base, src); 809} 810 811 812/** 813 * Predicate. 814 */ 815static void 816emit_fetch_predicate( 817 struct lp_build_tgsi_soa_context *bld, 818 const struct tgsi_full_instruction *inst, 819 LLVMValueRef *pred) 820{ 821 LLVMBuilderRef builder = bld->base.gallivm->builder; 822 unsigned index; 823 unsigned char swizzles[4]; 824 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; 825 LLVMValueRef value; 826 unsigned chan; 827 828 if (!inst->Instruction.Predicate) { 829 TGSI_FOR_EACH_CHANNEL( chan ) { 830 pred[chan] = NULL; 831 } 832 return; 833 } 834 835 swizzles[0] = inst->Predicate.SwizzleX; 836 swizzles[1] = inst->Predicate.SwizzleY; 837 swizzles[2] = inst->Predicate.SwizzleZ; 838 swizzles[3] = inst->Predicate.SwizzleW; 839 840 index = inst->Predicate.Index; 841 assert(index < LP_MAX_TGSI_PREDS); 842 843 TGSI_FOR_EACH_CHANNEL( chan ) { 844 unsigned swizzle = swizzles[chan]; 845 846 /* 847 * Only fetch the predicate register channels that are actually listed 848 * in the swizzles 849 */ 850 if (!unswizzled[swizzle]) { 851 value = LLVMBuildLoad(builder, 852 bld->preds[index][swizzle], ""); 853 854 /* 855 * Convert the value to an integer mask. 856 * 857 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions 858 * is needlessly causing two comparisons due to storing the intermediate 859 * result as float vector instead of an integer mask vector. 860 */ 861 value = lp_build_compare(bld->base.gallivm, 862 bld->base.type, 863 PIPE_FUNC_NOTEQUAL, 864 value, 865 bld->base.zero); 866 if (inst->Predicate.Negate) { 867 value = LLVMBuildNot(builder, value, ""); 868 } 869 870 unswizzled[swizzle] = value; 871 } else { 872 value = unswizzled[swizzle]; 873 } 874 875 pred[chan] = value; 876 } 877} 878 879 880/** 881 * Register store. 882 */ 883static void 884emit_store( 885 struct lp_build_tgsi_soa_context *bld, 886 const struct tgsi_full_instruction *inst, 887 unsigned index, 888 unsigned chan_index, 889 LLVMValueRef pred, 890 LLVMValueRef value) 891{ 892 struct gallivm_state *gallivm = bld->base.gallivm; 893 LLVMBuilderRef builder = gallivm->builder; 894 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 895 struct lp_build_context *uint_bld = &bld->uint_bld; 896 LLVMValueRef indirect_index = NULL; 897 898 switch( inst->Instruction.Saturate ) { 899 case TGSI_SAT_NONE: 900 break; 901 902 case TGSI_SAT_ZERO_ONE: 903 value = lp_build_max(&bld->base, value, bld->base.zero); 904 value = lp_build_min(&bld->base, value, bld->base.one); 905 break; 906 907 case TGSI_SAT_MINUS_PLUS_ONE: 908 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0)); 909 value = lp_build_min(&bld->base, value, bld->base.one); 910 break; 911 912 default: 913 assert(0); 914 } 915 916 if (reg->Register.Indirect) { 917 indirect_index = get_indirect_index(bld, 918 reg->Register.File, 919 reg->Register.Index, 920 ®->Indirect); 921 } else { 922 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); 923 } 924 925 switch( reg->Register.File ) { 926 case TGSI_FILE_OUTPUT: 927 if (reg->Register.Indirect) { 928 LLVMValueRef chan_vec = 929 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); 930 LLVMValueRef length_vec = 931 lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length); 932 LLVMValueRef index_vec; /* indexes into the temp registers */ 933 LLVMValueRef outputs_array; 934 LLVMValueRef pixel_offsets; 935 LLVMTypeRef float_ptr_type; 936 int i; 937 938 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 939 pixel_offsets = uint_bld->undef; 940 for (i = 0; i < bld->base.type.length; i++) { 941 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 942 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 943 ii, ii, ""); 944 } 945 946 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 947 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 948 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 949 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 950 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 951 952 float_ptr_type = 953 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 954 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, 955 float_ptr_type, ""); 956 957 /* Scatter store values into temp registers */ 958 emit_mask_scatter(bld, outputs_array, index_vec, value, 959 &bld->exec_mask, pred); 960 } 961 else { 962 LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index, 963 chan_index); 964 lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr); 965 } 966 break; 967 968 case TGSI_FILE_TEMPORARY: 969 if (reg->Register.Indirect) { 970 LLVMValueRef chan_vec = 971 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); 972 LLVMValueRef length_vec = 973 lp_build_const_int_vec(gallivm, uint_bld->type, 974 bld->base.type.length); 975 LLVMValueRef index_vec; /* indexes into the temp registers */ 976 LLVMValueRef temps_array; 977 LLVMValueRef pixel_offsets; 978 LLVMTypeRef float_ptr_type; 979 int i; 980 981 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 982 pixel_offsets = uint_bld->undef; 983 for (i = 0; i < bld->base.type.length; i++) { 984 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 985 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 986 ii, ii, ""); 987 } 988 989 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 990 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 991 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 992 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 993 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 994 995 float_ptr_type = 996 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 997 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 998 float_ptr_type, ""); 999 1000 /* Scatter store values into temp registers */ 1001 emit_mask_scatter(bld, temps_array, index_vec, value, 1002 &bld->exec_mask, pred); 1003 } 1004 else { 1005 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 1006 chan_index); 1007 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); 1008 } 1009 break; 1010 1011 case TGSI_FILE_ADDRESS: 1012 lp_exec_mask_store(&bld->exec_mask, pred, value, 1013 bld->addr[reg->Register.Index][chan_index]); 1014 break; 1015 1016 case TGSI_FILE_PREDICATE: 1017 lp_exec_mask_store(&bld->exec_mask, pred, value, 1018 bld->preds[reg->Register.Index][chan_index]); 1019 break; 1020 1021 default: 1022 assert( 0 ); 1023 } 1024} 1025 1026 1027/** 1028 * High-level instruction translators. 1029 */ 1030 1031static void 1032emit_tex( struct lp_build_tgsi_soa_context *bld, 1033 const struct tgsi_full_instruction *inst, 1034 enum lp_build_tex_modifier modifier, 1035 LLVMValueRef *texel) 1036{ 1037 LLVMBuilderRef builder = bld->base.gallivm->builder; 1038 unsigned unit; 1039 LLVMValueRef lod_bias, explicit_lod; 1040 LLVMValueRef oow = NULL; 1041 LLVMValueRef coords[3]; 1042 LLVMValueRef ddx[3]; 1043 LLVMValueRef ddy[3]; 1044 unsigned num_coords; 1045 unsigned i; 1046 1047 if (!bld->sampler) { 1048 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 1049 for (i = 0; i < 4; i++) { 1050 texel[i] = bld->base.undef; 1051 } 1052 return; 1053 } 1054 1055 switch (inst->Texture.Texture) { 1056 case TGSI_TEXTURE_1D: 1057 num_coords = 1; 1058 break; 1059 case TGSI_TEXTURE_1D_ARRAY: 1060 case TGSI_TEXTURE_2D: 1061 case TGSI_TEXTURE_RECT: 1062 num_coords = 2; 1063 break; 1064 case TGSI_TEXTURE_SHADOW1D: 1065 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1066 case TGSI_TEXTURE_SHADOW2D: 1067 case TGSI_TEXTURE_SHADOWRECT: 1068 case TGSI_TEXTURE_2D_ARRAY: 1069 case TGSI_TEXTURE_3D: 1070 case TGSI_TEXTURE_CUBE: 1071 num_coords = 3; 1072 break; 1073 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1074 num_coords = 4; 1075 break; 1076 default: 1077 assert(0); 1078 return; 1079 } 1080 1081 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 1082 lod_bias = emit_fetch( bld, inst, 0, 3 ); 1083 explicit_lod = NULL; 1084 } 1085 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 1086 lod_bias = NULL; 1087 explicit_lod = emit_fetch( bld, inst, 0, 3 ); 1088 } 1089 else { 1090 lod_bias = NULL; 1091 explicit_lod = NULL; 1092 } 1093 1094 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 1095 oow = emit_fetch( bld, inst, 0, 3 ); 1096 oow = lp_build_rcp(&bld->base, oow); 1097 } 1098 1099 for (i = 0; i < num_coords; i++) { 1100 coords[i] = emit_fetch( bld, inst, 0, i ); 1101 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 1102 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 1103 } 1104 for (i = num_coords; i < 3; i++) { 1105 coords[i] = bld->base.undef; 1106 } 1107 1108 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 1109 LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0); 1110 for (i = 0; i < num_coords; i++) { 1111 LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); 1112 LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); 1113 ddx[i] = LLVMBuildExtractElement(builder, src1, index0, ""); 1114 ddy[i] = LLVMBuildExtractElement(builder, src2, index0, ""); 1115 } 1116 unit = inst->Src[3].Register.Index; 1117 } else { 1118 for (i = 0; i < num_coords; i++) { 1119 ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); 1120 ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); 1121 } 1122 unit = inst->Src[1].Register.Index; 1123 } 1124 for (i = num_coords; i < 3; i++) { 1125 ddx[i] = LLVMGetUndef(bld->base.elem_type); 1126 ddy[i] = LLVMGetUndef(bld->base.elem_type); 1127 } 1128 1129 bld->sampler->emit_fetch_texel(bld->sampler, 1130 bld->base.gallivm, 1131 bld->base.type, 1132 unit, num_coords, coords, 1133 ddx, ddy, 1134 lod_bias, explicit_lod, 1135 texel); 1136} 1137 1138static boolean 1139near_end_of_shader(struct lp_build_tgsi_soa_context *bld, 1140 int pc) 1141{ 1142 int i; 1143 1144 for (i = 0; i < 5; i++) { 1145 unsigned opcode; 1146 1147 if (pc + i >= bld->info->num_instructions) 1148 return TRUE; 1149 1150 opcode = bld->instructions[pc + i].Instruction.Opcode; 1151 1152 if (opcode == TGSI_OPCODE_END) 1153 return TRUE; 1154 1155 if (opcode == TGSI_OPCODE_TEX || 1156 opcode == TGSI_OPCODE_TXP || 1157 opcode == TGSI_OPCODE_TXD || 1158 opcode == TGSI_OPCODE_TXB || 1159 opcode == TGSI_OPCODE_TXL || 1160 opcode == TGSI_OPCODE_TXF || 1161 opcode == TGSI_OPCODE_TXQ || 1162 opcode == TGSI_OPCODE_CAL || 1163 opcode == TGSI_OPCODE_CALLNZ || 1164 opcode == TGSI_OPCODE_IF || 1165 opcode == TGSI_OPCODE_IFC || 1166 opcode == TGSI_OPCODE_BGNLOOP || 1167 opcode == TGSI_OPCODE_SWITCH) 1168 return FALSE; 1169 } 1170 1171 return TRUE; 1172} 1173 1174 1175 1176/** 1177 * Kill fragment if any of the src register values are negative. 1178 */ 1179static void 1180emit_kil( 1181 struct lp_build_tgsi_soa_context *bld, 1182 const struct tgsi_full_instruction *inst, 1183 int pc) 1184{ 1185 LLVMBuilderRef builder = bld->base.gallivm->builder; 1186 const struct tgsi_full_src_register *reg = &inst->Src[0]; 1187 LLVMValueRef terms[NUM_CHANNELS]; 1188 LLVMValueRef mask; 1189 unsigned chan_index; 1190 1191 memset(&terms, 0, sizeof terms); 1192 1193 TGSI_FOR_EACH_CHANNEL( chan_index ) { 1194 unsigned swizzle; 1195 1196 /* Unswizzle channel */ 1197 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1198 1199 /* Check if the component has not been already tested. */ 1200 assert(swizzle < NUM_CHANNELS); 1201 if( !terms[swizzle] ) 1202 /* TODO: change the comparison operator instead of setting the sign */ 1203 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 1204 } 1205 1206 mask = NULL; 1207 TGSI_FOR_EACH_CHANNEL( chan_index ) { 1208 if(terms[chan_index]) { 1209 LLVMValueRef chan_mask; 1210 1211 /* 1212 * If term < 0 then mask = 0 else mask = ~0. 1213 */ 1214 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 1215 1216 if(mask) 1217 mask = LLVMBuildAnd(builder, mask, chan_mask, ""); 1218 else 1219 mask = chan_mask; 1220 } 1221 } 1222 1223 if(mask) { 1224 lp_build_mask_update(bld->mask, mask); 1225 1226 if (!near_end_of_shader(bld, pc)) 1227 lp_build_mask_check(bld->mask); 1228 } 1229} 1230 1231 1232/** 1233 * Predicated fragment kill. 1234 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 1235 * The only predication is the execution mask which will apply if 1236 * we're inside a loop or conditional. 1237 */ 1238static void 1239emit_kilp(struct lp_build_tgsi_soa_context *bld, 1240 const struct tgsi_full_instruction *inst, 1241 int pc) 1242{ 1243 LLVMBuilderRef builder = bld->base.gallivm->builder; 1244 LLVMValueRef mask; 1245 1246 /* For those channels which are "alive", disable fragment shader 1247 * execution. 1248 */ 1249 if (bld->exec_mask.has_mask) { 1250 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); 1251 } 1252 else { 1253 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type); 1254 mask = zero; 1255 } 1256 1257 lp_build_mask_update(bld->mask, mask); 1258 1259 if (!near_end_of_shader(bld, pc)) 1260 lp_build_mask_check(bld->mask); 1261} 1262 1263 1264/** 1265 * Emit code which will dump the value of all the temporary registers 1266 * to stdout. 1267 */ 1268static void 1269emit_dump_temps(struct lp_build_tgsi_soa_context *bld) 1270{ 1271 struct gallivm_state *gallivm = bld->base.gallivm; 1272 LLVMBuilderRef builder = gallivm->builder; 1273 LLVMValueRef temp_ptr; 1274 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); 1275 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1); 1276 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); 1277 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); 1278 int index; 1279 int n = bld->info->file_max[TGSI_FILE_TEMPORARY]; 1280 1281 for (index = 0; index < n; index++) { 1282 LLVMValueRef idx = lp_build_const_int32(gallivm, index); 1283 LLVMValueRef v[4][4], res; 1284 int chan; 1285 1286 lp_build_printf(gallivm, "TEMP[%d]:\n", idx); 1287 1288 for (chan = 0; chan < 4; chan++) { 1289 temp_ptr = get_temp_ptr(bld, index, chan); 1290 res = LLVMBuildLoad(builder, temp_ptr, ""); 1291 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); 1292 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); 1293 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, ""); 1294 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, ""); 1295 } 1296 1297 lp_build_printf(gallivm, " X: %f %f %f %f\n", 1298 v[0][0], v[0][1], v[0][2], v[0][3]); 1299 lp_build_printf(gallivm, " Y: %f %f %f %f\n", 1300 v[1][0], v[1][1], v[1][2], v[1][3]); 1301 lp_build_printf(gallivm, " Z: %f %f %f %f\n", 1302 v[2][0], v[2][1], v[2][2], v[2][3]); 1303 lp_build_printf(gallivm, " W: %f %f %f %f\n", 1304 v[3][0], v[3][1], v[3][2], v[3][3]); 1305 } 1306} 1307 1308 1309 1310static void 1311emit_declaration( 1312 struct lp_build_tgsi_soa_context *bld, 1313 const struct tgsi_full_declaration *decl) 1314{ 1315 struct gallivm_state *gallivm = bld->base.gallivm; 1316 LLVMTypeRef vec_type = bld->base.vec_type; 1317 const unsigned first = decl->Range.First; 1318 const unsigned last = decl->Range.Last; 1319 unsigned idx, i; 1320 1321 for (idx = first; idx <= last; ++idx) { 1322 assert(last <= bld->info->file_max[decl->Declaration.File]); 1323 switch (decl->Declaration.File) { 1324 case TGSI_FILE_TEMPORARY: 1325 assert(idx < LP_MAX_TGSI_TEMPS); 1326 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { 1327 for (i = 0; i < NUM_CHANNELS; i++) 1328 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); 1329 } 1330 break; 1331 1332 case TGSI_FILE_OUTPUT: 1333 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 1334 for (i = 0; i < NUM_CHANNELS; i++) 1335 bld->outputs[idx][i] = lp_build_alloca(gallivm, 1336 vec_type, "output"); 1337 } 1338 break; 1339 1340 case TGSI_FILE_ADDRESS: 1341 assert(idx < LP_MAX_TGSI_ADDRS); 1342 for (i = 0; i < NUM_CHANNELS; i++) 1343 bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr"); 1344 break; 1345 1346 case TGSI_FILE_PREDICATE: 1347 assert(idx < LP_MAX_TGSI_PREDS); 1348 for (i = 0; i < NUM_CHANNELS; i++) 1349 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, 1350 "predicate"); 1351 break; 1352 1353 default: 1354 /* don't need to declare other vars */ 1355 break; 1356 } 1357 } 1358} 1359 1360 1361/** 1362 * Emit LLVM for one TGSI instruction. 1363 * \param return TRUE for success, FALSE otherwise 1364 */ 1365static boolean 1366emit_instruction( 1367 struct lp_build_tgsi_soa_context *bld, 1368 const struct tgsi_full_instruction *inst, 1369 const struct tgsi_opcode_info *info, 1370 int *pc) 1371{ 1372 unsigned chan_index; 1373 LLVMValueRef src0, src1, src2; 1374 LLVMValueRef tmp0, tmp1, tmp2; 1375 LLVMValueRef tmp3 = NULL; 1376 LLVMValueRef tmp4 = NULL; 1377 LLVMValueRef tmp5 = NULL; 1378 LLVMValueRef tmp6 = NULL; 1379 LLVMValueRef tmp7 = NULL; 1380 LLVMValueRef res; 1381 LLVMValueRef dst0[NUM_CHANNELS]; 1382 1383 /* 1384 * Stores and write masks are handled in a general fashion after the long 1385 * instruction opcode switch statement. 1386 * 1387 * Although not stricitly necessary, we avoid generating instructions for 1388 * channels which won't be stored, in cases where's that easy. For some 1389 * complex instructions, like texture sampling, it is more convenient to 1390 * assume a full writemask and then let LLVM optimization passes eliminate 1391 * redundant code. 1392 */ 1393 1394 (*pc)++; 1395 1396 assert(info->num_dst <= 1); 1397 if (info->num_dst) { 1398 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1399 dst0[chan_index] = bld->base.undef; 1400 } 1401 } 1402 1403 switch (inst->Instruction.Opcode) { 1404 case TGSI_OPCODE_ARL: 1405 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1406 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1407 tmp0 = lp_build_floor(&bld->base, tmp0); 1408 dst0[chan_index] = tmp0; 1409 } 1410 break; 1411 1412 case TGSI_OPCODE_MOV: 1413 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1414 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1415 } 1416 break; 1417 1418 case TGSI_OPCODE_LIT: 1419 if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) ) { 1420 dst0[TGSI_CHAN_X] = bld->base.one; 1421 } 1422 if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) { 1423 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1424 dst0[TGSI_CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1425 } 1426 if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) { 1427 /* XMM[1] = SrcReg[0].yyyy */ 1428 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); 1429 /* XMM[1] = max(XMM[1], 0) */ 1430 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1431 /* XMM[2] = SrcReg[0].wwww */ 1432 tmp2 = emit_fetch( bld, inst, 0, TGSI_CHAN_W ); 1433 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1434 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1435 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1436 dst0[TGSI_CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1437 } 1438 if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) ) { 1439 dst0[TGSI_CHAN_W] = bld->base.one; 1440 } 1441 break; 1442 1443 case TGSI_OPCODE_RCP: 1444 /* TGSI_OPCODE_RECIP */ 1445 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1446 res = lp_build_rcp(&bld->base, src0); 1447 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1448 dst0[chan_index] = res; 1449 } 1450 break; 1451 1452 case TGSI_OPCODE_RSQ: 1453 /* TGSI_OPCODE_RECIPSQRT */ 1454 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1455 src0 = lp_build_abs(&bld->base, src0); 1456 res = lp_build_rsqrt(&bld->base, src0); 1457 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1458 dst0[chan_index] = res; 1459 } 1460 break; 1461 1462 case TGSI_OPCODE_EXP: 1463 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) || 1464 TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) || 1465 TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) { 1466 LLVMValueRef *p_exp2_int_part = NULL; 1467 LLVMValueRef *p_frac_part = NULL; 1468 LLVMValueRef *p_exp2 = NULL; 1469 1470 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1471 1472 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X )) 1473 p_exp2_int_part = &tmp0; 1474 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) 1475 p_frac_part = &tmp1; 1476 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) 1477 p_exp2 = &tmp2; 1478 1479 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1480 1481 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X )) 1482 dst0[TGSI_CHAN_X] = tmp0; 1483 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) 1484 dst0[TGSI_CHAN_Y] = tmp1; 1485 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) 1486 dst0[TGSI_CHAN_Z] = tmp2; 1487 } 1488 /* dst.w = 1.0 */ 1489 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) { 1490 dst0[TGSI_CHAN_W] = bld->base.one; 1491 } 1492 break; 1493 1494 case TGSI_OPCODE_LOG: 1495 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) || 1496 TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) || 1497 TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) { 1498 LLVMValueRef *p_floor_log2 = NULL; 1499 LLVMValueRef *p_exp = NULL; 1500 LLVMValueRef *p_log2 = NULL; 1501 1502 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1503 src0 = lp_build_abs( &bld->base, src0 ); 1504 1505 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X )) 1506 p_floor_log2 = &tmp0; 1507 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) 1508 p_exp = &tmp1; 1509 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) 1510 p_log2 = &tmp2; 1511 1512 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1513 1514 /* dst.x = floor(lg2(abs(src.x))) */ 1515 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X )) 1516 dst0[TGSI_CHAN_X] = tmp0; 1517 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1518 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y )) { 1519 dst0[TGSI_CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1520 } 1521 /* dst.z = lg2(abs(src.x)) */ 1522 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z )) 1523 dst0[TGSI_CHAN_Z] = tmp2; 1524 } 1525 /* dst.w = 1.0 */ 1526 if (TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W )) { 1527 dst0[TGSI_CHAN_W] = bld->base.one; 1528 } 1529 break; 1530 1531 case TGSI_OPCODE_MUL: 1532 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1533 src0 = emit_fetch( bld, inst, 0, chan_index ); 1534 src1 = emit_fetch( bld, inst, 1, chan_index ); 1535 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1536 } 1537 break; 1538 1539 case TGSI_OPCODE_ADD: 1540 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1541 src0 = emit_fetch( bld, inst, 0, chan_index ); 1542 src1 = emit_fetch( bld, inst, 1, chan_index ); 1543 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1544 } 1545 break; 1546 1547 case TGSI_OPCODE_DP3: 1548 /* TGSI_OPCODE_DOT3 */ 1549 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1550 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); 1551 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1552 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); 1553 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); 1554 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1555 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1556 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z ); 1557 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z ); 1558 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1559 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1560 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1561 dst0[chan_index] = tmp0; 1562 } 1563 break; 1564 1565 case TGSI_OPCODE_DP4: 1566 /* TGSI_OPCODE_DOT4 */ 1567 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1568 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); 1569 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1570 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); 1571 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); 1572 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1573 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1574 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z ); 1575 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z ); 1576 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1577 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1578 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_W ); 1579 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_W ); 1580 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1581 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1582 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1583 dst0[chan_index] = tmp0; 1584 } 1585 break; 1586 1587 case TGSI_OPCODE_DST: 1588 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) { 1589 dst0[TGSI_CHAN_X] = bld->base.one; 1590 } 1591 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) { 1592 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); 1593 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); 1594 dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1595 } 1596 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) { 1597 dst0[TGSI_CHAN_Z] = emit_fetch( bld, inst, 0, TGSI_CHAN_Z ); 1598 } 1599 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) { 1600 dst0[TGSI_CHAN_W] = emit_fetch( bld, inst, 1, TGSI_CHAN_W ); 1601 } 1602 break; 1603 1604 case TGSI_OPCODE_MIN: 1605 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1606 src0 = emit_fetch( bld, inst, 0, chan_index ); 1607 src1 = emit_fetch( bld, inst, 1, chan_index ); 1608 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1609 } 1610 break; 1611 1612 case TGSI_OPCODE_MAX: 1613 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1614 src0 = emit_fetch( bld, inst, 0, chan_index ); 1615 src1 = emit_fetch( bld, inst, 1, chan_index ); 1616 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1617 } 1618 break; 1619 1620 case TGSI_OPCODE_SLT: 1621 /* TGSI_OPCODE_SETLT */ 1622 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1623 src0 = emit_fetch( bld, inst, 0, chan_index ); 1624 src1 = emit_fetch( bld, inst, 1, chan_index ); 1625 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1626 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1627 } 1628 break; 1629 1630 case TGSI_OPCODE_SGE: 1631 /* TGSI_OPCODE_SETGE */ 1632 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1633 src0 = emit_fetch( bld, inst, 0, chan_index ); 1634 src1 = emit_fetch( bld, inst, 1, chan_index ); 1635 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1636 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1637 } 1638 break; 1639 1640 case TGSI_OPCODE_MAD: 1641 /* TGSI_OPCODE_MADD */ 1642 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1643 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1644 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1645 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1646 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1647 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1648 dst0[chan_index] = tmp0; 1649 } 1650 break; 1651 1652 case TGSI_OPCODE_SUB: 1653 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1654 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1655 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1656 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1657 } 1658 break; 1659 1660 case TGSI_OPCODE_LRP: 1661 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1662 src0 = emit_fetch( bld, inst, 0, chan_index ); 1663 src1 = emit_fetch( bld, inst, 1, chan_index ); 1664 src2 = emit_fetch( bld, inst, 2, chan_index ); 1665 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1666 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1667 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1668 } 1669 break; 1670 1671 case TGSI_OPCODE_CND: 1672 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1673 src0 = emit_fetch( bld, inst, 0, chan_index ); 1674 src1 = emit_fetch( bld, inst, 1, chan_index ); 1675 src2 = emit_fetch( bld, inst, 2, chan_index ); 1676 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); 1677 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1678 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1679 } 1680 break; 1681 1682 case TGSI_OPCODE_DP2A: 1683 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); /* xmm0 = src[0].x */ 1684 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); /* xmm1 = src[1].x */ 1685 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1686 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); /* xmm1 = src[0].y */ 1687 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); /* xmm2 = src[1].y */ 1688 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1689 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1690 tmp1 = emit_fetch( bld, inst, 2, TGSI_CHAN_X ); /* xmm1 = src[2].x */ 1691 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1692 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1693 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1694 } 1695 break; 1696 1697 case TGSI_OPCODE_FRC: 1698 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1699 src0 = emit_fetch( bld, inst, 0, chan_index ); 1700 tmp0 = lp_build_floor(&bld->base, src0); 1701 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1702 dst0[chan_index] = tmp0; 1703 } 1704 break; 1705 1706 case TGSI_OPCODE_CLAMP: 1707 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1708 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1709 src1 = emit_fetch( bld, inst, 1, chan_index ); 1710 src2 = emit_fetch( bld, inst, 2, chan_index ); 1711 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1712 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1713 dst0[chan_index] = tmp0; 1714 } 1715 break; 1716 1717 case TGSI_OPCODE_FLR: 1718 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1719 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1720 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1721 } 1722 break; 1723 1724 case TGSI_OPCODE_ROUND: 1725 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1726 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1727 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1728 } 1729 break; 1730 1731 case TGSI_OPCODE_EX2: { 1732 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1733 tmp0 = lp_build_exp2( &bld->base, tmp0); 1734 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1735 dst0[chan_index] = tmp0; 1736 } 1737 break; 1738 } 1739 1740 case TGSI_OPCODE_LG2: 1741 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1742 tmp0 = lp_build_log2( &bld->base, tmp0); 1743 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1744 dst0[chan_index] = tmp0; 1745 } 1746 break; 1747 1748 case TGSI_OPCODE_POW: 1749 src0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1750 src1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); 1751 res = lp_build_pow( &bld->base, src0, src1 ); 1752 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1753 dst0[chan_index] = res; 1754 } 1755 break; 1756 1757 case TGSI_OPCODE_XPD: 1758 if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) || 1759 TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) ) { 1760 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z ); 1761 tmp3 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z ); 1762 } 1763 if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) || 1764 TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) { 1765 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); 1766 tmp4 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); 1767 } 1768 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) { 1769 tmp2 = tmp0; 1770 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1771 tmp5 = tmp3; 1772 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1773 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1774 dst0[TGSI_CHAN_X] = tmp2; 1775 } 1776 if(TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) || 1777 TGSI_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) ) { 1778 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); 1779 tmp5 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1780 } 1781 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) { 1782 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1783 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1784 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 1785 dst0[TGSI_CHAN_Y] = tmp3; 1786 } 1787 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) { 1788 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1789 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1790 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1791 dst0[TGSI_CHAN_Z] = tmp5; 1792 } 1793 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) { 1794 dst0[TGSI_CHAN_W] = bld->base.one; 1795 } 1796 break; 1797 1798 case TGSI_OPCODE_ABS: 1799 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1800 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1801 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1802 } 1803 break; 1804 1805 case TGSI_OPCODE_RCC: 1806 /* deprecated? */ 1807 assert(0); 1808 return FALSE; 1809 1810 case TGSI_OPCODE_DPH: 1811 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1812 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); 1813 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1814 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); 1815 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); 1816 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1817 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1818 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Z ); 1819 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Z ); 1820 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1821 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1822 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_W ); 1823 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1824 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1825 dst0[chan_index] = tmp0; 1826 } 1827 break; 1828 1829 case TGSI_OPCODE_COS: 1830 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1831 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1832 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1833 dst0[chan_index] = tmp0; 1834 } 1835 break; 1836 1837 case TGSI_OPCODE_DDX: 1838 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1839 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1840 } 1841 break; 1842 1843 case TGSI_OPCODE_DDY: 1844 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1845 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1846 } 1847 break; 1848 1849 case TGSI_OPCODE_KILP: 1850 /* predicated kill */ 1851 emit_kilp( bld, inst, (*pc)-1 ); 1852 break; 1853 1854 case TGSI_OPCODE_KIL: 1855 /* conditional kill */ 1856 emit_kil( bld, inst, (*pc)-1 ); 1857 break; 1858 1859 case TGSI_OPCODE_PK2H: 1860 return FALSE; 1861 break; 1862 1863 case TGSI_OPCODE_PK2US: 1864 return FALSE; 1865 break; 1866 1867 case TGSI_OPCODE_PK4B: 1868 return FALSE; 1869 break; 1870 1871 case TGSI_OPCODE_PK4UB: 1872 return FALSE; 1873 break; 1874 1875 case TGSI_OPCODE_RFL: 1876 return FALSE; 1877 break; 1878 1879 case TGSI_OPCODE_SEQ: 1880 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1881 src0 = emit_fetch( bld, inst, 0, chan_index ); 1882 src1 = emit_fetch( bld, inst, 1, chan_index ); 1883 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1884 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1885 } 1886 break; 1887 1888 case TGSI_OPCODE_SFL: 1889 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1890 dst0[chan_index] = bld->base.zero; 1891 } 1892 break; 1893 1894 case TGSI_OPCODE_SGT: 1895 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1896 src0 = emit_fetch( bld, inst, 0, chan_index ); 1897 src1 = emit_fetch( bld, inst, 1, chan_index ); 1898 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1899 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1900 } 1901 break; 1902 1903 case TGSI_OPCODE_SIN: 1904 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 1905 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1906 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1907 dst0[chan_index] = tmp0; 1908 } 1909 break; 1910 1911 case TGSI_OPCODE_SLE: 1912 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1913 src0 = emit_fetch( bld, inst, 0, chan_index ); 1914 src1 = emit_fetch( bld, inst, 1, chan_index ); 1915 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 1916 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1917 } 1918 break; 1919 1920 case TGSI_OPCODE_SNE: 1921 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1922 src0 = emit_fetch( bld, inst, 0, chan_index ); 1923 src1 = emit_fetch( bld, inst, 1, chan_index ); 1924 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1925 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1926 } 1927 break; 1928 1929 case TGSI_OPCODE_STR: 1930 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1931 dst0[chan_index] = bld->base.one; 1932 } 1933 break; 1934 1935 case TGSI_OPCODE_TEX: 1936 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); 1937 break; 1938 1939 case TGSI_OPCODE_TXD: 1940 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1941 break; 1942 1943 case TGSI_OPCODE_UP2H: 1944 /* deprecated */ 1945 assert (0); 1946 return FALSE; 1947 break; 1948 1949 case TGSI_OPCODE_UP2US: 1950 /* deprecated */ 1951 assert(0); 1952 return FALSE; 1953 break; 1954 1955 case TGSI_OPCODE_UP4B: 1956 /* deprecated */ 1957 assert(0); 1958 return FALSE; 1959 break; 1960 1961 case TGSI_OPCODE_UP4UB: 1962 /* deprecated */ 1963 assert(0); 1964 return FALSE; 1965 break; 1966 1967 case TGSI_OPCODE_X2D: 1968 /* deprecated? */ 1969 assert(0); 1970 return FALSE; 1971 break; 1972 1973 case TGSI_OPCODE_ARA: 1974 /* deprecated */ 1975 assert(0); 1976 return FALSE; 1977 break; 1978 1979 case TGSI_OPCODE_ARR: 1980 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1981 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1982 tmp0 = lp_build_round(&bld->base, tmp0); 1983 dst0[chan_index] = tmp0; 1984 } 1985 break; 1986 1987 case TGSI_OPCODE_BRA: 1988 /* deprecated */ 1989 assert(0); 1990 return FALSE; 1991 break; 1992 1993 case TGSI_OPCODE_CAL: 1994 lp_exec_mask_call(&bld->exec_mask, 1995 inst->Label.Label, 1996 pc); 1997 1998 break; 1999 2000 case TGSI_OPCODE_RET: 2001 lp_exec_mask_ret(&bld->exec_mask, pc); 2002 break; 2003 2004 case TGSI_OPCODE_END: 2005 if (0) { 2006 /* for debugging */ 2007 emit_dump_temps(bld); 2008 } 2009 *pc = -1; 2010 break; 2011 2012 case TGSI_OPCODE_SSG: 2013 /* TGSI_OPCODE_SGN */ 2014 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2015 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2016 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 2017 } 2018 break; 2019 2020 case TGSI_OPCODE_CMP: 2021 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2022 src0 = emit_fetch( bld, inst, 0, chan_index ); 2023 src1 = emit_fetch( bld, inst, 1, chan_index ); 2024 src2 = emit_fetch( bld, inst, 2, chan_index ); 2025 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 2026 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 2027 } 2028 break; 2029 2030 case TGSI_OPCODE_SCS: 2031 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_X ) { 2032 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 2033 dst0[TGSI_CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 2034 } 2035 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Y ) { 2036 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); 2037 dst0[TGSI_CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 2038 } 2039 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_Z ) { 2040 dst0[TGSI_CHAN_Z] = bld->base.zero; 2041 } 2042 TGSI_IF_IS_DST0_CHANNEL_ENABLED( inst, TGSI_CHAN_W ) { 2043 dst0[TGSI_CHAN_W] = bld->base.one; 2044 } 2045 break; 2046 2047 case TGSI_OPCODE_TXB: 2048 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); 2049 break; 2050 2051 case TGSI_OPCODE_NRM: 2052 /* fall-through */ 2053 case TGSI_OPCODE_NRM4: 2054 /* 3 or 4-component normalization */ 2055 { 2056 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 2057 2058 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) || 2059 TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y) || 2060 TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z) || 2061 (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 4)) { 2062 2063 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 2064 2065 /* xmm4 = src.x */ 2066 /* xmm0 = src.x * src.x */ 2067 tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X); 2068 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) { 2069 tmp4 = tmp0; 2070 } 2071 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 2072 2073 /* xmm5 = src.y */ 2074 /* xmm0 = xmm0 + src.y * src.y */ 2075 tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Y); 2076 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) { 2077 tmp5 = tmp1; 2078 } 2079 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2080 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2081 2082 /* xmm6 = src.z */ 2083 /* xmm0 = xmm0 + src.z * src.z */ 2084 tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_Z); 2085 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) { 2086 tmp6 = tmp1; 2087 } 2088 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2089 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2090 2091 if (dims == 4) { 2092 /* xmm7 = src.w */ 2093 /* xmm0 = xmm0 + src.w * src.w */ 2094 tmp1 = emit_fetch(bld, inst, 0, TGSI_CHAN_W); 2095 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W)) { 2096 tmp7 = tmp1; 2097 } 2098 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2099 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2100 } 2101 2102 /* xmm1 = 1 / sqrt(xmm0) */ 2103 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 2104 2105 /* dst.x = xmm1 * src.x */ 2106 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X)) { 2107 dst0[TGSI_CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 2108 } 2109 2110 /* dst.y = xmm1 * src.y */ 2111 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Y)) { 2112 dst0[TGSI_CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 2113 } 2114 2115 /* dst.z = xmm1 * src.z */ 2116 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_Z)) { 2117 dst0[TGSI_CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 2118 } 2119 2120 /* dst.w = xmm1 * src.w */ 2121 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_X) && dims == 4) { 2122 dst0[TGSI_CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 2123 } 2124 } 2125 2126 /* dst.w = 1.0 */ 2127 if (TGSI_IS_DST0_CHANNEL_ENABLED(inst, TGSI_CHAN_W) && dims == 3) { 2128 dst0[TGSI_CHAN_W] = bld->base.one; 2129 } 2130 } 2131 break; 2132 2133 case TGSI_OPCODE_DIV: 2134 /* deprecated */ 2135 assert( 0 ); 2136 return FALSE; 2137 break; 2138 2139 case TGSI_OPCODE_DP2: 2140 tmp0 = emit_fetch( bld, inst, 0, TGSI_CHAN_X ); /* xmm0 = src[0].x */ 2141 tmp1 = emit_fetch( bld, inst, 1, TGSI_CHAN_X ); /* xmm1 = src[1].x */ 2142 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 2143 tmp1 = emit_fetch( bld, inst, 0, TGSI_CHAN_Y ); /* xmm1 = src[0].y */ 2144 tmp2 = emit_fetch( bld, inst, 1, TGSI_CHAN_Y ); /* xmm2 = src[1].y */ 2145 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 2146 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 2147 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2148 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 2149 } 2150 break; 2151 2152 case TGSI_OPCODE_TXL: 2153 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 2154 break; 2155 2156 case TGSI_OPCODE_TXP: 2157 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); 2158 break; 2159 2160 case TGSI_OPCODE_BRK: 2161 lp_exec_break(&bld->exec_mask); 2162 break; 2163 2164 case TGSI_OPCODE_IF: 2165 tmp0 = emit_fetch(bld, inst, 0, TGSI_CHAN_X); 2166 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 2167 tmp0, bld->base.zero); 2168 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 2169 break; 2170 2171 case TGSI_OPCODE_BGNLOOP: 2172 lp_exec_bgnloop(&bld->exec_mask); 2173 break; 2174 2175 case TGSI_OPCODE_BGNSUB: 2176 lp_exec_mask_bgnsub(&bld->exec_mask); 2177 break; 2178 2179 case TGSI_OPCODE_ELSE: 2180 lp_exec_mask_cond_invert(&bld->exec_mask); 2181 break; 2182 2183 case TGSI_OPCODE_ENDIF: 2184 lp_exec_mask_cond_pop(&bld->exec_mask); 2185 break; 2186 2187 case TGSI_OPCODE_ENDLOOP: 2188 lp_exec_endloop(bld->base.gallivm, &bld->exec_mask); 2189 break; 2190 2191 case TGSI_OPCODE_ENDSUB: 2192 lp_exec_mask_endsub(&bld->exec_mask, pc); 2193 break; 2194 2195 case TGSI_OPCODE_PUSHA: 2196 /* deprecated? */ 2197 assert(0); 2198 return FALSE; 2199 break; 2200 2201 case TGSI_OPCODE_POPA: 2202 /* deprecated? */ 2203 assert(0); 2204 return FALSE; 2205 break; 2206 2207 case TGSI_OPCODE_CEIL: 2208 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2209 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2210 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 2211 } 2212 break; 2213 2214 case TGSI_OPCODE_I2F: 2215 /* deprecated? */ 2216 assert(0); 2217 return FALSE; 2218 break; 2219 2220 case TGSI_OPCODE_NOT: 2221 /* deprecated? */ 2222 assert(0); 2223 return FALSE; 2224 break; 2225 2226 case TGSI_OPCODE_TRUNC: 2227 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2228 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2229 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 2230 } 2231 break; 2232 2233 case TGSI_OPCODE_SHL: 2234 /* deprecated? */ 2235 assert(0); 2236 return FALSE; 2237 break; 2238 2239 case TGSI_OPCODE_ISHR: 2240 /* deprecated? */ 2241 assert(0); 2242 return FALSE; 2243 break; 2244 2245 case TGSI_OPCODE_AND: 2246 /* deprecated? */ 2247 assert(0); 2248 return FALSE; 2249 break; 2250 2251 case TGSI_OPCODE_OR: 2252 /* deprecated? */ 2253 assert(0); 2254 return FALSE; 2255 break; 2256 2257 case TGSI_OPCODE_MOD: 2258 /* deprecated? */ 2259 assert(0); 2260 return FALSE; 2261 break; 2262 2263 case TGSI_OPCODE_XOR: 2264 /* deprecated? */ 2265 assert(0); 2266 return FALSE; 2267 break; 2268 2269 case TGSI_OPCODE_SAD: 2270 /* deprecated? */ 2271 assert(0); 2272 return FALSE; 2273 break; 2274 2275 case TGSI_OPCODE_TXF: 2276 /* deprecated? */ 2277 assert(0); 2278 return FALSE; 2279 break; 2280 2281 case TGSI_OPCODE_TXQ: 2282 /* deprecated? */ 2283 assert(0); 2284 return FALSE; 2285 break; 2286 2287 case TGSI_OPCODE_CONT: 2288 lp_exec_continue(&bld->exec_mask); 2289 break; 2290 2291 case TGSI_OPCODE_EMIT: 2292 return FALSE; 2293 break; 2294 2295 case TGSI_OPCODE_ENDPRIM: 2296 return FALSE; 2297 break; 2298 2299 case TGSI_OPCODE_NOP: 2300 break; 2301 2302 default: 2303 return FALSE; 2304 } 2305 2306 if(info->num_dst) { 2307 LLVMValueRef pred[NUM_CHANNELS]; 2308 2309 emit_fetch_predicate( bld, inst, pred ); 2310 2311 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2312 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]); 2313 } 2314 } 2315 2316 return TRUE; 2317} 2318 2319 2320void 2321lp_build_tgsi_soa(struct gallivm_state *gallivm, 2322 const struct tgsi_token *tokens, 2323 struct lp_type type, 2324 struct lp_build_mask_context *mask, 2325 LLVMValueRef consts_ptr, 2326 LLVMValueRef system_values_array, 2327 const LLVMValueRef *pos, 2328 const LLVMValueRef (*inputs)[NUM_CHANNELS], 2329 LLVMValueRef (*outputs)[NUM_CHANNELS], 2330 struct lp_build_sampler_soa *sampler, 2331 const struct tgsi_shader_info *info) 2332{ 2333 struct lp_build_tgsi_soa_context bld; 2334 struct tgsi_parse_context parse; 2335 uint num_immediates = 0; 2336 uint num_instructions = 0; 2337 unsigned i; 2338 int pc = 0; 2339 2340 struct lp_type res_type; 2341 2342 assert(type.length <= LP_MAX_VECTOR_LENGTH); 2343 memset(&res_type, 0, sizeof res_type); 2344 res_type.width = type.width; 2345 res_type.length = type.length; 2346 res_type.sign = 1; 2347 2348 /* Setup build context */ 2349 memset(&bld, 0, sizeof bld); 2350 lp_build_context_init(&bld.base, gallivm, type); 2351 lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type)); 2352 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); 2353 bld.mask = mask; 2354 bld.pos = pos; 2355 bld.inputs = inputs; 2356 bld.outputs = outputs; 2357 bld.consts_ptr = consts_ptr; 2358 bld.sampler = sampler; 2359 bld.info = info; 2360 bld.indirect_files = info->indirect_files; 2361 bld.instructions = (struct tgsi_full_instruction *) 2362 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); 2363 bld.max_instructions = LP_MAX_INSTRUCTIONS; 2364 2365 if (!bld.instructions) { 2366 return; 2367 } 2368 2369 lp_exec_mask_init(&bld.exec_mask, &bld.base); 2370 2371 if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 2372 LLVMValueRef array_size = 2373 lp_build_const_int32(gallivm, 2374 info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); 2375 bld.temps_array = lp_build_array_alloca(gallivm, 2376 bld.base.vec_type, array_size, 2377 "temp_array"); 2378 } 2379 2380 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2381 LLVMValueRef array_size = 2382 lp_build_const_int32(gallivm, 2383 info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); 2384 bld.outputs_array = lp_build_array_alloca(gallivm, 2385 bld.base.vec_type, array_size, 2386 "output_array"); 2387 } 2388 2389 /* If we have indirect addressing in inputs we need to copy them into 2390 * our alloca array to be able to iterate over them */ 2391 if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) { 2392 unsigned index, chan; 2393 LLVMTypeRef vec_type = bld.base.vec_type; 2394 LLVMValueRef array_size = 2395 lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4); 2396 bld.inputs_array = lp_build_array_alloca(gallivm, 2397 vec_type, array_size, 2398 "input_array"); 2399 2400 assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1); 2401 2402 for (index = 0; index < info->num_inputs; ++index) { 2403 for (chan = 0; chan < NUM_CHANNELS; ++chan) { 2404 LLVMValueRef lindex = 2405 lp_build_const_int32(gallivm, index * 4 + chan); 2406 LLVMValueRef input_ptr = 2407 LLVMBuildGEP(gallivm->builder, bld.inputs_array, 2408 &lindex, 1, ""); 2409 LLVMValueRef value = bld.inputs[index][chan]; 2410 if (value) 2411 LLVMBuildStore(gallivm->builder, value, input_ptr); 2412 } 2413 } 2414 } 2415 2416 bld.system_values_array = system_values_array; 2417 2418 tgsi_parse_init( &parse, tokens ); 2419 2420 while( !tgsi_parse_end_of_tokens( &parse ) ) { 2421 tgsi_parse_token( &parse ); 2422 2423 switch( parse.FullToken.Token.Type ) { 2424 case TGSI_TOKEN_TYPE_DECLARATION: 2425 /* Inputs already interpolated */ 2426 emit_declaration( &bld, &parse.FullToken.FullDeclaration ); 2427 break; 2428 2429 case TGSI_TOKEN_TYPE_INSTRUCTION: 2430 { 2431 /* save expanded instruction */ 2432 if (num_instructions == bld.max_instructions) { 2433 struct tgsi_full_instruction *instructions; 2434 instructions = REALLOC(bld.instructions, 2435 bld.max_instructions 2436 * sizeof(struct tgsi_full_instruction), 2437 (bld.max_instructions + LP_MAX_INSTRUCTIONS) 2438 * sizeof(struct tgsi_full_instruction)); 2439 if (!instructions) { 2440 break; 2441 } 2442 bld.instructions = instructions; 2443 bld.max_instructions += LP_MAX_INSTRUCTIONS; 2444 } 2445 2446 memcpy(bld.instructions + num_instructions, 2447 &parse.FullToken.FullInstruction, 2448 sizeof(bld.instructions[0])); 2449 2450 num_instructions++; 2451 } 2452 2453 break; 2454 2455 case TGSI_TOKEN_TYPE_IMMEDIATE: 2456 /* simply copy the immediate values into the next immediates[] slot */ 2457 { 2458 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 2459 assert(size <= 4); 2460 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 2461 for( i = 0; i < size; ++i ) 2462 bld.immediates[num_immediates][i] = 2463 lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float); 2464 for( i = size; i < 4; ++i ) 2465 bld.immediates[num_immediates][i] = bld.base.undef; 2466 num_immediates++; 2467 } 2468 break; 2469 2470 case TGSI_TOKEN_TYPE_PROPERTY: 2471 break; 2472 2473 default: 2474 assert( 0 ); 2475 } 2476 } 2477 2478 while (pc != -1) { 2479 struct tgsi_full_instruction *instr = bld.instructions + pc; 2480 const struct tgsi_opcode_info *opcode_info = 2481 tgsi_get_opcode_info(instr->Instruction.Opcode); 2482 if (!emit_instruction( &bld, instr, opcode_info, &pc )) 2483 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 2484 opcode_info->mnemonic); 2485 } 2486 2487 /* If we have indirect addressing in outputs we need to copy our alloca array 2488 * to the outputs slots specified by the called */ 2489 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2490 unsigned index, chan; 2491 assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1); 2492 for (index = 0; index < info->num_outputs; ++index) { 2493 for (chan = 0; chan < NUM_CHANNELS; ++chan) { 2494 bld.outputs[index][chan] = get_output_ptr(&bld, index, chan); 2495 } 2496 } 2497 } 2498 2499 if (0) { 2500 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); 2501 LLVMValueRef function = LLVMGetBasicBlockParent(block); 2502 debug_printf("11111111111111111111111111111 \n"); 2503 tgsi_dump(tokens, 0); 2504 lp_debug_dump_value(function); 2505 debug_printf("2222222222222222222222222222 \n"); 2506 } 2507 tgsi_parse_free( &parse ); 2508 2509 if (0) { 2510 LLVMModuleRef module = LLVMGetGlobalParent( 2511 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 2512 LLVMDumpModule(module); 2513 2514 } 2515 2516 FREE( bld.instructions ); 2517} 2518 2519 2520/** 2521 * Build up the system values array out of individual values such as 2522 * the instance ID, front-face, primitive ID, etc. The shader info is 2523 * used to determine which system values are needed and where to put 2524 * them in the system values array. 2525 * 2526 * XXX only instance ID is implemented at this time. 2527 * 2528 * The system values register file is similar to the constants buffer. 2529 * Example declaration: 2530 * DCL SV[0], INSTANCEID 2531 * Example instruction: 2532 * MOVE foo, SV[0].xxxx; 2533 * 2534 * \return LLVM float array (interpreted as float [][4]) 2535 */ 2536LLVMValueRef 2537lp_build_system_values_array(struct gallivm_state *gallivm, 2538 const struct tgsi_shader_info *info, 2539 LLVMValueRef instance_id, 2540 LLVMValueRef facing) 2541{ 2542 LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values); 2543 LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context); 2544 LLVMValueRef array = lp_build_array_alloca(gallivm, float_t, 2545 size, "sysvals_array"); 2546 unsigned i; 2547 2548 for (i = 0; i < info->num_system_values; i++) { 2549 LLVMValueRef index = lp_build_const_int32(gallivm, i * 4); 2550 LLVMValueRef ptr, value = 0; 2551 2552 switch (info->system_value_semantic_name[i]) { 2553 case TGSI_SEMANTIC_INSTANCEID: 2554 /* convert instance ID from int to float */ 2555 value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t, 2556 "sysval_instanceid"); 2557 break; 2558 case TGSI_SEMANTIC_FACE: 2559 /* fall-through */ 2560 default: 2561 assert(0 && "unexpected semantic in build_system_values_array()"); 2562 } 2563 2564 ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, ""); 2565 LLVMBuildStore(gallivm->builder, value, ptr); 2566 } 2567 2568 return array; 2569} 2570