lp_bld_tgsi_soa.c revision 31200d0688b67a0d764ad7fe4c2761d0f8d993d8
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_info.h" 46#include "tgsi/tgsi_parse.h" 47#include "tgsi/tgsi_util.h" 48#include "tgsi/tgsi_scan.h" 49#include "lp_bld_type.h" 50#include "lp_bld_const.h" 51#include "lp_bld_arit.h" 52#include "lp_bld_bitarit.h" 53#include "lp_bld_gather.h" 54#include "lp_bld_init.h" 55#include "lp_bld_logic.h" 56#include "lp_bld_swizzle.h" 57#include "lp_bld_flow.h" 58#include "lp_bld_quad.h" 59#include "lp_bld_tgsi.h" 60#include "lp_bld_limits.h" 61#include "lp_bld_debug.h" 62#include "lp_bld_printf.h" 63 64 65#define FOR_EACH_CHANNEL( CHAN )\ 66 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 67 68#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 69 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 70 71#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 72 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 73 74#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 75 FOR_EACH_CHANNEL( CHAN )\ 76 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 77 78#define CHAN_X 0 79#define CHAN_Y 1 80#define CHAN_Z 2 81#define CHAN_W 3 82#define NUM_CHANNELS 4 83 84#define LP_MAX_INSTRUCTIONS 256 85 86 87struct lp_exec_mask { 88 struct lp_build_context *bld; 89 90 boolean has_mask; 91 92 LLVMTypeRef int_vec_type; 93 94 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 95 int cond_stack_size; 96 LLVMValueRef cond_mask; 97 98 LLVMBasicBlockRef loop_block; 99 LLVMValueRef cont_mask; 100 LLVMValueRef break_mask; 101 LLVMValueRef break_var; 102 struct { 103 LLVMBasicBlockRef loop_block; 104 LLVMValueRef cont_mask; 105 LLVMValueRef break_mask; 106 LLVMValueRef break_var; 107 } loop_stack[LP_MAX_TGSI_NESTING]; 108 int loop_stack_size; 109 110 LLVMValueRef ret_mask; 111 struct { 112 int pc; 113 LLVMValueRef ret_mask; 114 } call_stack[LP_MAX_TGSI_NESTING]; 115 int call_stack_size; 116 117 LLVMValueRef exec_mask; 118}; 119 120struct lp_build_tgsi_soa_context 121{ 122 struct lp_build_context base; 123 124 /* Builder for vector integer masks and indices */ 125 struct lp_build_context uint_bld; 126 127 /* Builder for scalar elements of shader's data type (float) */ 128 struct lp_build_context elem_bld; 129 130 LLVMValueRef consts_ptr; 131 const LLVMValueRef *pos; 132 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 133 LLVMValueRef (*outputs)[NUM_CHANNELS]; 134 135 const struct lp_build_sampler_soa *sampler; 136 137 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; 138 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; 139 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; 140 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; 141 142 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 143 * set in the indirect_files field. 144 * The temps[] array above is unused then. 145 */ 146 LLVMValueRef temps_array; 147 148 /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is 149 * set in the indirect_files field. 150 * The outputs[] array above is unused then. 151 */ 152 LLVMValueRef outputs_array; 153 154 /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is 155 * set in the indirect_files field. 156 * The inputs[] array above is unused then. 157 */ 158 LLVMValueRef inputs_array; 159 160 LLVMValueRef system_values_array; 161 162 const struct tgsi_shader_info *info; 163 /** bitmask indicating which register files are accessed indirectly */ 164 unsigned indirect_files; 165 166 struct lp_build_mask_context *mask; 167 struct lp_exec_mask exec_mask; 168 169 struct tgsi_full_instruction *instructions; 170 uint max_instructions; 171}; 172 173static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 174{ 175 mask->bld = bld; 176 mask->has_mask = FALSE; 177 mask->cond_stack_size = 0; 178 mask->loop_stack_size = 0; 179 mask->call_stack_size = 0; 180 181 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type); 182 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = 183 LLVMConstAllOnes(mask->int_vec_type); 184} 185 186static void lp_exec_mask_update(struct lp_exec_mask *mask) 187{ 188 LLVMBuilderRef builder = mask->bld->gallivm->builder; 189 190 if (mask->loop_stack_size) { 191 /*for loops we need to update the entire mask at runtime */ 192 LLVMValueRef tmp; 193 assert(mask->break_mask); 194 tmp = LLVMBuildAnd(builder, 195 mask->cont_mask, 196 mask->break_mask, 197 "maskcb"); 198 mask->exec_mask = LLVMBuildAnd(builder, 199 mask->cond_mask, 200 tmp, 201 "maskfull"); 202 } else 203 mask->exec_mask = mask->cond_mask; 204 205 if (mask->call_stack_size) { 206 mask->exec_mask = LLVMBuildAnd(builder, 207 mask->exec_mask, 208 mask->ret_mask, 209 "callmask"); 210 } 211 212 mask->has_mask = (mask->cond_stack_size > 0 || 213 mask->loop_stack_size > 0 || 214 mask->call_stack_size > 0); 215} 216 217static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 218 LLVMValueRef val) 219{ 220 LLVMBuilderRef builder = mask->bld->gallivm->builder; 221 222 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 223 if (mask->cond_stack_size == 0) { 224 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); 225 } 226 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 227 assert(LLVMTypeOf(val) == mask->int_vec_type); 228 mask->cond_mask = LLVMBuildAnd(builder, 229 mask->cond_mask, 230 val, 231 ""); 232 lp_exec_mask_update(mask); 233} 234 235static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 236{ 237 LLVMBuilderRef builder = mask->bld->gallivm->builder; 238 LLVMValueRef prev_mask; 239 LLVMValueRef inv_mask; 240 241 assert(mask->cond_stack_size); 242 prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 243 if (mask->cond_stack_size == 1) { 244 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); 245 } 246 247 inv_mask = LLVMBuildNot(builder, mask->cond_mask, ""); 248 249 mask->cond_mask = LLVMBuildAnd(builder, 250 inv_mask, 251 prev_mask, ""); 252 lp_exec_mask_update(mask); 253} 254 255static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 256{ 257 assert(mask->cond_stack_size); 258 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 259 lp_exec_mask_update(mask); 260} 261 262static void lp_exec_bgnloop(struct lp_exec_mask *mask) 263{ 264 LLVMBuilderRef builder = mask->bld->gallivm->builder; 265 266 if (mask->loop_stack_size == 0) { 267 assert(mask->loop_block == NULL); 268 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); 269 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type)); 270 assert(mask->break_var == NULL); 271 } 272 273 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING); 274 275 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block; 276 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask; 277 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask; 278 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; 279 ++mask->loop_stack_size; 280 281 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, ""); 282 LLVMBuildStore(builder, mask->break_mask, mask->break_var); 283 284 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop"); 285 LLVMBuildBr(builder, mask->loop_block); 286 LLVMPositionBuilderAtEnd(builder, mask->loop_block); 287 288 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, ""); 289 290 lp_exec_mask_update(mask); 291} 292 293static void lp_exec_break(struct lp_exec_mask *mask) 294{ 295 LLVMBuilderRef builder = mask->bld->gallivm->builder; 296 LLVMValueRef exec_mask = LLVMBuildNot(builder, 297 mask->exec_mask, 298 "break"); 299 300 mask->break_mask = LLVMBuildAnd(builder, 301 mask->break_mask, 302 exec_mask, "break_full"); 303 304 lp_exec_mask_update(mask); 305} 306 307static void lp_exec_continue(struct lp_exec_mask *mask) 308{ 309 LLVMBuilderRef builder = mask->bld->gallivm->builder; 310 LLVMValueRef exec_mask = LLVMBuildNot(builder, 311 mask->exec_mask, 312 ""); 313 314 mask->cont_mask = LLVMBuildAnd(builder, 315 mask->cont_mask, 316 exec_mask, ""); 317 318 lp_exec_mask_update(mask); 319} 320 321 322static void lp_exec_endloop(struct gallivm_state *gallivm, 323 struct lp_exec_mask *mask) 324{ 325 LLVMBuilderRef builder = mask->bld->gallivm->builder; 326 LLVMBasicBlockRef endloop; 327 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context, 328 mask->bld->type.width * 329 mask->bld->type.length); 330 LLVMValueRef i1cond; 331 332 assert(mask->break_mask); 333 334 /* 335 * Restore the cont_mask, but don't pop 336 */ 337 assert(mask->loop_stack_size); 338 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask; 339 lp_exec_mask_update(mask); 340 341 /* 342 * Unlike the continue mask, the break_mask must be preserved across loop 343 * iterations 344 */ 345 LLVMBuildStore(builder, mask->break_mask, mask->break_var); 346 347 /* i1cond = (mask == 0) */ 348 i1cond = LLVMBuildICmp( 349 builder, 350 LLVMIntNE, 351 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""), 352 LLVMConstNull(reg_type), ""); 353 354 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop"); 355 356 LLVMBuildCondBr(builder, 357 i1cond, mask->loop_block, endloop); 358 359 LLVMPositionBuilderAtEnd(builder, endloop); 360 361 assert(mask->loop_stack_size); 362 --mask->loop_stack_size; 363 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block; 364 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask; 365 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask; 366 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var; 367 368 lp_exec_mask_update(mask); 369} 370 371/* stores val into an address pointed to by dst. 372 * mask->exec_mask is used to figure out which bits of val 373 * should be stored into the address 374 * (0 means don't store this bit, 1 means do store). 375 */ 376static void lp_exec_mask_store(struct lp_exec_mask *mask, 377 LLVMValueRef pred, 378 LLVMValueRef val, 379 LLVMValueRef dst) 380{ 381 LLVMBuilderRef builder = mask->bld->gallivm->builder; 382 383 /* Mix the predicate and execution mask */ 384 if (mask->has_mask) { 385 if (pred) { 386 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); 387 } else { 388 pred = mask->exec_mask; 389 } 390 } 391 392 if (pred) { 393 LLVMValueRef real_val, dst_val; 394 395 dst_val = LLVMBuildLoad(builder, dst, ""); 396 real_val = lp_build_select(mask->bld, 397 pred, 398 val, dst_val); 399 400 LLVMBuildStore(builder, real_val, dst); 401 } else 402 LLVMBuildStore(builder, val, dst); 403} 404 405static void lp_exec_mask_call(struct lp_exec_mask *mask, 406 int func, 407 int *pc) 408{ 409 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING); 410 mask->call_stack[mask->call_stack_size].pc = *pc; 411 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask; 412 mask->call_stack_size++; 413 *pc = func; 414} 415 416static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) 417{ 418 LLVMBuilderRef builder = mask->bld->gallivm->builder; 419 LLVMValueRef exec_mask; 420 421 if (mask->call_stack_size == 0) { 422 /* returning from main() */ 423 *pc = -1; 424 return; 425 } 426 exec_mask = LLVMBuildNot(builder, 427 mask->exec_mask, 428 "ret"); 429 430 mask->ret_mask = LLVMBuildAnd(builder, 431 mask->ret_mask, 432 exec_mask, "ret_full"); 433 434 lp_exec_mask_update(mask); 435} 436 437static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) 438{ 439} 440 441static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) 442{ 443 assert(mask->call_stack_size); 444 mask->call_stack_size--; 445 *pc = mask->call_stack[mask->call_stack_size].pc; 446 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask; 447 lp_exec_mask_update(mask); 448} 449 450 451/** 452 * Return pointer to a temporary register channel (src or dest). 453 * Note that indirect addressing cannot be handled here. 454 * \param index which temporary register 455 * \param chan which channel of the temp register. 456 */ 457static LLVMValueRef 458get_temp_ptr(struct lp_build_tgsi_soa_context *bld, 459 unsigned index, 460 unsigned chan) 461{ 462 LLVMBuilderRef builder = bld->base.gallivm->builder; 463 assert(chan < 4); 464 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 465 LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan); 466 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, ""); 467 } 468 else { 469 return bld->temps[index][chan]; 470 } 471} 472 473/** 474 * Return pointer to a output register channel (src or dest). 475 * Note that indirect addressing cannot be handled here. 476 * \param index which output register 477 * \param chan which channel of the output register. 478 */ 479static LLVMValueRef 480get_output_ptr(struct lp_build_tgsi_soa_context *bld, 481 unsigned index, 482 unsigned chan) 483{ 484 LLVMBuilderRef builder = bld->base.gallivm->builder; 485 assert(chan < 4); 486 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 487 LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, 488 index * 4 + chan); 489 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, ""); 490 } 491 else { 492 return bld->outputs[index][chan]; 493 } 494} 495 496/** 497 * Gather vector. 498 * XXX the lp_build_gather() function should be capable of doing this 499 * with a little work. 500 */ 501static LLVMValueRef 502build_gather(struct lp_build_tgsi_soa_context *bld, 503 LLVMValueRef base_ptr, 504 LLVMValueRef indexes) 505{ 506 LLVMBuilderRef builder = bld->base.gallivm->builder; 507 LLVMValueRef res = bld->base.undef; 508 unsigned i; 509 510 /* 511 * Loop over elements of index_vec, load scalar value, insert it into 'res'. 512 */ 513 for (i = 0; i < bld->base.type.length; i++) { 514 LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i); 515 LLVMValueRef index = LLVMBuildExtractElement(builder, 516 indexes, ii, ""); 517 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, 518 &index, 1, "gather_ptr"); 519 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 520 521 res = LLVMBuildInsertElement(builder, res, scalar, ii, ""); 522 } 523 524 return res; 525} 526 527 528/** 529 * Scatter/store vector. 530 */ 531static void 532emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, 533 LLVMValueRef base_ptr, 534 LLVMValueRef indexes, 535 LLVMValueRef values, 536 struct lp_exec_mask *mask, 537 LLVMValueRef pred) 538{ 539 struct gallivm_state *gallivm = bld->base.gallivm; 540 LLVMBuilderRef builder = gallivm->builder; 541 unsigned i; 542 543 /* Mix the predicate and execution mask */ 544 if (mask->has_mask) { 545 if (pred) { 546 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); 547 } 548 else { 549 pred = mask->exec_mask; 550 } 551 } 552 553 /* 554 * Loop over elements of index_vec, store scalar value. 555 */ 556 for (i = 0; i < bld->base.type.length; i++) { 557 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 558 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); 559 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); 560 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); 561 LLVMValueRef scalar_pred = pred ? 562 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; 563 564 if (0) 565 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n", 566 ii, val, index, scalar_ptr); 567 568 if (scalar_pred) { 569 LLVMValueRef real_val, dst_val; 570 dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); 571 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); 572 LLVMBuildStore(builder, real_val, scalar_ptr); 573 } 574 else { 575 LLVMBuildStore(builder, val, scalar_ptr); 576 } 577 } 578} 579 580 581/** 582 * Read the current value of the ADDR register, convert the floats to 583 * ints, add the base index and return the vector of offsets. 584 * The offsets will be used to index into the constant buffer or 585 * temporary register file. 586 */ 587static LLVMValueRef 588get_indirect_index(struct lp_build_tgsi_soa_context *bld, 589 unsigned reg_file, unsigned reg_index, 590 const struct tgsi_src_register *indirect_reg) 591{ 592 LLVMBuilderRef builder = bld->base.gallivm->builder; 593 struct lp_build_context *uint_bld = &bld->uint_bld; 594 /* always use X component of address register */ 595 unsigned swizzle = indirect_reg->SwizzleX; 596 LLVMValueRef base; 597 LLVMValueRef rel; 598 LLVMValueRef max_index; 599 LLVMValueRef index; 600 601 assert(bld->indirect_files & (1 << reg_file)); 602 603 base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index); 604 605 assert(swizzle < 4); 606 rel = LLVMBuildLoad(builder, 607 bld->addr[indirect_reg->Index][swizzle], 608 "load addr reg"); 609 610 /* for indexing we want integers */ 611 rel = LLVMBuildFPToSI(builder, 612 rel, 613 uint_bld->vec_type, ""); 614 615 index = lp_build_add(uint_bld, base, rel); 616 617 max_index = lp_build_const_int_vec(bld->base.gallivm, 618 uint_bld->type, 619 bld->info->file_max[reg_file]); 620 621 assert(!uint_bld->type.sign); 622 index = lp_build_min(uint_bld, index, max_index); 623 624 return index; 625} 626 627 628/** 629 * Register fetch. 630 */ 631static LLVMValueRef 632emit_fetch( 633 struct lp_build_tgsi_soa_context *bld, 634 const struct tgsi_full_instruction *inst, 635 unsigned src_op, 636 const unsigned chan_index ) 637{ 638 struct gallivm_state *gallivm = bld->base.gallivm; 639 LLVMBuilderRef builder = gallivm->builder; 640 struct lp_build_context *uint_bld = &bld->uint_bld; 641 const struct tgsi_full_src_register *reg = &inst->Src[src_op]; 642 const unsigned swizzle = 643 tgsi_util_get_full_src_register_swizzle(reg, chan_index); 644 LLVMValueRef res; 645 LLVMValueRef indirect_index = NULL; 646 647 if (swizzle > 3) { 648 assert(0 && "invalid swizzle in emit_fetch()"); 649 return bld->base.undef; 650 } 651 652 if (reg->Register.Indirect) { 653 indirect_index = get_indirect_index(bld, 654 reg->Register.File, 655 reg->Register.Index, 656 ®->Indirect); 657 } else { 658 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); 659 } 660 661 switch (reg->Register.File) { 662 case TGSI_FILE_CONSTANT: 663 if (reg->Register.Indirect) { 664 LLVMValueRef swizzle_vec = 665 lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle); 666 LLVMValueRef index_vec; /* index into the const buffer */ 667 668 /* index_vec = indirect_index * 4 + swizzle */ 669 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 670 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 671 672 /* Gather values from the constant buffer */ 673 res = build_gather(bld, bld->consts_ptr, index_vec); 674 } 675 else { 676 LLVMValueRef index; /* index into the const buffer */ 677 LLVMValueRef scalar, scalar_ptr; 678 679 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle); 680 681 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, 682 &index, 1, ""); 683 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 684 685 res = lp_build_broadcast_scalar(&bld->base, scalar); 686 } 687 break; 688 689 case TGSI_FILE_IMMEDIATE: 690 res = bld->immediates[reg->Register.Index][swizzle]; 691 assert(res); 692 break; 693 694 case TGSI_FILE_INPUT: 695 if (reg->Register.Indirect) { 696 LLVMValueRef swizzle_vec = 697 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); 698 LLVMValueRef length_vec = 699 lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length); 700 LLVMValueRef index_vec; /* index into the const buffer */ 701 LLVMValueRef inputs_array; 702 LLVMTypeRef float4_ptr_type; 703 704 /* index_vec = (indirect_index * 4 + swizzle) * length */ 705 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 706 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 707 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 708 709 /* cast inputs_array pointer to float* */ 710 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 711 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, 712 float4_ptr_type, ""); 713 714 /* Gather values from the temporary register array */ 715 res = build_gather(bld, inputs_array, index_vec); 716 } else { 717 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { 718 LLVMValueRef lindex = lp_build_const_int32(gallivm, 719 reg->Register.Index * 4 + swizzle); 720 LLVMValueRef input_ptr = LLVMBuildGEP(builder, 721 bld->inputs_array, &lindex, 1, ""); 722 res = LLVMBuildLoad(builder, input_ptr, ""); 723 } 724 else { 725 res = bld->inputs[reg->Register.Index][swizzle]; 726 } 727 } 728 assert(res); 729 break; 730 731 case TGSI_FILE_TEMPORARY: 732 if (reg->Register.Indirect) { 733 LLVMValueRef swizzle_vec = 734 lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle); 735 LLVMValueRef length_vec = 736 lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, 737 bld->base.type.length); 738 LLVMValueRef index_vec; /* index into the const buffer */ 739 LLVMValueRef temps_array; 740 LLVMTypeRef float4_ptr_type; 741 742 /* index_vec = (indirect_index * 4 + swizzle) * length */ 743 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 744 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 745 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 746 747 /* cast temps_array pointer to float* */ 748 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0); 749 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 750 float4_ptr_type, ""); 751 752 /* Gather values from the temporary register array */ 753 res = build_gather(bld, temps_array, index_vec); 754 } 755 else { 756 LLVMValueRef temp_ptr; 757 temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle); 758 res = LLVMBuildLoad(builder, temp_ptr, ""); 759 if (!res) 760 return bld->base.undef; 761 } 762 break; 763 764 case TGSI_FILE_SYSTEM_VALUE: 765 assert(!reg->Register.Indirect); 766 { 767 LLVMValueRef index; /* index into the system value array */ 768 LLVMValueRef scalar, scalar_ptr; 769 770 index = lp_build_const_int32(gallivm, 771 reg->Register.Index * 4 + swizzle); 772 773 scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, 774 &index, 1, ""); 775 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 776 777 res = lp_build_broadcast_scalar(&bld->base, scalar); 778 } 779 break; 780 781 default: 782 assert(0 && "invalid src register in emit_fetch()"); 783 return bld->base.undef; 784 } 785 786 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { 787 case TGSI_UTIL_SIGN_CLEAR: 788 res = lp_build_abs( &bld->base, res ); 789 break; 790 791 case TGSI_UTIL_SIGN_SET: 792 res = lp_build_abs( &bld->base, res ); 793 /* fall through */ 794 case TGSI_UTIL_SIGN_TOGGLE: 795 res = lp_build_negate( &bld->base, res ); 796 break; 797 798 case TGSI_UTIL_SIGN_KEEP: 799 break; 800 } 801 802 return res; 803} 804 805 806/** 807 * Register fetch with derivatives. 808 */ 809static void 810emit_fetch_deriv( 811 struct lp_build_tgsi_soa_context *bld, 812 const struct tgsi_full_instruction *inst, 813 unsigned index, 814 const unsigned chan_index, 815 LLVMValueRef *res, 816 LLVMValueRef *ddx, 817 LLVMValueRef *ddy) 818{ 819 LLVMValueRef src; 820 821 src = emit_fetch(bld, inst, index, chan_index); 822 823 if(res) 824 *res = src; 825 826 /* TODO: use interpolation coeffs for inputs */ 827 828 if(ddx) 829 *ddx = lp_build_ddx(&bld->base, src); 830 831 if(ddy) 832 *ddy = lp_build_ddy(&bld->base, src); 833} 834 835 836/** 837 * Predicate. 838 */ 839static void 840emit_fetch_predicate( 841 struct lp_build_tgsi_soa_context *bld, 842 const struct tgsi_full_instruction *inst, 843 LLVMValueRef *pred) 844{ 845 LLVMBuilderRef builder = bld->base.gallivm->builder; 846 unsigned index; 847 unsigned char swizzles[4]; 848 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; 849 LLVMValueRef value; 850 unsigned chan; 851 852 if (!inst->Instruction.Predicate) { 853 FOR_EACH_CHANNEL( chan ) { 854 pred[chan] = NULL; 855 } 856 return; 857 } 858 859 swizzles[0] = inst->Predicate.SwizzleX; 860 swizzles[1] = inst->Predicate.SwizzleY; 861 swizzles[2] = inst->Predicate.SwizzleZ; 862 swizzles[3] = inst->Predicate.SwizzleW; 863 864 index = inst->Predicate.Index; 865 assert(index < LP_MAX_TGSI_PREDS); 866 867 FOR_EACH_CHANNEL( chan ) { 868 unsigned swizzle = swizzles[chan]; 869 870 /* 871 * Only fetch the predicate register channels that are actually listed 872 * in the swizzles 873 */ 874 if (!unswizzled[swizzle]) { 875 value = LLVMBuildLoad(builder, 876 bld->preds[index][swizzle], ""); 877 878 /* 879 * Convert the value to an integer mask. 880 * 881 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions 882 * is needlessly causing two comparisons due to storing the intermediate 883 * result as float vector instead of an integer mask vector. 884 */ 885 value = lp_build_compare(bld->base.gallivm, 886 bld->base.type, 887 PIPE_FUNC_NOTEQUAL, 888 value, 889 bld->base.zero); 890 if (inst->Predicate.Negate) { 891 value = LLVMBuildNot(builder, value, ""); 892 } 893 894 unswizzled[swizzle] = value; 895 } else { 896 value = unswizzled[swizzle]; 897 } 898 899 pred[chan] = value; 900 } 901} 902 903 904/** 905 * Register store. 906 */ 907static void 908emit_store( 909 struct lp_build_tgsi_soa_context *bld, 910 const struct tgsi_full_instruction *inst, 911 unsigned index, 912 unsigned chan_index, 913 LLVMValueRef pred, 914 LLVMValueRef value) 915{ 916 struct gallivm_state *gallivm = bld->base.gallivm; 917 LLVMBuilderRef builder = gallivm->builder; 918 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 919 struct lp_build_context *uint_bld = &bld->uint_bld; 920 LLVMValueRef indirect_index = NULL; 921 922 switch( inst->Instruction.Saturate ) { 923 case TGSI_SAT_NONE: 924 break; 925 926 case TGSI_SAT_ZERO_ONE: 927 value = lp_build_max(&bld->base, value, bld->base.zero); 928 value = lp_build_min(&bld->base, value, bld->base.one); 929 break; 930 931 case TGSI_SAT_MINUS_PLUS_ONE: 932 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0)); 933 value = lp_build_min(&bld->base, value, bld->base.one); 934 break; 935 936 default: 937 assert(0); 938 } 939 940 if (reg->Register.Indirect) { 941 indirect_index = get_indirect_index(bld, 942 reg->Register.File, 943 reg->Register.Index, 944 ®->Indirect); 945 } else { 946 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); 947 } 948 949 switch( reg->Register.File ) { 950 case TGSI_FILE_OUTPUT: 951 if (reg->Register.Indirect) { 952 LLVMValueRef chan_vec = 953 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); 954 LLVMValueRef length_vec = 955 lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length); 956 LLVMValueRef index_vec; /* indexes into the temp registers */ 957 LLVMValueRef outputs_array; 958 LLVMValueRef pixel_offsets; 959 LLVMTypeRef float_ptr_type; 960 int i; 961 962 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 963 pixel_offsets = uint_bld->undef; 964 for (i = 0; i < bld->base.type.length; i++) { 965 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 966 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 967 ii, ii, ""); 968 } 969 970 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 971 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 972 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 973 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 974 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 975 976 float_ptr_type = 977 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 978 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, 979 float_ptr_type, ""); 980 981 /* Scatter store values into temp registers */ 982 emit_mask_scatter(bld, outputs_array, index_vec, value, 983 &bld->exec_mask, pred); 984 } 985 else { 986 LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index, 987 chan_index); 988 lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr); 989 } 990 break; 991 992 case TGSI_FILE_TEMPORARY: 993 if (reg->Register.Indirect) { 994 LLVMValueRef chan_vec = 995 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); 996 LLVMValueRef length_vec = 997 lp_build_const_int_vec(gallivm, uint_bld->type, 998 bld->base.type.length); 999 LLVMValueRef index_vec; /* indexes into the temp registers */ 1000 LLVMValueRef temps_array; 1001 LLVMValueRef pixel_offsets; 1002 LLVMTypeRef float_ptr_type; 1003 int i; 1004 1005 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 1006 pixel_offsets = uint_bld->undef; 1007 for (i = 0; i < bld->base.type.length; i++) { 1008 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 1009 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 1010 ii, ii, ""); 1011 } 1012 1013 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 1014 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 1015 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 1016 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 1017 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 1018 1019 float_ptr_type = 1020 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1021 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 1022 float_ptr_type, ""); 1023 1024 /* Scatter store values into temp registers */ 1025 emit_mask_scatter(bld, temps_array, index_vec, value, 1026 &bld->exec_mask, pred); 1027 } 1028 else { 1029 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 1030 chan_index); 1031 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); 1032 } 1033 break; 1034 1035 case TGSI_FILE_ADDRESS: 1036 lp_exec_mask_store(&bld->exec_mask, pred, value, 1037 bld->addr[reg->Register.Index][chan_index]); 1038 break; 1039 1040 case TGSI_FILE_PREDICATE: 1041 lp_exec_mask_store(&bld->exec_mask, pred, value, 1042 bld->preds[reg->Register.Index][chan_index]); 1043 break; 1044 1045 default: 1046 assert( 0 ); 1047 } 1048} 1049 1050 1051/** 1052 * High-level instruction translators. 1053 */ 1054 1055static void 1056emit_tex( struct lp_build_tgsi_soa_context *bld, 1057 const struct tgsi_full_instruction *inst, 1058 enum lp_build_tex_modifier modifier, 1059 LLVMValueRef *texel) 1060{ 1061 LLVMBuilderRef builder = bld->base.gallivm->builder; 1062 unsigned unit; 1063 LLVMValueRef lod_bias, explicit_lod; 1064 LLVMValueRef oow = NULL; 1065 LLVMValueRef coords[3]; 1066 LLVMValueRef ddx[3]; 1067 LLVMValueRef ddy[3]; 1068 unsigned num_coords; 1069 unsigned i; 1070 1071 if (!bld->sampler) { 1072 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 1073 for (i = 0; i < 4; i++) { 1074 texel[i] = bld->base.undef; 1075 } 1076 return; 1077 } 1078 1079 switch (inst->Texture.Texture) { 1080 case TGSI_TEXTURE_1D: 1081 num_coords = 1; 1082 break; 1083 case TGSI_TEXTURE_2D: 1084 case TGSI_TEXTURE_RECT: 1085 num_coords = 2; 1086 break; 1087 case TGSI_TEXTURE_SHADOW1D: 1088 case TGSI_TEXTURE_SHADOW2D: 1089 case TGSI_TEXTURE_SHADOWRECT: 1090 case TGSI_TEXTURE_3D: 1091 case TGSI_TEXTURE_CUBE: 1092 num_coords = 3; 1093 break; 1094 default: 1095 assert(0); 1096 return; 1097 } 1098 1099 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 1100 lod_bias = emit_fetch( bld, inst, 0, 3 ); 1101 explicit_lod = NULL; 1102 } 1103 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 1104 lod_bias = NULL; 1105 explicit_lod = emit_fetch( bld, inst, 0, 3 ); 1106 } 1107 else { 1108 lod_bias = NULL; 1109 explicit_lod = NULL; 1110 } 1111 1112 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 1113 oow = emit_fetch( bld, inst, 0, 3 ); 1114 oow = lp_build_rcp(&bld->base, oow); 1115 } 1116 1117 for (i = 0; i < num_coords; i++) { 1118 coords[i] = emit_fetch( bld, inst, 0, i ); 1119 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 1120 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 1121 } 1122 for (i = num_coords; i < 3; i++) { 1123 coords[i] = bld->base.undef; 1124 } 1125 1126 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 1127 LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0); 1128 for (i = 0; i < num_coords; i++) { 1129 LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); 1130 LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); 1131 ddx[i] = LLVMBuildExtractElement(builder, src1, index0, ""); 1132 ddy[i] = LLVMBuildExtractElement(builder, src2, index0, ""); 1133 } 1134 unit = inst->Src[3].Register.Index; 1135 } else { 1136 for (i = 0; i < num_coords; i++) { 1137 ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); 1138 ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); 1139 } 1140 unit = inst->Src[1].Register.Index; 1141 } 1142 for (i = num_coords; i < 3; i++) { 1143 ddx[i] = LLVMGetUndef(bld->base.elem_type); 1144 ddy[i] = LLVMGetUndef(bld->base.elem_type); 1145 } 1146 1147 bld->sampler->emit_fetch_texel(bld->sampler, 1148 bld->base.gallivm, 1149 bld->base.type, 1150 unit, num_coords, coords, 1151 ddx, ddy, 1152 lod_bias, explicit_lod, 1153 texel); 1154} 1155 1156static boolean 1157near_end_of_shader(struct lp_build_tgsi_soa_context *bld, 1158 int pc) 1159{ 1160 int i; 1161 1162 for (i = 0; i < 5; i++) { 1163 unsigned opcode; 1164 1165 if (pc + i >= bld->info->num_instructions) 1166 return TRUE; 1167 1168 opcode = bld->instructions[pc + i].Instruction.Opcode; 1169 1170 if (opcode == TGSI_OPCODE_END) 1171 return TRUE; 1172 1173 if (opcode == TGSI_OPCODE_TEX || 1174 opcode == TGSI_OPCODE_TXP || 1175 opcode == TGSI_OPCODE_TXD || 1176 opcode == TGSI_OPCODE_TXB || 1177 opcode == TGSI_OPCODE_TXL || 1178 opcode == TGSI_OPCODE_TXF || 1179 opcode == TGSI_OPCODE_TXQ || 1180 opcode == TGSI_OPCODE_CAL || 1181 opcode == TGSI_OPCODE_CALLNZ || 1182 opcode == TGSI_OPCODE_IF || 1183 opcode == TGSI_OPCODE_IFC || 1184 opcode == TGSI_OPCODE_BGNLOOP || 1185 opcode == TGSI_OPCODE_SWITCH) 1186 return FALSE; 1187 } 1188 1189 return TRUE; 1190} 1191 1192 1193 1194/** 1195 * Kill fragment if any of the src register values are negative. 1196 */ 1197static void 1198emit_kil( 1199 struct lp_build_tgsi_soa_context *bld, 1200 const struct tgsi_full_instruction *inst, 1201 int pc) 1202{ 1203 LLVMBuilderRef builder = bld->base.gallivm->builder; 1204 const struct tgsi_full_src_register *reg = &inst->Src[0]; 1205 LLVMValueRef terms[NUM_CHANNELS]; 1206 LLVMValueRef mask; 1207 unsigned chan_index; 1208 1209 memset(&terms, 0, sizeof terms); 1210 1211 FOR_EACH_CHANNEL( chan_index ) { 1212 unsigned swizzle; 1213 1214 /* Unswizzle channel */ 1215 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1216 1217 /* Check if the component has not been already tested. */ 1218 assert(swizzle < NUM_CHANNELS); 1219 if( !terms[swizzle] ) 1220 /* TODO: change the comparison operator instead of setting the sign */ 1221 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 1222 } 1223 1224 mask = NULL; 1225 FOR_EACH_CHANNEL( chan_index ) { 1226 if(terms[chan_index]) { 1227 LLVMValueRef chan_mask; 1228 1229 /* 1230 * If term < 0 then mask = 0 else mask = ~0. 1231 */ 1232 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 1233 1234 if(mask) 1235 mask = LLVMBuildAnd(builder, mask, chan_mask, ""); 1236 else 1237 mask = chan_mask; 1238 } 1239 } 1240 1241 if(mask) { 1242 lp_build_mask_update(bld->mask, mask); 1243 1244 if (!near_end_of_shader(bld, pc)) 1245 lp_build_mask_check(bld->mask); 1246 } 1247} 1248 1249 1250/** 1251 * Predicated fragment kill. 1252 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 1253 * The only predication is the execution mask which will apply if 1254 * we're inside a loop or conditional. 1255 */ 1256static void 1257emit_kilp(struct lp_build_tgsi_soa_context *bld, 1258 const struct tgsi_full_instruction *inst, 1259 int pc) 1260{ 1261 LLVMBuilderRef builder = bld->base.gallivm->builder; 1262 LLVMValueRef mask; 1263 1264 /* For those channels which are "alive", disable fragment shader 1265 * execution. 1266 */ 1267 if (bld->exec_mask.has_mask) { 1268 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); 1269 } 1270 else { 1271 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type); 1272 mask = zero; 1273 } 1274 1275 lp_build_mask_update(bld->mask, mask); 1276 1277 if (!near_end_of_shader(bld, pc)) 1278 lp_build_mask_check(bld->mask); 1279} 1280 1281 1282/** 1283 * Emit code which will dump the value of all the temporary registers 1284 * to stdout. 1285 */ 1286static void 1287emit_dump_temps(struct lp_build_tgsi_soa_context *bld) 1288{ 1289 struct gallivm_state *gallivm = bld->base.gallivm; 1290 LLVMBuilderRef builder = gallivm->builder; 1291 LLVMValueRef temp_ptr; 1292 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); 1293 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1); 1294 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); 1295 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); 1296 int index; 1297 int n = bld->info->file_max[TGSI_FILE_TEMPORARY]; 1298 1299 for (index = 0; index < n; index++) { 1300 LLVMValueRef idx = lp_build_const_int32(gallivm, index); 1301 LLVMValueRef v[4][4], res; 1302 int chan; 1303 1304 lp_build_printf(gallivm, "TEMP[%d]:\n", idx); 1305 1306 for (chan = 0; chan < 4; chan++) { 1307 temp_ptr = get_temp_ptr(bld, index, chan); 1308 res = LLVMBuildLoad(builder, temp_ptr, ""); 1309 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); 1310 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); 1311 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, ""); 1312 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, ""); 1313 } 1314 1315 lp_build_printf(gallivm, " X: %f %f %f %f\n", 1316 v[0][0], v[0][1], v[0][2], v[0][3]); 1317 lp_build_printf(gallivm, " Y: %f %f %f %f\n", 1318 v[1][0], v[1][1], v[1][2], v[1][3]); 1319 lp_build_printf(gallivm, " Z: %f %f %f %f\n", 1320 v[2][0], v[2][1], v[2][2], v[2][3]); 1321 lp_build_printf(gallivm, " W: %f %f %f %f\n", 1322 v[3][0], v[3][1], v[3][2], v[3][3]); 1323 } 1324} 1325 1326 1327 1328static void 1329emit_declaration( 1330 struct lp_build_tgsi_soa_context *bld, 1331 const struct tgsi_full_declaration *decl) 1332{ 1333 struct gallivm_state *gallivm = bld->base.gallivm; 1334 LLVMTypeRef vec_type = bld->base.vec_type; 1335 const unsigned first = decl->Range.First; 1336 const unsigned last = decl->Range.Last; 1337 unsigned idx, i; 1338 1339 for (idx = first; idx <= last; ++idx) { 1340 assert(last <= bld->info->file_max[decl->Declaration.File]); 1341 switch (decl->Declaration.File) { 1342 case TGSI_FILE_TEMPORARY: 1343 assert(idx < LP_MAX_TGSI_TEMPS); 1344 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { 1345 for (i = 0; i < NUM_CHANNELS; i++) 1346 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); 1347 } 1348 break; 1349 1350 case TGSI_FILE_OUTPUT: 1351 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 1352 for (i = 0; i < NUM_CHANNELS; i++) 1353 bld->outputs[idx][i] = lp_build_alloca(gallivm, 1354 vec_type, "output"); 1355 } 1356 break; 1357 1358 case TGSI_FILE_ADDRESS: 1359 assert(idx < LP_MAX_TGSI_ADDRS); 1360 for (i = 0; i < NUM_CHANNELS; i++) 1361 bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr"); 1362 break; 1363 1364 case TGSI_FILE_PREDICATE: 1365 assert(idx < LP_MAX_TGSI_PREDS); 1366 for (i = 0; i < NUM_CHANNELS; i++) 1367 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, 1368 "predicate"); 1369 break; 1370 1371 default: 1372 /* don't need to declare other vars */ 1373 break; 1374 } 1375 } 1376} 1377 1378 1379/** 1380 * Emit LLVM for one TGSI instruction. 1381 * \param return TRUE for success, FALSE otherwise 1382 */ 1383static boolean 1384emit_instruction( 1385 struct lp_build_tgsi_soa_context *bld, 1386 const struct tgsi_full_instruction *inst, 1387 const struct tgsi_opcode_info *info, 1388 int *pc) 1389{ 1390 unsigned chan_index; 1391 LLVMValueRef src0, src1, src2; 1392 LLVMValueRef tmp0, tmp1, tmp2; 1393 LLVMValueRef tmp3 = NULL; 1394 LLVMValueRef tmp4 = NULL; 1395 LLVMValueRef tmp5 = NULL; 1396 LLVMValueRef tmp6 = NULL; 1397 LLVMValueRef tmp7 = NULL; 1398 LLVMValueRef res; 1399 LLVMValueRef dst0[NUM_CHANNELS]; 1400 1401 /* 1402 * Stores and write masks are handled in a general fashion after the long 1403 * instruction opcode switch statement. 1404 * 1405 * Although not stricitly necessary, we avoid generating instructions for 1406 * channels which won't be stored, in cases where's that easy. For some 1407 * complex instructions, like texture sampling, it is more convenient to 1408 * assume a full writemask and then let LLVM optimization passes eliminate 1409 * redundant code. 1410 */ 1411 1412 (*pc)++; 1413 1414 assert(info->num_dst <= 1); 1415 if (info->num_dst) { 1416 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1417 dst0[chan_index] = bld->base.undef; 1418 } 1419 } 1420 1421 switch (inst->Instruction.Opcode) { 1422 case TGSI_OPCODE_ARL: 1423 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1424 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1425 tmp0 = lp_build_floor(&bld->base, tmp0); 1426 dst0[chan_index] = tmp0; 1427 } 1428 break; 1429 1430 case TGSI_OPCODE_MOV: 1431 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1432 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1433 } 1434 break; 1435 1436 case TGSI_OPCODE_LIT: 1437 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 1438 dst0[CHAN_X] = bld->base.one; 1439 } 1440 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1441 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1442 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1443 } 1444 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1445 /* XMM[1] = SrcReg[0].yyyy */ 1446 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1447 /* XMM[1] = max(XMM[1], 0) */ 1448 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1449 /* XMM[2] = SrcReg[0].wwww */ 1450 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1451 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1452 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1453 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1454 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1455 } 1456 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1457 dst0[CHAN_W] = bld->base.one; 1458 } 1459 break; 1460 1461 case TGSI_OPCODE_RCP: 1462 /* TGSI_OPCODE_RECIP */ 1463 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1464 res = lp_build_rcp(&bld->base, src0); 1465 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1466 dst0[chan_index] = res; 1467 } 1468 break; 1469 1470 case TGSI_OPCODE_RSQ: 1471 /* TGSI_OPCODE_RECIPSQRT */ 1472 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1473 src0 = lp_build_abs(&bld->base, src0); 1474 res = lp_build_rsqrt(&bld->base, src0); 1475 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1476 dst0[chan_index] = res; 1477 } 1478 break; 1479 1480 case TGSI_OPCODE_EXP: 1481 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1482 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1483 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1484 LLVMValueRef *p_exp2_int_part = NULL; 1485 LLVMValueRef *p_frac_part = NULL; 1486 LLVMValueRef *p_exp2 = NULL; 1487 1488 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1489 1490 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1491 p_exp2_int_part = &tmp0; 1492 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1493 p_frac_part = &tmp1; 1494 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1495 p_exp2 = &tmp2; 1496 1497 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1498 1499 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1500 dst0[CHAN_X] = tmp0; 1501 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1502 dst0[CHAN_Y] = tmp1; 1503 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1504 dst0[CHAN_Z] = tmp2; 1505 } 1506 /* dst.w = 1.0 */ 1507 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1508 dst0[CHAN_W] = bld->base.one; 1509 } 1510 break; 1511 1512 case TGSI_OPCODE_LOG: 1513 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1514 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1515 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1516 LLVMValueRef *p_floor_log2 = NULL; 1517 LLVMValueRef *p_exp = NULL; 1518 LLVMValueRef *p_log2 = NULL; 1519 1520 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1521 src0 = lp_build_abs( &bld->base, src0 ); 1522 1523 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1524 p_floor_log2 = &tmp0; 1525 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1526 p_exp = &tmp1; 1527 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1528 p_log2 = &tmp2; 1529 1530 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1531 1532 /* dst.x = floor(lg2(abs(src.x))) */ 1533 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1534 dst0[CHAN_X] = tmp0; 1535 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1536 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1537 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1538 } 1539 /* dst.z = lg2(abs(src.x)) */ 1540 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1541 dst0[CHAN_Z] = tmp2; 1542 } 1543 /* dst.w = 1.0 */ 1544 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1545 dst0[CHAN_W] = bld->base.one; 1546 } 1547 break; 1548 1549 case TGSI_OPCODE_MUL: 1550 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1551 src0 = emit_fetch( bld, inst, 0, chan_index ); 1552 src1 = emit_fetch( bld, inst, 1, chan_index ); 1553 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1554 } 1555 break; 1556 1557 case TGSI_OPCODE_ADD: 1558 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1559 src0 = emit_fetch( bld, inst, 0, chan_index ); 1560 src1 = emit_fetch( bld, inst, 1, chan_index ); 1561 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1562 } 1563 break; 1564 1565 case TGSI_OPCODE_DP3: 1566 /* TGSI_OPCODE_DOT3 */ 1567 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1568 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1569 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1570 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1571 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1572 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1573 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1574 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1575 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1576 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1577 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1578 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1579 dst0[chan_index] = tmp0; 1580 } 1581 break; 1582 1583 case TGSI_OPCODE_DP4: 1584 /* TGSI_OPCODE_DOT4 */ 1585 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1586 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1587 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1588 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1589 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1590 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1591 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1592 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1593 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1594 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1595 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1596 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1597 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1598 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1599 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1600 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1601 dst0[chan_index] = tmp0; 1602 } 1603 break; 1604 1605 case TGSI_OPCODE_DST: 1606 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1607 dst0[CHAN_X] = bld->base.one; 1608 } 1609 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1610 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1611 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1612 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1613 } 1614 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1615 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1616 } 1617 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1618 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1619 } 1620 break; 1621 1622 case TGSI_OPCODE_MIN: 1623 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1624 src0 = emit_fetch( bld, inst, 0, chan_index ); 1625 src1 = emit_fetch( bld, inst, 1, chan_index ); 1626 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1627 } 1628 break; 1629 1630 case TGSI_OPCODE_MAX: 1631 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1632 src0 = emit_fetch( bld, inst, 0, chan_index ); 1633 src1 = emit_fetch( bld, inst, 1, chan_index ); 1634 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1635 } 1636 break; 1637 1638 case TGSI_OPCODE_SLT: 1639 /* TGSI_OPCODE_SETLT */ 1640 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1641 src0 = emit_fetch( bld, inst, 0, chan_index ); 1642 src1 = emit_fetch( bld, inst, 1, chan_index ); 1643 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1644 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1645 } 1646 break; 1647 1648 case TGSI_OPCODE_SGE: 1649 /* TGSI_OPCODE_SETGE */ 1650 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1651 src0 = emit_fetch( bld, inst, 0, chan_index ); 1652 src1 = emit_fetch( bld, inst, 1, chan_index ); 1653 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1654 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1655 } 1656 break; 1657 1658 case TGSI_OPCODE_MAD: 1659 /* TGSI_OPCODE_MADD */ 1660 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1661 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1662 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1663 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1664 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1665 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1666 dst0[chan_index] = tmp0; 1667 } 1668 break; 1669 1670 case TGSI_OPCODE_SUB: 1671 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1672 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1673 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1674 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1675 } 1676 break; 1677 1678 case TGSI_OPCODE_LRP: 1679 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1680 src0 = emit_fetch( bld, inst, 0, chan_index ); 1681 src1 = emit_fetch( bld, inst, 1, chan_index ); 1682 src2 = emit_fetch( bld, inst, 2, chan_index ); 1683 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1684 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1685 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1686 } 1687 break; 1688 1689 case TGSI_OPCODE_CND: 1690 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1691 src0 = emit_fetch( bld, inst, 0, chan_index ); 1692 src1 = emit_fetch( bld, inst, 1, chan_index ); 1693 src2 = emit_fetch( bld, inst, 2, chan_index ); 1694 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); 1695 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1696 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1697 } 1698 break; 1699 1700 case TGSI_OPCODE_DP2A: 1701 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1702 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1703 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1704 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1705 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1706 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1707 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1708 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1709 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1710 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1711 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1712 } 1713 break; 1714 1715 case TGSI_OPCODE_FRC: 1716 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1717 src0 = emit_fetch( bld, inst, 0, chan_index ); 1718 tmp0 = lp_build_floor(&bld->base, src0); 1719 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1720 dst0[chan_index] = tmp0; 1721 } 1722 break; 1723 1724 case TGSI_OPCODE_CLAMP: 1725 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1726 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1727 src1 = emit_fetch( bld, inst, 1, chan_index ); 1728 src2 = emit_fetch( bld, inst, 2, chan_index ); 1729 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1730 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1731 dst0[chan_index] = tmp0; 1732 } 1733 break; 1734 1735 case TGSI_OPCODE_FLR: 1736 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1737 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1738 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1739 } 1740 break; 1741 1742 case TGSI_OPCODE_ROUND: 1743 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1744 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1745 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1746 } 1747 break; 1748 1749 case TGSI_OPCODE_EX2: { 1750 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1751 tmp0 = lp_build_exp2( &bld->base, tmp0); 1752 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1753 dst0[chan_index] = tmp0; 1754 } 1755 break; 1756 } 1757 1758 case TGSI_OPCODE_LG2: 1759 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1760 tmp0 = lp_build_log2( &bld->base, tmp0); 1761 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1762 dst0[chan_index] = tmp0; 1763 } 1764 break; 1765 1766 case TGSI_OPCODE_POW: 1767 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1768 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1769 res = lp_build_pow( &bld->base, src0, src1 ); 1770 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1771 dst0[chan_index] = res; 1772 } 1773 break; 1774 1775 case TGSI_OPCODE_XPD: 1776 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1777 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1778 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1779 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1780 } 1781 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1782 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1783 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1784 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1785 } 1786 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1787 tmp2 = tmp0; 1788 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1789 tmp5 = tmp3; 1790 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1791 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1792 dst0[CHAN_X] = tmp2; 1793 } 1794 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1795 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1796 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1797 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 1798 } 1799 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1800 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1801 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1802 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 1803 dst0[CHAN_Y] = tmp3; 1804 } 1805 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1806 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1807 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1808 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1809 dst0[CHAN_Z] = tmp5; 1810 } 1811 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1812 dst0[CHAN_W] = bld->base.one; 1813 } 1814 break; 1815 1816 case TGSI_OPCODE_ABS: 1817 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1818 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1819 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1820 } 1821 break; 1822 1823 case TGSI_OPCODE_RCC: 1824 /* deprecated? */ 1825 assert(0); 1826 return FALSE; 1827 1828 case TGSI_OPCODE_DPH: 1829 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1830 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1831 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1832 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1833 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1834 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1835 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1836 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1837 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1838 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1839 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1840 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1841 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1842 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1843 dst0[chan_index] = tmp0; 1844 } 1845 break; 1846 1847 case TGSI_OPCODE_COS: 1848 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1849 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1850 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1851 dst0[chan_index] = tmp0; 1852 } 1853 break; 1854 1855 case TGSI_OPCODE_DDX: 1856 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1857 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1858 } 1859 break; 1860 1861 case TGSI_OPCODE_DDY: 1862 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1863 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1864 } 1865 break; 1866 1867 case TGSI_OPCODE_KILP: 1868 /* predicated kill */ 1869 emit_kilp( bld, inst, (*pc)-1 ); 1870 break; 1871 1872 case TGSI_OPCODE_KIL: 1873 /* conditional kill */ 1874 emit_kil( bld, inst, (*pc)-1 ); 1875 break; 1876 1877 case TGSI_OPCODE_PK2H: 1878 return FALSE; 1879 break; 1880 1881 case TGSI_OPCODE_PK2US: 1882 return FALSE; 1883 break; 1884 1885 case TGSI_OPCODE_PK4B: 1886 return FALSE; 1887 break; 1888 1889 case TGSI_OPCODE_PK4UB: 1890 return FALSE; 1891 break; 1892 1893 case TGSI_OPCODE_RFL: 1894 return FALSE; 1895 break; 1896 1897 case TGSI_OPCODE_SEQ: 1898 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1899 src0 = emit_fetch( bld, inst, 0, chan_index ); 1900 src1 = emit_fetch( bld, inst, 1, chan_index ); 1901 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1902 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1903 } 1904 break; 1905 1906 case TGSI_OPCODE_SFL: 1907 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1908 dst0[chan_index] = bld->base.zero; 1909 } 1910 break; 1911 1912 case TGSI_OPCODE_SGT: 1913 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1914 src0 = emit_fetch( bld, inst, 0, chan_index ); 1915 src1 = emit_fetch( bld, inst, 1, chan_index ); 1916 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1917 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1918 } 1919 break; 1920 1921 case TGSI_OPCODE_SIN: 1922 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1923 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1924 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1925 dst0[chan_index] = tmp0; 1926 } 1927 break; 1928 1929 case TGSI_OPCODE_SLE: 1930 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1931 src0 = emit_fetch( bld, inst, 0, chan_index ); 1932 src1 = emit_fetch( bld, inst, 1, chan_index ); 1933 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 1934 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1935 } 1936 break; 1937 1938 case TGSI_OPCODE_SNE: 1939 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1940 src0 = emit_fetch( bld, inst, 0, chan_index ); 1941 src1 = emit_fetch( bld, inst, 1, chan_index ); 1942 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1943 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1944 } 1945 break; 1946 1947 case TGSI_OPCODE_STR: 1948 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1949 dst0[chan_index] = bld->base.one; 1950 } 1951 break; 1952 1953 case TGSI_OPCODE_TEX: 1954 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); 1955 break; 1956 1957 case TGSI_OPCODE_TXD: 1958 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1959 break; 1960 1961 case TGSI_OPCODE_UP2H: 1962 /* deprecated */ 1963 assert (0); 1964 return FALSE; 1965 break; 1966 1967 case TGSI_OPCODE_UP2US: 1968 /* deprecated */ 1969 assert(0); 1970 return FALSE; 1971 break; 1972 1973 case TGSI_OPCODE_UP4B: 1974 /* deprecated */ 1975 assert(0); 1976 return FALSE; 1977 break; 1978 1979 case TGSI_OPCODE_UP4UB: 1980 /* deprecated */ 1981 assert(0); 1982 return FALSE; 1983 break; 1984 1985 case TGSI_OPCODE_X2D: 1986 /* deprecated? */ 1987 assert(0); 1988 return FALSE; 1989 break; 1990 1991 case TGSI_OPCODE_ARA: 1992 /* deprecated */ 1993 assert(0); 1994 return FALSE; 1995 break; 1996 1997 case TGSI_OPCODE_ARR: 1998 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1999 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2000 tmp0 = lp_build_round(&bld->base, tmp0); 2001 dst0[chan_index] = tmp0; 2002 } 2003 break; 2004 2005 case TGSI_OPCODE_BRA: 2006 /* deprecated */ 2007 assert(0); 2008 return FALSE; 2009 break; 2010 2011 case TGSI_OPCODE_CAL: 2012 lp_exec_mask_call(&bld->exec_mask, 2013 inst->Label.Label, 2014 pc); 2015 2016 break; 2017 2018 case TGSI_OPCODE_RET: 2019 lp_exec_mask_ret(&bld->exec_mask, pc); 2020 break; 2021 2022 case TGSI_OPCODE_END: 2023 if (0) { 2024 /* for debugging */ 2025 emit_dump_temps(bld); 2026 } 2027 *pc = -1; 2028 break; 2029 2030 case TGSI_OPCODE_SSG: 2031 /* TGSI_OPCODE_SGN */ 2032 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2033 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2034 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 2035 } 2036 break; 2037 2038 case TGSI_OPCODE_CMP: 2039 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2040 src0 = emit_fetch( bld, inst, 0, chan_index ); 2041 src1 = emit_fetch( bld, inst, 1, chan_index ); 2042 src2 = emit_fetch( bld, inst, 2, chan_index ); 2043 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 2044 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 2045 } 2046 break; 2047 2048 case TGSI_OPCODE_SCS: 2049 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 2050 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 2051 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 2052 } 2053 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 2054 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 2055 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 2056 } 2057 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 2058 dst0[CHAN_Z] = bld->base.zero; 2059 } 2060 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 2061 dst0[CHAN_W] = bld->base.one; 2062 } 2063 break; 2064 2065 case TGSI_OPCODE_TXB: 2066 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); 2067 break; 2068 2069 case TGSI_OPCODE_NRM: 2070 /* fall-through */ 2071 case TGSI_OPCODE_NRM4: 2072 /* 3 or 4-component normalization */ 2073 { 2074 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 2075 2076 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 2077 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 2078 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 2079 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 2080 2081 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 2082 2083 /* xmm4 = src.x */ 2084 /* xmm0 = src.x * src.x */ 2085 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2086 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2087 tmp4 = tmp0; 2088 } 2089 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 2090 2091 /* xmm5 = src.y */ 2092 /* xmm0 = xmm0 + src.y * src.y */ 2093 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 2094 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2095 tmp5 = tmp1; 2096 } 2097 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2098 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2099 2100 /* xmm6 = src.z */ 2101 /* xmm0 = xmm0 + src.z * src.z */ 2102 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 2103 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2104 tmp6 = tmp1; 2105 } 2106 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2107 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2108 2109 if (dims == 4) { 2110 /* xmm7 = src.w */ 2111 /* xmm0 = xmm0 + src.w * src.w */ 2112 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 2113 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 2114 tmp7 = tmp1; 2115 } 2116 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2117 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2118 } 2119 2120 /* xmm1 = 1 / sqrt(xmm0) */ 2121 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 2122 2123 /* dst.x = xmm1 * src.x */ 2124 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2125 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 2126 } 2127 2128 /* dst.y = xmm1 * src.y */ 2129 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2130 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 2131 } 2132 2133 /* dst.z = xmm1 * src.z */ 2134 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2135 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 2136 } 2137 2138 /* dst.w = xmm1 * src.w */ 2139 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 2140 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 2141 } 2142 } 2143 2144 /* dst.w = 1.0 */ 2145 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 2146 dst0[CHAN_W] = bld->base.one; 2147 } 2148 } 2149 break; 2150 2151 case TGSI_OPCODE_DIV: 2152 /* deprecated */ 2153 assert( 0 ); 2154 return FALSE; 2155 break; 2156 2157 case TGSI_OPCODE_DP2: 2158 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 2159 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 2160 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 2161 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 2162 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 2163 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 2164 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 2165 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2166 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 2167 } 2168 break; 2169 2170 case TGSI_OPCODE_TXL: 2171 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 2172 break; 2173 2174 case TGSI_OPCODE_TXP: 2175 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); 2176 break; 2177 2178 case TGSI_OPCODE_BRK: 2179 lp_exec_break(&bld->exec_mask); 2180 break; 2181 2182 case TGSI_OPCODE_IF: 2183 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2184 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 2185 tmp0, bld->base.zero); 2186 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 2187 break; 2188 2189 case TGSI_OPCODE_BGNLOOP: 2190 lp_exec_bgnloop(&bld->exec_mask); 2191 break; 2192 2193 case TGSI_OPCODE_BGNSUB: 2194 lp_exec_mask_bgnsub(&bld->exec_mask); 2195 break; 2196 2197 case TGSI_OPCODE_ELSE: 2198 lp_exec_mask_cond_invert(&bld->exec_mask); 2199 break; 2200 2201 case TGSI_OPCODE_ENDIF: 2202 lp_exec_mask_cond_pop(&bld->exec_mask); 2203 break; 2204 2205 case TGSI_OPCODE_ENDLOOP: 2206 lp_exec_endloop(bld->base.gallivm, &bld->exec_mask); 2207 break; 2208 2209 case TGSI_OPCODE_ENDSUB: 2210 lp_exec_mask_endsub(&bld->exec_mask, pc); 2211 break; 2212 2213 case TGSI_OPCODE_PUSHA: 2214 /* deprecated? */ 2215 assert(0); 2216 return FALSE; 2217 break; 2218 2219 case TGSI_OPCODE_POPA: 2220 /* deprecated? */ 2221 assert(0); 2222 return FALSE; 2223 break; 2224 2225 case TGSI_OPCODE_CEIL: 2226 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2227 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2228 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 2229 } 2230 break; 2231 2232 case TGSI_OPCODE_I2F: 2233 /* deprecated? */ 2234 assert(0); 2235 return FALSE; 2236 break; 2237 2238 case TGSI_OPCODE_NOT: 2239 /* deprecated? */ 2240 assert(0); 2241 return FALSE; 2242 break; 2243 2244 case TGSI_OPCODE_TRUNC: 2245 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2246 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2247 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 2248 } 2249 break; 2250 2251 case TGSI_OPCODE_SHL: 2252 /* deprecated? */ 2253 assert(0); 2254 return FALSE; 2255 break; 2256 2257 case TGSI_OPCODE_ISHR: 2258 /* deprecated? */ 2259 assert(0); 2260 return FALSE; 2261 break; 2262 2263 case TGSI_OPCODE_AND: 2264 /* deprecated? */ 2265 assert(0); 2266 return FALSE; 2267 break; 2268 2269 case TGSI_OPCODE_OR: 2270 /* deprecated? */ 2271 assert(0); 2272 return FALSE; 2273 break; 2274 2275 case TGSI_OPCODE_MOD: 2276 /* deprecated? */ 2277 assert(0); 2278 return FALSE; 2279 break; 2280 2281 case TGSI_OPCODE_XOR: 2282 /* deprecated? */ 2283 assert(0); 2284 return FALSE; 2285 break; 2286 2287 case TGSI_OPCODE_SAD: 2288 /* deprecated? */ 2289 assert(0); 2290 return FALSE; 2291 break; 2292 2293 case TGSI_OPCODE_TXF: 2294 /* deprecated? */ 2295 assert(0); 2296 return FALSE; 2297 break; 2298 2299 case TGSI_OPCODE_TXQ: 2300 /* deprecated? */ 2301 assert(0); 2302 return FALSE; 2303 break; 2304 2305 case TGSI_OPCODE_CONT: 2306 lp_exec_continue(&bld->exec_mask); 2307 break; 2308 2309 case TGSI_OPCODE_EMIT: 2310 return FALSE; 2311 break; 2312 2313 case TGSI_OPCODE_ENDPRIM: 2314 return FALSE; 2315 break; 2316 2317 case TGSI_OPCODE_NOP: 2318 break; 2319 2320 default: 2321 return FALSE; 2322 } 2323 2324 if(info->num_dst) { 2325 LLVMValueRef pred[NUM_CHANNELS]; 2326 2327 emit_fetch_predicate( bld, inst, pred ); 2328 2329 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2330 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]); 2331 } 2332 } 2333 2334 return TRUE; 2335} 2336 2337 2338void 2339lp_build_tgsi_soa(struct gallivm_state *gallivm, 2340 const struct tgsi_token *tokens, 2341 struct lp_type type, 2342 struct lp_build_mask_context *mask, 2343 LLVMValueRef consts_ptr, 2344 LLVMValueRef system_values_array, 2345 const LLVMValueRef *pos, 2346 const LLVMValueRef (*inputs)[NUM_CHANNELS], 2347 LLVMValueRef (*outputs)[NUM_CHANNELS], 2348 struct lp_build_sampler_soa *sampler, 2349 const struct tgsi_shader_info *info) 2350{ 2351 struct lp_build_tgsi_soa_context bld; 2352 struct tgsi_parse_context parse; 2353 uint num_immediates = 0; 2354 uint num_instructions = 0; 2355 unsigned i; 2356 int pc = 0; 2357 2358 struct lp_type res_type; 2359 2360 assert(type.length <= LP_MAX_VECTOR_LENGTH); 2361 memset(&res_type, 0, sizeof res_type); 2362 res_type.width = type.width; 2363 res_type.length = type.length; 2364 res_type.sign = 1; 2365 2366 /* Setup build context */ 2367 memset(&bld, 0, sizeof bld); 2368 lp_build_context_init(&bld.base, gallivm, type); 2369 lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type)); 2370 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); 2371 bld.mask = mask; 2372 bld.pos = pos; 2373 bld.inputs = inputs; 2374 bld.outputs = outputs; 2375 bld.consts_ptr = consts_ptr; 2376 bld.sampler = sampler; 2377 bld.info = info; 2378 bld.indirect_files = info->indirect_files; 2379 bld.instructions = (struct tgsi_full_instruction *) 2380 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); 2381 bld.max_instructions = LP_MAX_INSTRUCTIONS; 2382 2383 if (!bld.instructions) { 2384 return; 2385 } 2386 2387 lp_exec_mask_init(&bld.exec_mask, &bld.base); 2388 2389 if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 2390 LLVMValueRef array_size = 2391 lp_build_const_int32(gallivm, 2392 info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); 2393 bld.temps_array = lp_build_array_alloca(gallivm, 2394 bld.base.vec_type, array_size, 2395 "temp_array"); 2396 } 2397 2398 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2399 LLVMValueRef array_size = 2400 lp_build_const_int32(gallivm, 2401 info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); 2402 bld.outputs_array = lp_build_array_alloca(gallivm, 2403 bld.base.vec_type, array_size, 2404 "output_array"); 2405 } 2406 2407 /* If we have indirect addressing in inputs we need to copy them into 2408 * our alloca array to be able to iterate over them */ 2409 if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) { 2410 unsigned index, chan; 2411 LLVMTypeRef vec_type = bld.base.vec_type; 2412 LLVMValueRef array_size = 2413 lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4); 2414 bld.inputs_array = lp_build_array_alloca(gallivm, 2415 vec_type, array_size, 2416 "input_array"); 2417 2418 assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1); 2419 2420 for (index = 0; index < info->num_inputs; ++index) { 2421 for (chan = 0; chan < NUM_CHANNELS; ++chan) { 2422 LLVMValueRef lindex = 2423 lp_build_const_int32(gallivm, index * 4 + chan); 2424 LLVMValueRef input_ptr = 2425 LLVMBuildGEP(gallivm->builder, bld.inputs_array, 2426 &lindex, 1, ""); 2427 LLVMValueRef value = bld.inputs[index][chan]; 2428 if (value) 2429 LLVMBuildStore(gallivm->builder, value, input_ptr); 2430 } 2431 } 2432 } 2433 2434 bld.system_values_array = system_values_array; 2435 2436 tgsi_parse_init( &parse, tokens ); 2437 2438 while( !tgsi_parse_end_of_tokens( &parse ) ) { 2439 tgsi_parse_token( &parse ); 2440 2441 switch( parse.FullToken.Token.Type ) { 2442 case TGSI_TOKEN_TYPE_DECLARATION: 2443 /* Inputs already interpolated */ 2444 emit_declaration( &bld, &parse.FullToken.FullDeclaration ); 2445 break; 2446 2447 case TGSI_TOKEN_TYPE_INSTRUCTION: 2448 { 2449 /* save expanded instruction */ 2450 if (num_instructions == bld.max_instructions) { 2451 struct tgsi_full_instruction *instructions; 2452 instructions = REALLOC(bld.instructions, 2453 bld.max_instructions 2454 * sizeof(struct tgsi_full_instruction), 2455 (bld.max_instructions + LP_MAX_INSTRUCTIONS) 2456 * sizeof(struct tgsi_full_instruction)); 2457 if (!instructions) { 2458 break; 2459 } 2460 bld.instructions = instructions; 2461 bld.max_instructions += LP_MAX_INSTRUCTIONS; 2462 } 2463 2464 memcpy(bld.instructions + num_instructions, 2465 &parse.FullToken.FullInstruction, 2466 sizeof(bld.instructions[0])); 2467 2468 num_instructions++; 2469 } 2470 2471 break; 2472 2473 case TGSI_TOKEN_TYPE_IMMEDIATE: 2474 /* simply copy the immediate values into the next immediates[] slot */ 2475 { 2476 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 2477 assert(size <= 4); 2478 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 2479 for( i = 0; i < size; ++i ) 2480 bld.immediates[num_immediates][i] = 2481 lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float); 2482 for( i = size; i < 4; ++i ) 2483 bld.immediates[num_immediates][i] = bld.base.undef; 2484 num_immediates++; 2485 } 2486 break; 2487 2488 case TGSI_TOKEN_TYPE_PROPERTY: 2489 break; 2490 2491 default: 2492 assert( 0 ); 2493 } 2494 } 2495 2496 while (pc != -1) { 2497 struct tgsi_full_instruction *instr = bld.instructions + pc; 2498 const struct tgsi_opcode_info *opcode_info = 2499 tgsi_get_opcode_info(instr->Instruction.Opcode); 2500 if (!emit_instruction( &bld, instr, opcode_info, &pc )) 2501 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 2502 opcode_info->mnemonic); 2503 } 2504 2505 /* If we have indirect addressing in outputs we need to copy our alloca array 2506 * to the outputs slots specified by the called */ 2507 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2508 unsigned index, chan; 2509 assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1); 2510 for (index = 0; index < info->num_outputs; ++index) { 2511 for (chan = 0; chan < NUM_CHANNELS; ++chan) { 2512 bld.outputs[index][chan] = get_output_ptr(&bld, index, chan); 2513 } 2514 } 2515 } 2516 2517 if (0) { 2518 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); 2519 LLVMValueRef function = LLVMGetBasicBlockParent(block); 2520 debug_printf("11111111111111111111111111111 \n"); 2521 tgsi_dump(tokens, 0); 2522 lp_debug_dump_value(function); 2523 debug_printf("2222222222222222222222222222 \n"); 2524 } 2525 tgsi_parse_free( &parse ); 2526 2527 if (0) { 2528 LLVMModuleRef module = LLVMGetGlobalParent( 2529 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 2530 LLVMDumpModule(module); 2531 2532 } 2533 2534 FREE( bld.instructions ); 2535} 2536 2537 2538/** 2539 * Build up the system values array out of individual values such as 2540 * the instance ID, front-face, primitive ID, etc. The shader info is 2541 * used to determine which system values are needed and where to put 2542 * them in the system values array. 2543 * 2544 * XXX only instance ID is implemented at this time. 2545 * 2546 * The system values register file is similar to the constants buffer. 2547 * Example declaration: 2548 * DCL SV[0], INSTANCEID 2549 * Example instruction: 2550 * MOVE foo, SV[0].xxxx; 2551 * 2552 * \return LLVM float array (interpreted as float [][4]) 2553 */ 2554LLVMValueRef 2555lp_build_system_values_array(struct gallivm_state *gallivm, 2556 const struct tgsi_shader_info *info, 2557 LLVMValueRef instance_id, 2558 LLVMValueRef facing) 2559{ 2560 LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values); 2561 LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context); 2562 LLVMValueRef array = lp_build_array_alloca(gallivm, float_t, 2563 size, "sysvals_array"); 2564 unsigned i; 2565 2566 for (i = 0; i < info->num_system_values; i++) { 2567 LLVMValueRef index = lp_build_const_int32(gallivm, i * 4); 2568 LLVMValueRef ptr, value = 0; 2569 2570 switch (info->system_value_semantic_name[i]) { 2571 case TGSI_SEMANTIC_INSTANCEID: 2572 /* convert instance ID from int to float */ 2573 value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t, 2574 "sysval_instanceid"); 2575 break; 2576 case TGSI_SEMANTIC_FACE: 2577 /* fall-through */ 2578 default: 2579 assert(0 && "unexpected semantic in build_system_values_array()"); 2580 } 2581 2582 ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, ""); 2583 LLVMBuildStore(gallivm->builder, value, ptr); 2584 } 2585 2586 return array; 2587} 2588