lp_bld_tgsi_soa.c revision 6299f241e9fdd86e705d144a42d9b1979c13f9ad
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 
37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_info.h" 46#include "tgsi/tgsi_parse.h" 47#include "tgsi/tgsi_util.h" 48#include "tgsi/tgsi_scan.h" 49#include "lp_bld_type.h" 50#include "lp_bld_const.h" 51#include "lp_bld_arit.h" 52#include "lp_bld_bitarit.h" 53#include "lp_bld_gather.h" 54#include "lp_bld_init.h" 55#include "lp_bld_logic.h" 56#include "lp_bld_swizzle.h" 57#include "lp_bld_flow.h" 58#include "lp_bld_quad.h" 59#include "lp_bld_tgsi.h" 60#include "lp_bld_limits.h" 61#include "lp_bld_debug.h" 62#include "lp_bld_printf.h" 63 64 65#define FOR_EACH_CHANNEL( CHAN )\ 66 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 67 68#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 69 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 70 71#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 72 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 73 74#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 75 FOR_EACH_CHANNEL( CHAN )\ 76 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 77 78#define CHAN_X 0 79#define CHAN_Y 1 80#define CHAN_Z 2 81#define CHAN_W 3 82#define NUM_CHANNELS 4 83 84#define LP_MAX_INSTRUCTIONS 256 85 86 87struct lp_exec_mask { 88 struct lp_build_context *bld; 89 90 boolean has_mask; 91 92 LLVMTypeRef int_vec_type; 93 94 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 95 int cond_stack_size; 96 LLVMValueRef cond_mask; 97 98 LLVMBasicBlockRef loop_block; 99 LLVMValueRef cont_mask; 100 LLVMValueRef break_mask; 101 LLVMValueRef break_var; 102 struct { 103 LLVMBasicBlockRef loop_block; 104 LLVMValueRef cont_mask; 105 LLVMValueRef break_mask; 106 LLVMValueRef break_var; 107 } loop_stack[LP_MAX_TGSI_NESTING]; 108 int loop_stack_size; 109 110 LLVMValueRef ret_mask; 111 struct { 112 int pc; 113 LLVMValueRef ret_mask; 114 } call_stack[LP_MAX_TGSI_NESTING]; 115 int call_stack_size; 116 117 LLVMValueRef exec_mask; 118}; 119 
120struct lp_build_tgsi_soa_context 121{ 122 struct lp_build_context base; 123 124 /* Builder for vector integer masks and indices */ 125 struct lp_build_context uint_bld; 126 127 /* Builder for scalar elements of shader's data type (float) */ 128 struct lp_build_context elem_bld; 129 130 LLVMValueRef consts_ptr; 131 const LLVMValueRef *pos; 132 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 133 LLVMValueRef (*outputs)[NUM_CHANNELS]; 134 135 const struct lp_build_sampler_soa *sampler; 136 137 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; 138 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; 139 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; 140 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; 141 142 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 143 * set in the indirect_files field. 144 * The temps[] array above is unused then. 145 */ 146 LLVMValueRef temps_array; 147 148 /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is 149 * set in the indirect_files field. 150 * The outputs[] array above is unused then. 151 */ 152 LLVMValueRef outputs_array; 153 154 /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is 155 * set in the indirect_files field. 156 * The inputs[] array above is unused then. 
157 */ 158 LLVMValueRef inputs_array; 159 160 const struct tgsi_shader_info *info; 161 /** bitmask indicating which register files are accessed indirectly */ 162 unsigned indirect_files; 163 164 struct lp_build_mask_context *mask; 165 struct lp_exec_mask exec_mask; 166 167 struct tgsi_full_instruction *instructions; 168 uint max_instructions; 169}; 170 171static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 172{ 173 mask->bld = bld; 174 mask->has_mask = FALSE; 175 mask->cond_stack_size = 0; 176 mask->loop_stack_size = 0; 177 mask->call_stack_size = 0; 178 179 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type); 180 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = 181 LLVMConstAllOnes(mask->int_vec_type); 182} 183 184static void lp_exec_mask_update(struct lp_exec_mask *mask) 185{ 186 LLVMBuilderRef builder = mask->bld->gallivm->builder; 187 188 if (mask->loop_stack_size) { 189 /*for loops we need to update the entire mask at runtime */ 190 LLVMValueRef tmp; 191 assert(mask->break_mask); 192 tmp = LLVMBuildAnd(builder, 193 mask->cont_mask, 194 mask->break_mask, 195 "maskcb"); 196 mask->exec_mask = LLVMBuildAnd(builder, 197 mask->cond_mask, 198 tmp, 199 "maskfull"); 200 } else 201 mask->exec_mask = mask->cond_mask; 202 203 if (mask->call_stack_size) { 204 mask->exec_mask = LLVMBuildAnd(builder, 205 mask->exec_mask, 206 mask->ret_mask, 207 "callmask"); 208 } 209 210 mask->has_mask = (mask->cond_stack_size > 0 || 211 mask->loop_stack_size > 0 || 212 mask->call_stack_size > 0); 213} 214 215static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 216 LLVMValueRef val) 217{ 218 LLVMBuilderRef builder = mask->bld->gallivm->builder; 219 220 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 221 if (mask->cond_stack_size == 0) { 222 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); 223 } 224 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 225 
assert(LLVMTypeOf(val) == mask->int_vec_type); 226 mask->cond_mask = LLVMBuildAnd(builder, 227 mask->cond_mask, 228 val, 229 ""); 230 lp_exec_mask_update(mask); 231} 232 233static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 234{ 235 LLVMBuilderRef builder = mask->bld->gallivm->builder; 236 LLVMValueRef prev_mask; 237 LLVMValueRef inv_mask; 238 239 assert(mask->cond_stack_size); 240 prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 241 if (mask->cond_stack_size == 1) { 242 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); 243 } 244 245 inv_mask = LLVMBuildNot(builder, mask->cond_mask, ""); 246 247 mask->cond_mask = LLVMBuildAnd(builder, 248 inv_mask, 249 prev_mask, ""); 250 lp_exec_mask_update(mask); 251} 252 253static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 254{ 255 assert(mask->cond_stack_size); 256 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 257 lp_exec_mask_update(mask); 258} 259 260static void lp_exec_bgnloop(struct lp_exec_mask *mask) 261{ 262 LLVMBuilderRef builder = mask->bld->gallivm->builder; 263 264 if (mask->loop_stack_size == 0) { 265 assert(mask->loop_block == NULL); 266 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); 267 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type)); 268 assert(mask->break_var == NULL); 269 } 270 271 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING); 272 273 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block; 274 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask; 275 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask; 276 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; 277 ++mask->loop_stack_size; 278 279 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, ""); 280 LLVMBuildStore(builder, mask->break_mask, mask->break_var); 281 282 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop"); 283 LLVMBuildBr(builder, 
mask->loop_block); 284 LLVMPositionBuilderAtEnd(builder, mask->loop_block); 285 286 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, ""); 287 288 lp_exec_mask_update(mask); 289} 290 291static void lp_exec_break(struct lp_exec_mask *mask) 292{ 293 LLVMBuilderRef builder = mask->bld->gallivm->builder; 294 LLVMValueRef exec_mask = LLVMBuildNot(builder, 295 mask->exec_mask, 296 "break"); 297 298 mask->break_mask = LLVMBuildAnd(builder, 299 mask->break_mask, 300 exec_mask, "break_full"); 301 302 lp_exec_mask_update(mask); 303} 304 305static void lp_exec_continue(struct lp_exec_mask *mask) 306{ 307 LLVMBuilderRef builder = mask->bld->gallivm->builder; 308 LLVMValueRef exec_mask = LLVMBuildNot(builder, 309 mask->exec_mask, 310 ""); 311 312 mask->cont_mask = LLVMBuildAnd(builder, 313 mask->cont_mask, 314 exec_mask, ""); 315 316 lp_exec_mask_update(mask); 317} 318 319 320static void lp_exec_endloop(struct gallivm_state *gallivm, 321 struct lp_exec_mask *mask) 322{ 323 LLVMBuilderRef builder = mask->bld->gallivm->builder; 324 LLVMBasicBlockRef endloop; 325 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context, 326 mask->bld->type.width * 327 mask->bld->type.length); 328 LLVMValueRef i1cond; 329 330 assert(mask->break_mask); 331 332 /* 333 * Restore the cont_mask, but don't pop 334 */ 335 assert(mask->loop_stack_size); 336 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask; 337 lp_exec_mask_update(mask); 338 339 /* 340 * Unlike the continue mask, the break_mask must be preserved across loop 341 * iterations 342 */ 343 LLVMBuildStore(builder, mask->break_mask, mask->break_var); 344 345 /* i1cond = (mask == 0) */ 346 i1cond = LLVMBuildICmp( 347 builder, 348 LLVMIntNE, 349 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""), 350 LLVMConstNull(reg_type), ""); 351 352 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop"); 353 354 LLVMBuildCondBr(builder, 355 i1cond, mask->loop_block, endloop); 356 357 
LLVMPositionBuilderAtEnd(builder, endloop); 358 359 assert(mask->loop_stack_size); 360 --mask->loop_stack_size; 361 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block; 362 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask; 363 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask; 364 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var; 365 366 lp_exec_mask_update(mask); 367} 368 369/* stores val into an address pointed to by dst. 370 * mask->exec_mask is used to figure out which bits of val 371 * should be stored into the address 372 * (0 means don't store this bit, 1 means do store). 373 */ 374static void lp_exec_mask_store(struct lp_exec_mask *mask, 375 LLVMValueRef pred, 376 LLVMValueRef val, 377 LLVMValueRef dst) 378{ 379 LLVMBuilderRef builder = mask->bld->gallivm->builder; 380 381 /* Mix the predicate and execution mask */ 382 if (mask->has_mask) { 383 if (pred) { 384 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); 385 } else { 386 pred = mask->exec_mask; 387 } 388 } 389 390 if (pred) { 391 LLVMValueRef real_val, dst_val; 392 393 dst_val = LLVMBuildLoad(builder, dst, ""); 394 real_val = lp_build_select(mask->bld, 395 pred, 396 val, dst_val); 397 398 LLVMBuildStore(builder, real_val, dst); 399 } else 400 LLVMBuildStore(builder, val, dst); 401} 402 403static void lp_exec_mask_call(struct lp_exec_mask *mask, 404 int func, 405 int *pc) 406{ 407 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING); 408 mask->call_stack[mask->call_stack_size].pc = *pc; 409 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask; 410 mask->call_stack_size++; 411 *pc = func; 412} 413 414static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) 415{ 416 LLVMBuilderRef builder = mask->bld->gallivm->builder; 417 LLVMValueRef exec_mask; 418 419 if (mask->call_stack_size == 0) { 420 /* returning from main() */ 421 *pc = -1; 422 return; 423 } 424 exec_mask = LLVMBuildNot(builder, 425 
mask->exec_mask, 426 "ret"); 427 428 mask->ret_mask = LLVMBuildAnd(builder, 429 mask->ret_mask, 430 exec_mask, "ret_full"); 431 432 lp_exec_mask_update(mask); 433} 434 435static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) 436{ 437} 438 439static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) 440{ 441 assert(mask->call_stack_size); 442 mask->call_stack_size--; 443 *pc = mask->call_stack[mask->call_stack_size].pc; 444 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask; 445 lp_exec_mask_update(mask); 446} 447 448 449/** 450 * Return pointer to a temporary register channel (src or dest). 451 * Note that indirect addressing cannot be handled here. 452 * \param index which temporary register 453 * \param chan which channel of the temp register. 454 */ 455static LLVMValueRef 456get_temp_ptr(struct lp_build_tgsi_soa_context *bld, 457 unsigned index, 458 unsigned chan) 459{ 460 LLVMBuilderRef builder = bld->base.gallivm->builder; 461 assert(chan < 4); 462 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 463 LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan); 464 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, ""); 465 } 466 else { 467 return bld->temps[index][chan]; 468 } 469} 470 471/** 472 * Return pointer to a output register channel (src or dest). 473 * Note that indirect addressing cannot be handled here. 474 * \param index which output register 475 * \param chan which channel of the output register. 
476 */ 477static LLVMValueRef 478get_output_ptr(struct lp_build_tgsi_soa_context *bld, 479 unsigned index, 480 unsigned chan) 481{ 482 LLVMBuilderRef builder = bld->base.gallivm->builder; 483 assert(chan < 4); 484 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 485 LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, 486 index * 4 + chan); 487 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, ""); 488 } 489 else { 490 return bld->outputs[index][chan]; 491 } 492} 493 494/** 495 * Gather vector. 496 * XXX the lp_build_gather() function should be capable of doing this 497 * with a little work. 498 */ 499static LLVMValueRef 500build_gather(struct lp_build_tgsi_soa_context *bld, 501 LLVMValueRef base_ptr, 502 LLVMValueRef indexes) 503{ 504 LLVMBuilderRef builder = bld->base.gallivm->builder; 505 LLVMValueRef res = bld->base.undef; 506 unsigned i; 507 508 /* 509 * Loop over elements of index_vec, load scalar value, insert it into 'res'. 510 */ 511 for (i = 0; i < bld->base.type.length; i++) { 512 LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i); 513 LLVMValueRef index = LLVMBuildExtractElement(builder, 514 indexes, ii, ""); 515 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, 516 &index, 1, "gather_ptr"); 517 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 518 519 res = LLVMBuildInsertElement(builder, res, scalar, ii, ""); 520 } 521 522 return res; 523} 524 525 526/** 527 * Scatter/store vector. 
528 */ 529static void 530emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, 531 LLVMValueRef base_ptr, 532 LLVMValueRef indexes, 533 LLVMValueRef values, 534 struct lp_exec_mask *mask, 535 LLVMValueRef pred) 536{ 537 struct gallivm_state *gallivm = bld->base.gallivm; 538 LLVMBuilderRef builder = builder; 539 unsigned i; 540 541 /* Mix the predicate and execution mask */ 542 if (mask->has_mask) { 543 if (pred) { 544 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); 545 } 546 else { 547 pred = mask->exec_mask; 548 } 549 } 550 551 /* 552 * Loop over elements of index_vec, store scalar value. 553 */ 554 for (i = 0; i < bld->base.type.length; i++) { 555 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 556 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); 557 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); 558 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); 559 LLVMValueRef scalar_pred = pred ? 560 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; 561 562 if (0) 563 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n", 564 ii, val, index, scalar_ptr); 565 566 if (scalar_pred) { 567 LLVMValueRef real_val, dst_val; 568 dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); 569 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); 570 LLVMBuildStore(builder, real_val, scalar_ptr); 571 } 572 else { 573 LLVMBuildStore(builder, val, scalar_ptr); 574 } 575 } 576} 577 578 579/** 580 * Read the current value of the ADDR register, convert the floats to 581 * ints, add the base index and return the vector of offsets. 582 * The offsets will be used to index into the constant buffer or 583 * temporary register file. 
584 */ 585static LLVMValueRef 586get_indirect_index(struct lp_build_tgsi_soa_context *bld, 587 unsigned reg_file, unsigned reg_index, 588 const struct tgsi_src_register *indirect_reg) 589{ 590 LLVMBuilderRef builder = bld->base.gallivm->builder; 591 struct lp_build_context *uint_bld = &bld->uint_bld; 592 /* always use X component of address register */ 593 unsigned swizzle = indirect_reg->SwizzleX; 594 LLVMValueRef base; 595 LLVMValueRef rel; 596 LLVMValueRef max_index; 597 LLVMValueRef index; 598 599 assert(bld->indirect_files & (1 << reg_file)); 600 601 base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index); 602 603 assert(swizzle < 4); 604 rel = LLVMBuildLoad(builder, 605 bld->addr[indirect_reg->Index][swizzle], 606 "load addr reg"); 607 608 /* for indexing we want integers */ 609 rel = LLVMBuildFPToSI(builder, 610 rel, 611 uint_bld->vec_type, ""); 612 613 index = lp_build_add(uint_bld, base, rel); 614 615 max_index = lp_build_const_int_vec(bld->base.gallivm, 616 uint_bld->type, 617 bld->info->file_max[reg_file]); 618 619 assert(!uint_bld->type.sign); 620 index = lp_build_min(uint_bld, index, max_index); 621 622 return index; 623} 624 625 626/** 627 * Register fetch. 
628 */ 629static LLVMValueRef 630emit_fetch( 631 struct lp_build_tgsi_soa_context *bld, 632 const struct tgsi_full_instruction *inst, 633 unsigned src_op, 634 const unsigned chan_index ) 635{ 636 struct gallivm_state *gallivm = bld->base.gallivm; 637 LLVMBuilderRef builder = gallivm->builder; 638 struct lp_build_context *uint_bld = &bld->uint_bld; 639 const struct tgsi_full_src_register *reg = &inst->Src[src_op]; 640 const unsigned swizzle = 641 tgsi_util_get_full_src_register_swizzle(reg, chan_index); 642 LLVMValueRef res; 643 LLVMValueRef indirect_index = NULL; 644 645 if (swizzle > 3) { 646 assert(0 && "invalid swizzle in emit_fetch()"); 647 return bld->base.undef; 648 } 649 650 if (reg->Register.Indirect) { 651 indirect_index = get_indirect_index(bld, 652 reg->Register.File, 653 reg->Register.Index, 654 ®->Indirect); 655 } else { 656 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); 657 } 658 659 switch (reg->Register.File) { 660 case TGSI_FILE_CONSTANT: 661 if (reg->Register.Indirect) { 662 LLVMValueRef swizzle_vec = 663 lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle); 664 LLVMValueRef index_vec; /* index into the const buffer */ 665 666 /* index_vec = indirect_index * 4 + swizzle */ 667 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 668 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 669 670 /* Gather values from the constant buffer */ 671 res = build_gather(bld, bld->consts_ptr, index_vec); 672 } 673 else { 674 LLVMValueRef index; /* index into the const buffer */ 675 LLVMValueRef scalar, scalar_ptr; 676 677 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle); 678 679 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, 680 &index, 1, ""); 681 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 682 683 res = lp_build_broadcast_scalar(&bld->base, scalar); 684 } 685 break; 686 687 case TGSI_FILE_IMMEDIATE: 688 res = bld->immediates[reg->Register.Index][swizzle]; 689 assert(res); 
690 break; 691 692 case TGSI_FILE_INPUT: 693 if (reg->Register.Indirect) { 694 LLVMValueRef swizzle_vec = 695 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); 696 LLVMValueRef length_vec = 697 lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length); 698 LLVMValueRef index_vec; /* index into the const buffer */ 699 LLVMValueRef inputs_array; 700 LLVMTypeRef float4_ptr_type; 701 702 /* index_vec = (indirect_index * 4 + swizzle) * length */ 703 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 704 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 705 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 706 707 /* cast inputs_array pointer to float* */ 708 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 709 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, 710 float4_ptr_type, ""); 711 712 /* Gather values from the temporary register array */ 713 res = build_gather(bld, inputs_array, index_vec); 714 } else { 715 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { 716 LLVMValueRef lindex = lp_build_const_int32(gallivm, 717 reg->Register.Index * 4 + swizzle); 718 LLVMValueRef input_ptr = LLVMBuildGEP(builder, 719 bld->inputs_array, &lindex, 1, ""); 720 res = LLVMBuildLoad(builder, input_ptr, ""); 721 } 722 else { 723 res = bld->inputs[reg->Register.Index][swizzle]; 724 } 725 } 726 assert(res); 727 break; 728 729 case TGSI_FILE_TEMPORARY: 730 if (reg->Register.Indirect) { 731 LLVMValueRef swizzle_vec = 732 lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle); 733 LLVMValueRef length_vec = 734 lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, 735 bld->base.type.length); 736 LLVMValueRef index_vec; /* index into the const buffer */ 737 LLVMValueRef temps_array; 738 LLVMTypeRef float4_ptr_type; 739 740 /* index_vec = (indirect_index * 4 + swizzle) * length */ 741 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 742 index_vec = lp_build_add(uint_bld, 
index_vec, swizzle_vec); 743 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 744 745 /* cast temps_array pointer to float* */ 746 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0); 747 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 748 float4_ptr_type, ""); 749 750 /* Gather values from the temporary register array */ 751 res = build_gather(bld, temps_array, index_vec); 752 } 753 else { 754 LLVMValueRef temp_ptr; 755 temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle); 756 res = LLVMBuildLoad(builder, temp_ptr, ""); 757 if (!res) 758 return bld->base.undef; 759 } 760 break; 761 762 default: 763 assert(0 && "invalid src register in emit_fetch()"); 764 return bld->base.undef; 765 } 766 767 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { 768 case TGSI_UTIL_SIGN_CLEAR: 769 res = lp_build_abs( &bld->base, res ); 770 break; 771 772 case TGSI_UTIL_SIGN_SET: 773 res = lp_build_abs( &bld->base, res ); 774 /* fall through */ 775 case TGSI_UTIL_SIGN_TOGGLE: 776 res = lp_build_negate( &bld->base, res ); 777 break; 778 779 case TGSI_UTIL_SIGN_KEEP: 780 break; 781 } 782 783 return res; 784} 785 786 787/** 788 * Register fetch with derivatives. 789 */ 790static void 791emit_fetch_deriv( 792 struct lp_build_tgsi_soa_context *bld, 793 const struct tgsi_full_instruction *inst, 794 unsigned index, 795 const unsigned chan_index, 796 LLVMValueRef *res, 797 LLVMValueRef *ddx, 798 LLVMValueRef *ddy) 799{ 800 LLVMValueRef src; 801 802 src = emit_fetch(bld, inst, index, chan_index); 803 804 if(res) 805 *res = src; 806 807 /* TODO: use interpolation coeffs for inputs */ 808 809 if(ddx) 810 *ddx = lp_build_ddx(&bld->base, src); 811 812 if(ddy) 813 *ddy = lp_build_ddy(&bld->base, src); 814} 815 816 817/** 818 * Predicate. 
819 */ 820static void 821emit_fetch_predicate( 822 struct lp_build_tgsi_soa_context *bld, 823 const struct tgsi_full_instruction *inst, 824 LLVMValueRef *pred) 825{ 826 LLVMBuilderRef builder = bld->base.gallivm->builder; 827 unsigned index; 828 unsigned char swizzles[4]; 829 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; 830 LLVMValueRef value; 831 unsigned chan; 832 833 if (!inst->Instruction.Predicate) { 834 FOR_EACH_CHANNEL( chan ) { 835 pred[chan] = NULL; 836 } 837 return; 838 } 839 840 swizzles[0] = inst->Predicate.SwizzleX; 841 swizzles[1] = inst->Predicate.SwizzleY; 842 swizzles[2] = inst->Predicate.SwizzleZ; 843 swizzles[3] = inst->Predicate.SwizzleW; 844 845 index = inst->Predicate.Index; 846 assert(index < LP_MAX_TGSI_PREDS); 847 848 FOR_EACH_CHANNEL( chan ) { 849 unsigned swizzle = swizzles[chan]; 850 851 /* 852 * Only fetch the predicate register channels that are actually listed 853 * in the swizzles 854 */ 855 if (!unswizzled[swizzle]) { 856 value = LLVMBuildLoad(builder, 857 bld->preds[index][swizzle], ""); 858 859 /* 860 * Convert the value to an integer mask. 861 * 862 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions 863 * is needlessly causing two comparisons due to storing the intermediate 864 * result as float vector instead of an integer mask vector. 865 */ 866 value = lp_build_compare(bld->base.gallivm, 867 bld->base.type, 868 PIPE_FUNC_NOTEQUAL, 869 value, 870 bld->base.zero); 871 if (inst->Predicate.Negate) { 872 value = LLVMBuildNot(builder, value, ""); 873 } 874 875 unswizzled[swizzle] = value; 876 } else { 877 value = unswizzled[swizzle]; 878 } 879 880 pred[chan] = value; 881 } 882} 883 884 885/** 886 * Register store. 
887 */ 888static void 889emit_store( 890 struct lp_build_tgsi_soa_context *bld, 891 const struct tgsi_full_instruction *inst, 892 unsigned index, 893 unsigned chan_index, 894 LLVMValueRef pred, 895 LLVMValueRef value) 896{ 897 struct gallivm_state *gallivm = bld->base.gallivm; 898 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 899 struct lp_build_context *uint_bld = &bld->uint_bld; 900 LLVMValueRef indirect_index = NULL; 901 902 switch( inst->Instruction.Saturate ) { 903 case TGSI_SAT_NONE: 904 break; 905 906 case TGSI_SAT_ZERO_ONE: 907 value = lp_build_max(&bld->base, value, bld->base.zero); 908 value = lp_build_min(&bld->base, value, bld->base.one); 909 break; 910 911 case TGSI_SAT_MINUS_PLUS_ONE: 912 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0)); 913 value = lp_build_min(&bld->base, value, bld->base.one); 914 break; 915 916 default: 917 assert(0); 918 } 919 920 if (reg->Register.Indirect) { 921 indirect_index = get_indirect_index(bld, 922 reg->Register.File, 923 reg->Register.Index, 924 ®->Indirect); 925 } else { 926 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); 927 } 928 929 switch( reg->Register.File ) { 930 case TGSI_FILE_OUTPUT: 931 if (reg->Register.Indirect) { 932 LLVMBuilderRef builder = builder; 933 LLVMValueRef chan_vec = 934 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); 935 LLVMValueRef length_vec = 936 lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length); 937 LLVMValueRef index_vec; /* indexes into the temp registers */ 938 LLVMValueRef outputs_array; 939 LLVMValueRef pixel_offsets; 940 LLVMTypeRef float_ptr_type; 941 int i; 942 943 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 944 pixel_offsets = uint_bld->undef; 945 for (i = 0; i < bld->base.type.length; i++) { 946 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 947 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 948 ii, ii, ""); 949 } 950 951 /* 
index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 952 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 953 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 954 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 955 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 956 957 float_ptr_type = 958 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 959 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, 960 float_ptr_type, ""); 961 962 /* Scatter store values into temp registers */ 963 emit_mask_scatter(bld, outputs_array, index_vec, value, 964 &bld->exec_mask, pred); 965 } 966 else { 967 LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index, 968 chan_index); 969 lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr); 970 } 971 break; 972 973 case TGSI_FILE_TEMPORARY: 974 if (reg->Register.Indirect) { 975 LLVMBuilderRef builder = builder; 976 LLVMValueRef chan_vec = 977 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); 978 LLVMValueRef length_vec = 979 lp_build_const_int_vec(gallivm, uint_bld->type, 980 bld->base.type.length); 981 LLVMValueRef index_vec; /* indexes into the temp registers */ 982 LLVMValueRef temps_array; 983 LLVMValueRef pixel_offsets; 984 LLVMTypeRef float_ptr_type; 985 int i; 986 987 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 988 pixel_offsets = uint_bld->undef; 989 for (i = 0; i < bld->base.type.length; i++) { 990 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 991 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 992 ii, ii, ""); 993 } 994 995 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 996 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 997 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 998 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 999 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 1000 1001 float_ptr_type = 1002 
LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1003 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 1004 float_ptr_type, ""); 1005 1006 /* Scatter store values into temp registers */ 1007 emit_mask_scatter(bld, temps_array, index_vec, value, 1008 &bld->exec_mask, pred); 1009 } 1010 else { 1011 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 1012 chan_index); 1013 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); 1014 } 1015 break; 1016 1017 case TGSI_FILE_ADDRESS: 1018 lp_exec_mask_store(&bld->exec_mask, pred, value, 1019 bld->addr[reg->Register.Index][chan_index]); 1020 break; 1021 1022 case TGSI_FILE_PREDICATE: 1023 lp_exec_mask_store(&bld->exec_mask, pred, value, 1024 bld->preds[reg->Register.Index][chan_index]); 1025 break; 1026 1027 default: 1028 assert( 0 ); 1029 } 1030} 1031 1032 1033/** 1034 * High-level instruction translators. 1035 */ 1036 1037static void 1038emit_tex( struct lp_build_tgsi_soa_context *bld, 1039 const struct tgsi_full_instruction *inst, 1040 enum lp_build_tex_modifier modifier, 1041 LLVMValueRef *texel) 1042{ 1043 LLVMBuilderRef builder = bld->base.gallivm->builder; 1044 unsigned unit; 1045 LLVMValueRef lod_bias, explicit_lod; 1046 LLVMValueRef oow = NULL; 1047 LLVMValueRef coords[3]; 1048 LLVMValueRef ddx[3]; 1049 LLVMValueRef ddy[3]; 1050 unsigned num_coords; 1051 unsigned i; 1052 1053 if (!bld->sampler) { 1054 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 1055 for (i = 0; i < 4; i++) { 1056 texel[i] = bld->base.undef; 1057 } 1058 return; 1059 } 1060 1061 switch (inst->Texture.Texture) { 1062 case TGSI_TEXTURE_1D: 1063 num_coords = 1; 1064 break; 1065 case TGSI_TEXTURE_2D: 1066 case TGSI_TEXTURE_RECT: 1067 num_coords = 2; 1068 break; 1069 case TGSI_TEXTURE_SHADOW1D: 1070 case TGSI_TEXTURE_SHADOW2D: 1071 case TGSI_TEXTURE_SHADOWRECT: 1072 case TGSI_TEXTURE_3D: 1073 case TGSI_TEXTURE_CUBE: 1074 num_coords = 3; 1075 break; 1076 default: 1077 
assert(0); 1078 return; 1079 } 1080 1081 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 1082 lod_bias = emit_fetch( bld, inst, 0, 3 ); 1083 explicit_lod = NULL; 1084 } 1085 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 1086 lod_bias = NULL; 1087 explicit_lod = emit_fetch( bld, inst, 0, 3 ); 1088 } 1089 else { 1090 lod_bias = NULL; 1091 explicit_lod = NULL; 1092 } 1093 1094 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 1095 oow = emit_fetch( bld, inst, 0, 3 ); 1096 oow = lp_build_rcp(&bld->base, oow); 1097 } 1098 1099 for (i = 0; i < num_coords; i++) { 1100 coords[i] = emit_fetch( bld, inst, 0, i ); 1101 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 1102 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 1103 } 1104 for (i = num_coords; i < 3; i++) { 1105 coords[i] = bld->base.undef; 1106 } 1107 1108 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 1109 LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0); 1110 for (i = 0; i < num_coords; i++) { 1111 LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); 1112 LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); 1113 ddx[i] = LLVMBuildExtractElement(builder, src1, index0, ""); 1114 ddy[i] = LLVMBuildExtractElement(builder, src2, index0, ""); 1115 } 1116 unit = inst->Src[3].Register.Index; 1117 } else { 1118 for (i = 0; i < num_coords; i++) { 1119 ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); 1120 ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); 1121 } 1122 unit = inst->Src[1].Register.Index; 1123 } 1124 for (i = num_coords; i < 3; i++) { 1125 ddx[i] = LLVMGetUndef(bld->base.elem_type); 1126 ddy[i] = LLVMGetUndef(bld->base.elem_type); 1127 } 1128 1129 bld->sampler->emit_fetch_texel(bld->sampler, 1130 bld->base.gallivm, 1131 bld->base.type, 1132 unit, num_coords, coords, 1133 ddx, ddy, 1134 lod_bias, explicit_lod, 1135 texel); 1136} 1137 1138static boolean 1139near_end_of_shader(struct lp_build_tgsi_soa_context *bld, 1140 int pc) 1141{ 1142 int i; 1143 1144 for (i = 0; i < 
5; i++) { 1145 unsigned opcode; 1146 1147 if (pc + i >= bld->info->num_instructions) 1148 return TRUE; 1149 1150 opcode = bld->instructions[pc + i].Instruction.Opcode; 1151 1152 if (opcode == TGSI_OPCODE_END) 1153 return TRUE; 1154 1155 if (opcode == TGSI_OPCODE_TEX || 1156 opcode == TGSI_OPCODE_TXP || 1157 opcode == TGSI_OPCODE_TXD || 1158 opcode == TGSI_OPCODE_TXB || 1159 opcode == TGSI_OPCODE_TXL || 1160 opcode == TGSI_OPCODE_TXF || 1161 opcode == TGSI_OPCODE_TXQ || 1162 opcode == TGSI_OPCODE_CAL || 1163 opcode == TGSI_OPCODE_CALLNZ || 1164 opcode == TGSI_OPCODE_IF || 1165 opcode == TGSI_OPCODE_IFC || 1166 opcode == TGSI_OPCODE_BGNLOOP || 1167 opcode == TGSI_OPCODE_SWITCH) 1168 return FALSE; 1169 } 1170 1171 return TRUE; 1172} 1173 1174 1175 1176/** 1177 * Kill fragment if any of the src register values are negative. 1178 */ 1179static void 1180emit_kil( 1181 struct lp_build_tgsi_soa_context *bld, 1182 const struct tgsi_full_instruction *inst, 1183 int pc) 1184{ 1185 LLVMBuilderRef builder = bld->base.gallivm->builder; 1186 const struct tgsi_full_src_register *reg = &inst->Src[0]; 1187 LLVMValueRef terms[NUM_CHANNELS]; 1188 LLVMValueRef mask; 1189 unsigned chan_index; 1190 1191 memset(&terms, 0, sizeof terms); 1192 1193 FOR_EACH_CHANNEL( chan_index ) { 1194 unsigned swizzle; 1195 1196 /* Unswizzle channel */ 1197 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1198 1199 /* Check if the component has not been already tested. */ 1200 assert(swizzle < NUM_CHANNELS); 1201 if( !terms[swizzle] ) 1202 /* TODO: change the comparison operator instead of setting the sign */ 1203 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 1204 } 1205 1206 mask = NULL; 1207 FOR_EACH_CHANNEL( chan_index ) { 1208 if(terms[chan_index]) { 1209 LLVMValueRef chan_mask; 1210 1211 /* 1212 * If term < 0 then mask = 0 else mask = ~0. 
1213 */ 1214 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 1215 1216 if(mask) 1217 mask = LLVMBuildAnd(builder, mask, chan_mask, ""); 1218 else 1219 mask = chan_mask; 1220 } 1221 } 1222 1223 if(mask) { 1224 lp_build_mask_update(bld->mask, mask); 1225 1226 if (!near_end_of_shader(bld, pc)) 1227 lp_build_mask_check(bld->mask); 1228 } 1229} 1230 1231 1232/** 1233 * Predicated fragment kill. 1234 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 1235 * The only predication is the execution mask which will apply if 1236 * we're inside a loop or conditional. 1237 */ 1238static void 1239emit_kilp(struct lp_build_tgsi_soa_context *bld, 1240 const struct tgsi_full_instruction *inst, 1241 int pc) 1242{ 1243 LLVMBuilderRef builder = bld->base.gallivm->builder; 1244 LLVMValueRef mask; 1245 1246 /* For those channels which are "alive", disable fragment shader 1247 * execution. 1248 */ 1249 if (bld->exec_mask.has_mask) { 1250 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); 1251 } 1252 else { 1253 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type); 1254 mask = zero; 1255 } 1256 1257 lp_build_mask_update(bld->mask, mask); 1258 1259 if (!near_end_of_shader(bld, pc)) 1260 lp_build_mask_check(bld->mask); 1261} 1262 1263 1264/** 1265 * Emit code which will dump the value of all the temporary registers 1266 * to stdout. 
1267 */ 1268static void 1269emit_dump_temps(struct lp_build_tgsi_soa_context *bld) 1270{ 1271 struct gallivm_state *gallivm = bld->base.gallivm; 1272 LLVMBuilderRef builder = gallivm->builder; 1273 LLVMValueRef temp_ptr; 1274 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); 1275 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1); 1276 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); 1277 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); 1278 int index; 1279 int n = bld->info->file_max[TGSI_FILE_TEMPORARY]; 1280 1281 for (index = 0; index < n; index++) { 1282 LLVMValueRef idx = lp_build_const_int32(gallivm, index); 1283 LLVMValueRef v[4][4], res; 1284 int chan; 1285 1286 lp_build_printf(gallivm, "TEMP[%d]:\n", idx); 1287 1288 for (chan = 0; chan < 4; chan++) { 1289 temp_ptr = get_temp_ptr(bld, index, chan); 1290 res = LLVMBuildLoad(builder, temp_ptr, ""); 1291 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); 1292 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); 1293 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, ""); 1294 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, ""); 1295 } 1296 1297 lp_build_printf(gallivm, " X: %f %f %f %f\n", 1298 v[0][0], v[0][1], v[0][2], v[0][3]); 1299 lp_build_printf(gallivm, " Y: %f %f %f %f\n", 1300 v[1][0], v[1][1], v[1][2], v[1][3]); 1301 lp_build_printf(gallivm, " Z: %f %f %f %f\n", 1302 v[2][0], v[2][1], v[2][2], v[2][3]); 1303 lp_build_printf(gallivm, " W: %f %f %f %f\n", 1304 v[3][0], v[3][1], v[3][2], v[3][3]); 1305 } 1306} 1307 1308 1309 1310static void 1311emit_declaration( 1312 struct lp_build_tgsi_soa_context *bld, 1313 const struct tgsi_full_declaration *decl) 1314{ 1315 struct gallivm_state *gallivm = bld->base.gallivm; 1316 LLVMTypeRef vec_type = bld->base.vec_type; 1317 const unsigned first = decl->Range.First; 1318 const unsigned last = decl->Range.Last; 1319 unsigned idx, i; 1320 1321 for (idx = first; idx <= last; ++idx) { 1322 assert(last <= 
bld->info->file_max[decl->Declaration.File]); 1323 switch (decl->Declaration.File) { 1324 case TGSI_FILE_TEMPORARY: 1325 assert(idx < LP_MAX_TGSI_TEMPS); 1326 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { 1327 for (i = 0; i < NUM_CHANNELS; i++) 1328 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); 1329 } 1330 break; 1331 1332 case TGSI_FILE_OUTPUT: 1333 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 1334 for (i = 0; i < NUM_CHANNELS; i++) 1335 bld->outputs[idx][i] = lp_build_alloca(gallivm, 1336 vec_type, "output"); 1337 } 1338 break; 1339 1340 case TGSI_FILE_ADDRESS: 1341 assert(idx < LP_MAX_TGSI_ADDRS); 1342 for (i = 0; i < NUM_CHANNELS; i++) 1343 bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr"); 1344 break; 1345 1346 case TGSI_FILE_PREDICATE: 1347 assert(idx < LP_MAX_TGSI_PREDS); 1348 for (i = 0; i < NUM_CHANNELS; i++) 1349 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, 1350 "predicate"); 1351 break; 1352 1353 default: 1354 /* don't need to declare other vars */ 1355 break; 1356 } 1357 } 1358} 1359 1360 1361/** 1362 * Emit LLVM for one TGSI instruction. 1363 * \param return TRUE for success, FALSE otherwise 1364 */ 1365static boolean 1366emit_instruction( 1367 struct lp_build_tgsi_soa_context *bld, 1368 const struct tgsi_full_instruction *inst, 1369 const struct tgsi_opcode_info *info, 1370 int *pc) 1371{ 1372 unsigned chan_index; 1373 LLVMValueRef src0, src1, src2; 1374 LLVMValueRef tmp0, tmp1, tmp2; 1375 LLVMValueRef tmp3 = NULL; 1376 LLVMValueRef tmp4 = NULL; 1377 LLVMValueRef tmp5 = NULL; 1378 LLVMValueRef tmp6 = NULL; 1379 LLVMValueRef tmp7 = NULL; 1380 LLVMValueRef res; 1381 LLVMValueRef dst0[NUM_CHANNELS]; 1382 1383 /* 1384 * Stores and write masks are handled in a general fashion after the long 1385 * instruction opcode switch statement. 1386 * 1387 * Although not stricitly necessary, we avoid generating instructions for 1388 * channels which won't be stored, in cases where's that easy. 
For some 1389 * complex instructions, like texture sampling, it is more convenient to 1390 * assume a full writemask and then let LLVM optimization passes eliminate 1391 * redundant code. 1392 */ 1393 1394 (*pc)++; 1395 1396 assert(info->num_dst <= 1); 1397 if (info->num_dst) { 1398 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1399 dst0[chan_index] = bld->base.undef; 1400 } 1401 } 1402 1403 switch (inst->Instruction.Opcode) { 1404 case TGSI_OPCODE_ARL: 1405 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1406 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1407 tmp0 = lp_build_floor(&bld->base, tmp0); 1408 dst0[chan_index] = tmp0; 1409 } 1410 break; 1411 1412 case TGSI_OPCODE_MOV: 1413 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1414 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1415 } 1416 break; 1417 1418 case TGSI_OPCODE_LIT: 1419 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 1420 dst0[CHAN_X] = bld->base.one; 1421 } 1422 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1423 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1424 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1425 } 1426 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1427 /* XMM[1] = SrcReg[0].yyyy */ 1428 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1429 /* XMM[1] = max(XMM[1], 0) */ 1430 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1431 /* XMM[2] = SrcReg[0].wwww */ 1432 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1433 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1434 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1435 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1436 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1437 } 1438 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1439 dst0[CHAN_W] = bld->base.one; 1440 } 1441 break; 1442 1443 case TGSI_OPCODE_RCP: 1444 /* TGSI_OPCODE_RECIP */ 1445 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1446 res = lp_build_rcp(&bld->base, src0); 1447 
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1448 dst0[chan_index] = res; 1449 } 1450 break; 1451 1452 case TGSI_OPCODE_RSQ: 1453 /* TGSI_OPCODE_RECIPSQRT */ 1454 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1455 src0 = lp_build_abs(&bld->base, src0); 1456 res = lp_build_rsqrt(&bld->base, src0); 1457 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1458 dst0[chan_index] = res; 1459 } 1460 break; 1461 1462 case TGSI_OPCODE_EXP: 1463 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1464 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1465 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1466 LLVMValueRef *p_exp2_int_part = NULL; 1467 LLVMValueRef *p_frac_part = NULL; 1468 LLVMValueRef *p_exp2 = NULL; 1469 1470 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1471 1472 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1473 p_exp2_int_part = &tmp0; 1474 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1475 p_frac_part = &tmp1; 1476 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1477 p_exp2 = &tmp2; 1478 1479 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1480 1481 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1482 dst0[CHAN_X] = tmp0; 1483 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1484 dst0[CHAN_Y] = tmp1; 1485 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1486 dst0[CHAN_Z] = tmp2; 1487 } 1488 /* dst.w = 1.0 */ 1489 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1490 dst0[CHAN_W] = bld->base.one; 1491 } 1492 break; 1493 1494 case TGSI_OPCODE_LOG: 1495 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1496 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1497 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1498 LLVMValueRef *p_floor_log2 = NULL; 1499 LLVMValueRef *p_exp = NULL; 1500 LLVMValueRef *p_log2 = NULL; 1501 1502 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1503 src0 = lp_build_abs( &bld->base, src0 ); 1504 1505 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1506 p_floor_log2 = &tmp0; 1507 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1508 p_exp = &tmp1; 1509 if 
(IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1510 p_log2 = &tmp2; 1511 1512 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1513 1514 /* dst.x = floor(lg2(abs(src.x))) */ 1515 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1516 dst0[CHAN_X] = tmp0; 1517 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1518 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1519 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1520 } 1521 /* dst.z = lg2(abs(src.x)) */ 1522 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1523 dst0[CHAN_Z] = tmp2; 1524 } 1525 /* dst.w = 1.0 */ 1526 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1527 dst0[CHAN_W] = bld->base.one; 1528 } 1529 break; 1530 1531 case TGSI_OPCODE_MUL: 1532 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1533 src0 = emit_fetch( bld, inst, 0, chan_index ); 1534 src1 = emit_fetch( bld, inst, 1, chan_index ); 1535 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1536 } 1537 break; 1538 1539 case TGSI_OPCODE_ADD: 1540 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1541 src0 = emit_fetch( bld, inst, 0, chan_index ); 1542 src1 = emit_fetch( bld, inst, 1, chan_index ); 1543 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1544 } 1545 break; 1546 1547 case TGSI_OPCODE_DP3: 1548 /* TGSI_OPCODE_DOT3 */ 1549 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1550 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1551 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1552 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1553 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1554 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1555 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1556 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1557 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1558 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1559 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1560 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1561 dst0[chan_index] = tmp0; 1562 } 1563 break; 1564 1565 case TGSI_OPCODE_DP4: 1566 /* TGSI_OPCODE_DOT4 
*/ 1567 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1568 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1569 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1570 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1571 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1572 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1573 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1574 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1575 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1576 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1577 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1578 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1579 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1580 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1581 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1582 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1583 dst0[chan_index] = tmp0; 1584 } 1585 break; 1586 1587 case TGSI_OPCODE_DST: 1588 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1589 dst0[CHAN_X] = bld->base.one; 1590 } 1591 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1592 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1593 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1594 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1595 } 1596 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1597 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1598 } 1599 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1600 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1601 } 1602 break; 1603 1604 case TGSI_OPCODE_MIN: 1605 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1606 src0 = emit_fetch( bld, inst, 0, chan_index ); 1607 src1 = emit_fetch( bld, inst, 1, chan_index ); 1608 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1609 } 1610 break; 1611 1612 case TGSI_OPCODE_MAX: 1613 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1614 src0 = emit_fetch( bld, inst, 0, chan_index ); 1615 src1 = emit_fetch( bld, inst, 1, chan_index ); 1616 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1617 } 1618 break; 1619 1620 case 
TGSI_OPCODE_SLT: 1621 /* TGSI_OPCODE_SETLT */ 1622 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1623 src0 = emit_fetch( bld, inst, 0, chan_index ); 1624 src1 = emit_fetch( bld, inst, 1, chan_index ); 1625 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1626 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1627 } 1628 break; 1629 1630 case TGSI_OPCODE_SGE: 1631 /* TGSI_OPCODE_SETGE */ 1632 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1633 src0 = emit_fetch( bld, inst, 0, chan_index ); 1634 src1 = emit_fetch( bld, inst, 1, chan_index ); 1635 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1636 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1637 } 1638 break; 1639 1640 case TGSI_OPCODE_MAD: 1641 /* TGSI_OPCODE_MADD */ 1642 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1643 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1644 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1645 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1646 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1647 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1648 dst0[chan_index] = tmp0; 1649 } 1650 break; 1651 1652 case TGSI_OPCODE_SUB: 1653 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1654 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1655 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1656 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1657 } 1658 break; 1659 1660 case TGSI_OPCODE_LRP: 1661 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1662 src0 = emit_fetch( bld, inst, 0, chan_index ); 1663 src1 = emit_fetch( bld, inst, 1, chan_index ); 1664 src2 = emit_fetch( bld, inst, 2, chan_index ); 1665 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1666 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1667 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1668 } 1669 break; 1670 1671 case TGSI_OPCODE_CND: 1672 FOR_EACH_DST0_ENABLED_CHANNEL( inst, 
chan_index ) { 1673 src0 = emit_fetch( bld, inst, 0, chan_index ); 1674 src1 = emit_fetch( bld, inst, 1, chan_index ); 1675 src2 = emit_fetch( bld, inst, 2, chan_index ); 1676 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); 1677 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1678 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1679 } 1680 break; 1681 1682 case TGSI_OPCODE_DP2A: 1683 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1684 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1685 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1686 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1687 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1688 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1689 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1690 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1691 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1692 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1693 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1694 } 1695 break; 1696 1697 case TGSI_OPCODE_FRC: 1698 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1699 src0 = emit_fetch( bld, inst, 0, chan_index ); 1700 tmp0 = lp_build_floor(&bld->base, src0); 1701 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1702 dst0[chan_index] = tmp0; 1703 } 1704 break; 1705 1706 case TGSI_OPCODE_CLAMP: 1707 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1708 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1709 src1 = emit_fetch( bld, inst, 1, chan_index ); 1710 src2 = emit_fetch( bld, inst, 2, chan_index ); 1711 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1712 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1713 dst0[chan_index] = tmp0; 1714 } 1715 break; 1716 1717 case TGSI_OPCODE_FLR: 1718 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1719 
tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1720 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1721 } 1722 break; 1723 1724 case TGSI_OPCODE_ROUND: 1725 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1726 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1727 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1728 } 1729 break; 1730 1731 case TGSI_OPCODE_EX2: { 1732 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1733 tmp0 = lp_build_exp2( &bld->base, tmp0); 1734 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1735 dst0[chan_index] = tmp0; 1736 } 1737 break; 1738 } 1739 1740 case TGSI_OPCODE_LG2: 1741 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1742 tmp0 = lp_build_log2( &bld->base, tmp0); 1743 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1744 dst0[chan_index] = tmp0; 1745 } 1746 break; 1747 1748 case TGSI_OPCODE_POW: 1749 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1750 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1751 res = lp_build_pow( &bld->base, src0, src1 ); 1752 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1753 dst0[chan_index] = res; 1754 } 1755 break; 1756 1757 case TGSI_OPCODE_XPD: 1758 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1759 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1760 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1761 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1762 } 1763 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1764 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1765 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1766 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1767 } 1768 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1769 tmp2 = tmp0; 1770 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1771 tmp5 = tmp3; 1772 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1773 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1774 dst0[CHAN_X] = tmp2; 1775 } 1776 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1777 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1778 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1779 tmp5 = emit_fetch( bld, inst, 0, 
CHAN_X ); 1780 } 1781 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1782 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1783 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1784 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 1785 dst0[CHAN_Y] = tmp3; 1786 } 1787 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1788 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1789 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1790 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1791 dst0[CHAN_Z] = tmp5; 1792 } 1793 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1794 dst0[CHAN_W] = bld->base.one; 1795 } 1796 break; 1797 1798 case TGSI_OPCODE_ABS: 1799 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1800 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1801 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1802 } 1803 break; 1804 1805 case TGSI_OPCODE_RCC: 1806 /* deprecated? */ 1807 assert(0); 1808 return FALSE; 1809 1810 case TGSI_OPCODE_DPH: 1811 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1812 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1813 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1814 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1815 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1816 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1817 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1818 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1819 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1820 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1821 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1822 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1823 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1824 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1825 dst0[chan_index] = tmp0; 1826 } 1827 break; 1828 1829 case TGSI_OPCODE_COS: 1830 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1831 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1832 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1833 dst0[chan_index] = tmp0; 1834 } 1835 break; 1836 1837 case TGSI_OPCODE_DDX: 1838 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) 
{ 1839 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1840 } 1841 break; 1842 1843 case TGSI_OPCODE_DDY: 1844 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1845 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1846 } 1847 break; 1848 1849 case TGSI_OPCODE_KILP: 1850 /* predicated kill */ 1851 emit_kilp( bld, inst, (*pc)-1 ); 1852 break; 1853 1854 case TGSI_OPCODE_KIL: 1855 /* conditional kill */ 1856 emit_kil( bld, inst, (*pc)-1 ); 1857 break; 1858 1859 case TGSI_OPCODE_PK2H: 1860 return FALSE; 1861 break; 1862 1863 case TGSI_OPCODE_PK2US: 1864 return FALSE; 1865 break; 1866 1867 case TGSI_OPCODE_PK4B: 1868 return FALSE; 1869 break; 1870 1871 case TGSI_OPCODE_PK4UB: 1872 return FALSE; 1873 break; 1874 1875 case TGSI_OPCODE_RFL: 1876 return FALSE; 1877 break; 1878 1879 case TGSI_OPCODE_SEQ: 1880 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1881 src0 = emit_fetch( bld, inst, 0, chan_index ); 1882 src1 = emit_fetch( bld, inst, 1, chan_index ); 1883 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1884 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1885 } 1886 break; 1887 1888 case TGSI_OPCODE_SFL: 1889 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1890 dst0[chan_index] = bld->base.zero; 1891 } 1892 break; 1893 1894 case TGSI_OPCODE_SGT: 1895 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1896 src0 = emit_fetch( bld, inst, 0, chan_index ); 1897 src1 = emit_fetch( bld, inst, 1, chan_index ); 1898 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1899 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1900 } 1901 break; 1902 1903 case TGSI_OPCODE_SIN: 1904 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1905 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1906 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1907 dst0[chan_index] = tmp0; 1908 } 1909 break; 1910 1911 case TGSI_OPCODE_SLE: 1912 
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1913 src0 = emit_fetch( bld, inst, 0, chan_index ); 1914 src1 = emit_fetch( bld, inst, 1, chan_index ); 1915 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 1916 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1917 } 1918 break; 1919 1920 case TGSI_OPCODE_SNE: 1921 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1922 src0 = emit_fetch( bld, inst, 0, chan_index ); 1923 src1 = emit_fetch( bld, inst, 1, chan_index ); 1924 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1925 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1926 } 1927 break; 1928 1929 case TGSI_OPCODE_STR: 1930 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1931 dst0[chan_index] = bld->base.one; 1932 } 1933 break; 1934 1935 case TGSI_OPCODE_TEX: 1936 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); 1937 break; 1938 1939 case TGSI_OPCODE_TXD: 1940 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1941 break; 1942 1943 case TGSI_OPCODE_UP2H: 1944 /* deprecated */ 1945 assert (0); 1946 return FALSE; 1947 break; 1948 1949 case TGSI_OPCODE_UP2US: 1950 /* deprecated */ 1951 assert(0); 1952 return FALSE; 1953 break; 1954 1955 case TGSI_OPCODE_UP4B: 1956 /* deprecated */ 1957 assert(0); 1958 return FALSE; 1959 break; 1960 1961 case TGSI_OPCODE_UP4UB: 1962 /* deprecated */ 1963 assert(0); 1964 return FALSE; 1965 break; 1966 1967 case TGSI_OPCODE_X2D: 1968 /* deprecated? 
*/ 1969 assert(0); 1970 return FALSE; 1971 break; 1972 1973 case TGSI_OPCODE_ARA: 1974 /* deprecated */ 1975 assert(0); 1976 return FALSE; 1977 break; 1978 1979 case TGSI_OPCODE_ARR: 1980 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1981 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1982 tmp0 = lp_build_round(&bld->base, tmp0); 1983 dst0[chan_index] = tmp0; 1984 } 1985 break; 1986 1987 case TGSI_OPCODE_BRA: 1988 /* deprecated */ 1989 assert(0); 1990 return FALSE; 1991 break; 1992 1993 case TGSI_OPCODE_CAL: 1994 lp_exec_mask_call(&bld->exec_mask, 1995 inst->Label.Label, 1996 pc); 1997 1998 break; 1999 2000 case TGSI_OPCODE_RET: 2001 lp_exec_mask_ret(&bld->exec_mask, pc); 2002 break; 2003 2004 case TGSI_OPCODE_END: 2005 if (0) { 2006 /* for debugging */ 2007 emit_dump_temps(bld); 2008 } 2009 *pc = -1; 2010 break; 2011 2012 case TGSI_OPCODE_SSG: 2013 /* TGSI_OPCODE_SGN */ 2014 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2015 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2016 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 2017 } 2018 break; 2019 2020 case TGSI_OPCODE_CMP: 2021 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2022 src0 = emit_fetch( bld, inst, 0, chan_index ); 2023 src1 = emit_fetch( bld, inst, 1, chan_index ); 2024 src2 = emit_fetch( bld, inst, 2, chan_index ); 2025 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 2026 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 2027 } 2028 break; 2029 2030 case TGSI_OPCODE_SCS: 2031 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 2032 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 2033 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 2034 } 2035 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 2036 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 2037 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 2038 } 2039 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 2040 dst0[CHAN_Z] = bld->base.zero; 2041 } 2042 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 2043 
dst0[CHAN_W] = bld->base.one; 2044 } 2045 break; 2046 2047 case TGSI_OPCODE_TXB: 2048 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); 2049 break; 2050 2051 case TGSI_OPCODE_NRM: 2052 /* fall-through */ 2053 case TGSI_OPCODE_NRM4: 2054 /* 3 or 4-component normalization */ 2055 { 2056 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 2057 2058 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 2059 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 2060 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 2061 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 2062 2063 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 2064 2065 /* xmm4 = src.x */ 2066 /* xmm0 = src.x * src.x */ 2067 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2068 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2069 tmp4 = tmp0; 2070 } 2071 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 2072 2073 /* xmm5 = src.y */ 2074 /* xmm0 = xmm0 + src.y * src.y */ 2075 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 2076 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2077 tmp5 = tmp1; 2078 } 2079 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2080 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2081 2082 /* xmm6 = src.z */ 2083 /* xmm0 = xmm0 + src.z * src.z */ 2084 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 2085 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2086 tmp6 = tmp1; 2087 } 2088 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2089 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2090 2091 if (dims == 4) { 2092 /* xmm7 = src.w */ 2093 /* xmm0 = xmm0 + src.w * src.w */ 2094 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 2095 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 2096 tmp7 = tmp1; 2097 } 2098 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2099 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2100 } 2101 2102 /* xmm1 = 1 / sqrt(xmm0) */ 2103 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 2104 2105 /* dst.x = xmm1 * src.x */ 2106 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2107 dst0[CHAN_X] = lp_build_mul( &bld->base, 
tmp4, tmp1); 2108 } 2109 2110 /* dst.y = xmm1 * src.y */ 2111 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2112 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 2113 } 2114 2115 /* dst.z = xmm1 * src.z */ 2116 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2117 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 2118 } 2119 2120 /* dst.w = xmm1 * src.w */ 2121 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 2122 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 2123 } 2124 } 2125 2126 /* dst.w = 1.0 */ 2127 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 2128 dst0[CHAN_W] = bld->base.one; 2129 } 2130 } 2131 break; 2132 2133 case TGSI_OPCODE_DIV: 2134 /* deprecated */ 2135 assert( 0 ); 2136 return FALSE; 2137 break; 2138 2139 case TGSI_OPCODE_DP2: 2140 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 2141 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 2142 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 2143 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 2144 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 2145 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 2146 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 2147 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2148 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 2149 } 2150 break; 2151 2152 case TGSI_OPCODE_TXL: 2153 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 2154 break; 2155 2156 case TGSI_OPCODE_TXP: 2157 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); 2158 break; 2159 2160 case TGSI_OPCODE_BRK: 2161 lp_exec_break(&bld->exec_mask); 2162 break; 2163 2164 case TGSI_OPCODE_IF: 2165 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2166 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 2167 tmp0, bld->base.zero); 2168 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 2169 break; 2170 2171 case TGSI_OPCODE_BGNLOOP: 2172 
      lp_exec_bgnloop(&bld->exec_mask);
      break;

   case TGSI_OPCODE_BGNSUB:
      /* Subroutine entry: push a new return-mask frame. */
      lp_exec_mask_bgnsub(&bld->exec_mask);
      break;

   case TGSI_OPCODE_ELSE:
      lp_exec_mask_cond_invert(&bld->exec_mask);
      break;

   case TGSI_OPCODE_ENDIF:
      lp_exec_mask_cond_pop(&bld->exec_mask);
      break;

   case TGSI_OPCODE_ENDLOOP:
      lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
      break;

   case TGSI_OPCODE_ENDSUB:
      /* Subroutine end: pop the call frame and resume at the saved pc. */
      lp_exec_mask_endsub(&bld->exec_mask, pc);
      break;

   case TGSI_OPCODE_PUSHA:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_POPA:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CEIL:
      /* Per-channel ceil() of src0. */
      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
         tmp0 = emit_fetch( bld, inst, 0, chan_index );
         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
      }
      break;

   case TGSI_OPCODE_I2F:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_NOT:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TRUNC:
      /* Per-channel truncation toward zero of src0. */
      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
         tmp0 = emit_fetch( bld, inst, 0, chan_index );
         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
      }
      break;

   case TGSI_OPCODE_SHL:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ISHR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_AND:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_OR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_MOD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_XOR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_SAD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXF:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXQ:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CONT:
      lp_exec_continue(&bld->exec_mask);
      break;

   case TGSI_OPCODE_EMIT:
      /* Geometry-shader opcode; not handled by this translator. */
      return FALSE;
      break;

   case TGSI_OPCODE_ENDPRIM:
      /* Geometry-shader opcode; not handled by this translator. */
      return FALSE;
      break;

   case TGSI_OPCODE_NOP:
      break;

   default:
      /* Unhandled opcode: signal failure so the caller can warn. */
      return FALSE;
   }

   if(info->num_dst) {
      LLVMValueRef pred[NUM_CHANNELS];

      /* Fetch the per-channel predicate masks (if any) and store the
       * computed channel values into the destination register, honouring
       * the write mask.
       */
      emit_fetch_predicate( bld, inst, pred );

      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
      }
   }

   return TRUE;
}


/**
 * Translate a TGSI token stream into LLVM IR, SoA style (one vector per
 * channel).
 *
 * The shader is parsed in full first — declarations and immediates are
 * processed immediately, instructions are saved into a growable array —
 * and then the saved instructions are emitted by walking a program
 * counter, which allows non-sequential control flow (subroutines) to
 * jump within the instruction list.
 *
 * \param gallivm     gallivm state (LLVM context / builder to emit into)
 * \param tokens      TGSI token stream to translate
 * \param type        vector type used for the generated computations
 * \param mask        execution-mask context (stored in the build context)
 * \param consts_ptr  pointer to the constant buffer
 * \param pos         fragment position values (stored in the build context)
 * \param inputs      already-interpolated input channel values
 * \param outputs     where the output channel values are stored
 * \param sampler     texture-sampling code generator
 * \param info        shader info previously produced by tgsi_scan
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_soa_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   uint num_instructions = 0;
   unsigned i;
   int pc = 0;   /* program counter into bld.instructions; -1 terminates */

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);

   /* Signed integer type of the same width/length as the compute type.
    * NOTE(review): res_type is not referenced again in this function —
    * possibly leftover; verify before removing.
    */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.info = info;
   bld.indirect_files = info->indirect_files;

   /* Growable array of parsed instructions; emitted later via pc so that
    * subroutine calls can jump around.
    */
   bld.instructions = (struct tgsi_full_instruction *)
                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
   bld.max_instructions = LP_MAX_INSTRUCTIONS;

   if (!bld.instructions) {
      return;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.base);

   /* Registers accessed with indirect (relative) addressing must live in
    * memory, so allocate an alloca'd array per indirectly-addressed file.
    * file_max[] is the highest register index used, hence "* 4 + 4" slots
    * (4 channels per register, +1 register inclusive).
    */
   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld.temps_array = lp_build_array_alloca(gallivm,
                                              bld.base.vec_type, array_size,
                                              "temp_array");
   }

   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld.outputs_array = lp_build_array_alloca(gallivm,
                                                bld.base.vec_type, array_size,
                                                "output_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld.base.vec_type;
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld.inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < info->num_inputs; ++index) {
         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
            /* Flat index: 4 channels per input register. */
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld.inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld.inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   /* First pass: walk the token stream, emitting declarations and
    * immediates right away and saving instructions for the second pass.
    */
   tgsi_parse_init( &parse, tokens );

   while( !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            /* save expanded instruction */
            if (num_instructions == bld.max_instructions) {
               /* Grow the instruction array by another chunk.  On REALLOC
                * failure the instruction is silently dropped (the old
                * buffer is kept and freed below).
                */
               struct tgsi_full_instruction *instructions;
               instructions = REALLOC(bld.instructions,
                                      bld.max_instructions
                                      * sizeof(struct tgsi_full_instruction),
                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
                                      * sizeof(struct tgsi_full_instruction));
               if (!instructions) {
                  break;
               }
               bld.instructions = instructions;
               bld.max_instructions += LP_MAX_INSTRUCTIONS;
            }

            memcpy(bld.instructions + num_instructions,
                   &parse.FullToken.FullInstruction,
                   sizeof(bld.instructions[0]));

            num_instructions++;
         }

         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            /* NOTE(review): bound only checked by assert — overflows
             * bld.immediates silently in release builds if the shader has
             * more than LP_MAX_TGSI_IMMEDIATES immediates.
             */
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
            for( i = 0; i < size; ++i )
               bld.immediates[num_immediates][i] =
                  lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float);
            /* Pad unspecified channels with undef. */
            for( i = size; i < 4; ++i )
               bld.immediates[num_immediates][i] = bld.base.undef;
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert( 0 );
      }
   }

   /* Second pass: emit the saved instructions.  emit_instruction()
    * advances pc (and may jump it for CAL/RET); pc == -1 means done.
    */
   while (pc != -1) {
      struct tgsi_full_instruction *instr = bld.instructions + pc;
      const struct tgsi_opcode_info *opcode_info =
         tgsi_get_opcode_info(instr->Instruction.Opcode);
      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                       opcode_info->mnemonic);
   }

   /* If we have indirect addressing in outputs we need to copy our alloca
    * array back to the output slots expected by the caller.
    */
   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      unsigned index, chan;
      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < info->num_outputs; ++index) {
         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
            bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
         }
      }
   }

   /* Debug aid: flip to 1 to dump the TGSI shader and generated function. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }
   tgsi_parse_free( &parse );

   /* Debug aid: flip to 1 to dump the whole LLVM module. */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }

   FREE( bld.instructions );
}