lp_bld_tgsi_soa.c revision 14746b1d4fc7ae30b557dacc819b81756df2f72f
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"


/** Iterate CHAN over the four register channels (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/** Non-zero if channel CHAN is set in instruction INST's dst[0] writemask. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/** Iterate CHAN over only the channels enabled in dst[0]'s writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* NOTE(review): presumably the initial size of the parsed-instruction
 * buffer (see max_instructions below) -- confirm against the code that
 * fills bld->instructions, which is outside this chunk. */
#define LP_MAX_INSTRUCTIONS 256


/**
 * Execution-mask state used to emulate TGSI control flow (IF/ELSE,
 * loops, subroutine calls) in SoA form.
 *
 * Each construct contributes a per-lane integer mask; the combined
 * exec_mask decides which SIMD lanes an instruction's stores affect.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   boolean has_mask;            /**< TRUE while any construct is active */

   LLVMTypeRef int_vec_type;    /**< integer vector type of all masks */

   /* IF/ELSE nesting: saved masks plus the current condition mask */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* Loop nesting.  break_var is an alloca so the break mask can be
    * carried across the loop's back-edge (see lp_exec_bgnloop). */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   LLVMValueRef break_var;
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Subroutine call nesting */
   LLVMValueRef ret_mask;
   struct {
      int pc;                   /**< instruction index to resume at */
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   LLVMValueRef exec_mask;      /**< cond & loop & call masks combined */
};

struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for vector integer masks and indices */
   struct lp_build_context uint_bld;

   /* Builder for scalar elements of shader's data type (float) */
   struct lp_build_context elem_bld;

   LLVMValueRef consts_ptr;     /* constant buffer base pointer */
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   const struct lp_build_sampler_soa *sampler;

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
    * set in the indirect_files field.
    * The outputs[] array above is unused then.
    */
   LLVMValueRef outputs_array;

   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
    * set in the indirect_files field.
    * The inputs[] array above is unused then.
    */
   LLVMValueRef inputs_array;

   const struct tgsi_shader_info *info;
   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};


/**
 * Initialize the execution-mask state: empty cond/loop/call stacks and
 * every mask set to all-ones (all lanes active).
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->cond_stack_size = 0;
   mask->loop_stack_size = 0;
   mask->call_stack_size = 0;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
         LLVMConstAllOnes(mask->int_vec_type);
}

/**
 * Recompute exec_mask after any component mask changed:
 * exec_mask = cond_mask [& cont_mask & break_mask] [& ret_mask],
 * and refresh has_mask from the three stack depths.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (mask->call_stack_size) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}

/**
 * Begin an IF: push the current cond_mask, then AND the IF condition
 * (val, already an integer mask vector) into it.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      /* outermost IF: mask must still be the all-ones constant */
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

/**
 * ELSE: activate the lanes that are live in the enclosing scope but
 * were NOT taken by the IF branch (~cond_mask & parent mask).
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

/** ENDIF: restore the condition mask saved by the matching IF. */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   assert(mask->cond_stack_size);
   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
   lp_exec_mask_update(mask);
}

/**
 * BGNLOOP: push the enclosing loop's state, allocate a fresh break_var
 * (an alloca, so the break mask survives the loop back-edge), and start
 * emitting into a new "bgnloop" basic block.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size == 0) {
      /* outermost loop: state must be pristine */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
   LLVMBuildBr(builder,
               mask->loop_block);
   LLVMPositionBuilderAtEnd(builder, mask->loop_block);

   /* reload the break mask inside the loop body */
   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}

/**
 * BRK: permanently deactivate the currently-executing lanes in
 * break_mask for the remainder of the loop.
 */
static void lp_exec_break(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "break");

   mask->break_mask = LLVMBuildAnd(builder,
                                   mask->break_mask,
                                   exec_mask, "break_full");

   lp_exec_mask_update(mask);
}

/**
 * CONT: deactivate the currently-executing lanes in cont_mask; the
 * mask is restored at ENDLOOP for the next iteration.
 */
static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


/**
 * ENDLOOP: restore cont_mask for the next iteration, persist
 * break_mask through break_var, then branch back to the loop head if
 * any lane is still active, else fall through to "endloop" and pop the
 * saved loop state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMBasicBlockRef endloop;
   /* an integer wide enough to view the whole mask vector as one scalar */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   /* i1cond = (mask != 0) -- keep looping while any lane remains active */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}

/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef real_val, dst_val;

      /* read-modify-write: masked-off lanes keep their old value */
      dst_val = LLVMBuildLoad(builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(builder, real_val, dst);
   } else
      LLVMBuildStore(builder, val, dst);
}

/**
 * CAL: push the return pc and current ret_mask, then transfer control
 * to the subroutine by rewriting *pc.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
   mask->call_stack[mask->call_stack_size].pc = *pc;
   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
   mask->call_stack_size++;
   *pc = func;
}

/**
 * RET: from main(), stop translation (*pc = -1); inside a subroutine,
 * deactivate the executing lanes in ret_mask until ENDSUB restores it.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask;

   if (mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }
   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

/** BGNSUB: nothing to do; subroutines are entered via lp_exec_mask_call(). */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

/** ENDSUB: pop the call stack, restoring the caller's pc and ret_mask. */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   assert(mask->call_stack_size);
   mask->call_stack_size--;
   *pc = mask->call_stack[mask->call_stack_size].pc;
   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
   lp_exec_mask_update(mask);
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index which temporary register
 * \param chan which channel of the temp register.
 */
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->base.gallivm->builder;
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      /* temps live in one flat array: 4 channels per register */
      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
   }
   else {
      return bld->temps[index][chan];
   }
}

/**
 * Return pointer to a output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index which output register
 * \param chan which channel of the output register.
 */
static LLVMValueRef
get_output_ptr(struct lp_build_tgsi_soa_context *bld,
               unsigned index,
               unsigned chan)
{
   LLVMBuilderRef builder = bld->base.gallivm->builder;
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      /* outputs live in one flat array: 4 channels per register */
      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
                                                 index * 4 + chan);
      return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
   }
   else {
      return bld->outputs[index][chan];
   }
}

/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_soa_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMBuilderRef builder = bld->base.gallivm->builder;
   LLVMValueRef res = bld->base.undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                             &index, 1, "gather_ptr");
      LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
   }

   return res;
}


/**
 * Scatter/store vector.
 * Stores each element of 'values' at base_ptr[indexes[i]], honoring
 * the execution mask and the optional per-lane predicate.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* debugging aid, normally disabled */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         /* read-modify-write so a masked-off lane keeps its old value */
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_src_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->SwizzleX;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   rel = LLVMBuildLoad(builder,
                       bld->addr[indirect_reg->Index][swizzle],
                       "load addr reg");

   /* for indexing we want integers */
   rel = LLVMBuildFPToSI(builder,
                         rel,
                         uint_bld->vec_type, "");

   index = lp_build_add(uint_bld, base, rel);

   /* clamp to the register file's highest declared index so the gather
    * cannot read out of bounds */
   max_index = lp_build_const_int_vec(bld->base.gallivm,
                                      uint_bld->type,
                                      bld->info->file_max[reg_file]);

   assert(!uint_bld->type.sign);
   index = lp_build_min(uint_bld, index, max_index);

   return index;
}


/**
 * Register fetch.
 */
/* Fetch one channel of source operand src_op of 'inst' as a vector,
 * applying the register swizzle, indirect addressing and the TGSI
 * sign mode (abs / negate / set). */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld->uint_bld;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef indirect_index = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
         LLVMValueRef index_vec;  /* index into the const buffer */

         /* index_vec = indirect_index * 4 + swizzle */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);

         scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");

         /* constants are uniform across lanes: splat the scalar */
         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the input array */
         LLVMValueRef inputs_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast inputs_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
                                         float4_ptr_type, "");

         /* Gather values from the input register array */
         res = build_gather(bld, inputs_array, index_vec);
      } else {
         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
            /* inputs were spilled to the flat array */
            LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle);
            LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                                  bld->inputs_array, &lindex, 1, "");
            res = LLVMBuildLoad(builder, input_ptr, "");
         }
         else {
            res = bld->inputs[reg->Register.Index][swizzle];
         }
      }
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
                                   bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the temp register array */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* apply the source modifier (absolute value and/or negation) */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      /* SIGN_SET = force negative: abs() then negate */
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}


/**
 * Register fetch with derivatives.
 * \param res  if non-NULL, receives the fetched value
 * \param ddx  if non-NULL, receives the derivative along x
 * \param ddy  if non-NULL, receives the derivative along y
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   const unsigned chan_index,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   LLVMValueRef src;

   src = emit_fetch(bld, inst, index, chan_index);

   if(res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if(ddx)
      *ddx = lp_build_ddx(&bld->base, src);

   if(ddy)
      *ddy = lp_build_ddy(&bld->base, src);
}


/**
 * Predicate.
 * Fetch the per-channel predicate masks for a predicated instruction
 * into pred[0..3]; entries are NULL when the instruction is not
 * predicated.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   LLVMBuilderRef builder = bld->base.gallivm->builder;
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* not predicated: every channel stores unconditionally */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.gallivm,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}


/**
 * Register store.
 */
/* Store 'value' into channel chan_index of dst register 'index' of
 * 'inst', applying the saturation mode, the execution mask and the
 * optional predicate. */
static void
emit_store(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *uint_bld = &bld->uint_bld;
   LLVMValueRef indirect_index = NULL;

   /* apply the instruction's saturation mode before storing */
   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      value = lp_build_max(&bld->base, value, bld->base.zero);
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   default:
      assert(0);
   }

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      if (reg->Register.Indirect) {
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
                                          float_ptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
                                               chan_index);
         lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type,
                                   bld->base.type.length);
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                        float_ptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
                                              chan_index);
         lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   case TGSI_FILE_PREDICATE:
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->preds[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }
}


/**
 * High-level instruction translators.
 */

/**
 * Emit a texture sampling instruction via the bound sampler generator.
 * \param modifier  none / lod-bias / explicit-lod / projected /
 *                  explicit-derivatives (TXD)
 * \param texel  receives the four texel channels
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   LLVMBuilderRef builder = bld->base.gallivm->builder;
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* coordinate count depends on the texture target */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* lod bias and explicit lod both come from src0.w */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* projected: scale coords by 1/src0.w */
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: derivatives come from src1/src2; only element 0 of each
       * vector is passed on, and the sampler lives in src3 */
      LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0);
      for (i = 0; i < num_coords; i++) {
         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
         ddx[i] = LLVMBuildExtractElement(builder, src1, index0, "");
         ddy[i] = LLVMBuildExtractElement(builder, src2, index0, "");
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* implicit derivatives computed from the coords themselves */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = LLVMGetUndef(bld->base.elem_type);
      ddy[i] = LLVMGetUndef(bld->base.elem_type);
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.gallivm,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}

/**
 * Scan up to 5 instructions ahead of pc; return TRUE if TGSI_OPCODE_END
 * (or the end of the token stream) is reached without encountering any
 * texture, call, or control-flow opcode on the way.
 */
static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
                   int pc)
{
   int i;

   for (i = 0; i < 5; i++) {
      unsigned opcode;

      if (pc + i >= 
bld->info->num_instructions)
1147 return TRUE;
1148
1149 opcode = bld->instructions[pc + i].Instruction.Opcode;
1150
1151 if (opcode == TGSI_OPCODE_END)
1152 return TRUE;
1153
/* Any texture, call, or flow-control opcode means the shader still has
 * meaningful work ahead. */
1154 if (opcode == TGSI_OPCODE_TEX ||
1155 opcode == TGSI_OPCODE_TXP ||
1156 opcode == TGSI_OPCODE_TXD ||
1157 opcode == TGSI_OPCODE_TXB ||
1158 opcode == TGSI_OPCODE_TXL ||
1159 opcode == TGSI_OPCODE_TXF ||
1160 opcode == TGSI_OPCODE_TXQ ||
1161 opcode == TGSI_OPCODE_CAL ||
1162 opcode == TGSI_OPCODE_CALLNZ ||
1163 opcode == TGSI_OPCODE_IF ||
1164 opcode == TGSI_OPCODE_IFC ||
1165 opcode == TGSI_OPCODE_BGNLOOP ||
1166 opcode == TGSI_OPCODE_SWITCH)
1167 return FALSE;
1168 }
1169
1170 return TRUE;
1171}
1172
1173
1174
1175/**
1176 * Kill fragment if any of the src register values are negative.
1177 */
1178 static void
1179 emit_kil(
1180 struct lp_build_tgsi_soa_context *bld,
1181 const struct tgsi_full_instruction *inst,
1182 int pc)
1183 {
1184 LLVMBuilderRef builder = bld->base.gallivm->builder;
1185 const struct tgsi_full_src_register *reg = &inst->Src[0];
1186 LLVMValueRef terms[NUM_CHANNELS];
1187 LLVMValueRef mask;
1188 unsigned chan_index;
1189
1190 memset(&terms, 0, sizeof terms);
1191
/* Fetch each referenced source component at most once, de-duplicating
 * through the swizzle (terms[] is indexed by unswizzled channel). */
1192 FOR_EACH_CHANNEL( chan_index ) {
1193 unsigned swizzle;
1194
1195 /* Unswizzle channel */
1196 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1197
1198 /* Check if the component has not been already tested. */
1199 assert(swizzle < NUM_CHANNELS);
1200 if( !terms[swizzle] )
1201 /* TODO: change the comparison operator instead of setting the sign */
1202 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
1203 }
1204
/* AND together the per-channel "survives" masks. */
1205 mask = NULL;
1206 FOR_EACH_CHANNEL( chan_index ) {
1207 if(terms[chan_index]) {
1208 LLVMValueRef chan_mask;
1209
1210 /*
1211 * If term < 0 then mask = 0 else mask = ~0.
1212 */
/* GEQUAL: a lane keeps running (mask = ~0) only when term >= 0. */
1213 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);
1214
1215 if(mask)
1216 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
1217 else
1218 mask = chan_mask;
1219 }
1220 }
1221
1222 if(mask) {
1223 lp_build_mask_update(bld->mask, mask);
1224
/* Skip the early-out mask check when the shader is about to end
 * anyway -- the branch would not save any work. */
1225 if (!near_end_of_shader(bld, pc))
1226 lp_build_mask_check(bld->mask);
1227 }
1228}
1229
1230
1231/**
1232 * Predicated fragment kill.
1233 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
1234 * The only predication is the execution mask which will apply if
1235 * we're inside a loop or conditional.
1236 */
1237 static void
1238 emit_kilp(struct lp_build_tgsi_soa_context *bld,
1239 const struct tgsi_full_instruction *inst,
1240 int pc)
1241 {
1242 LLVMBuilderRef builder = bld->base.gallivm->builder;
1243 LLVMValueRef mask;
1244
1245 /* For those channels which are "alive", disable fragment shader
1246 * execution.
1247 */
1248 if (bld->exec_mask.has_mask) {
/* Kill exactly the currently-active lanes: surviving lanes are the
 * complement of the execution mask. */
1249 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
1250 }
1251 else {
/* No execution mask: everything is active, so everything dies. */
1252 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
1253 mask = zero;
1254 }
1255
1256 lp_build_mask_update(bld->mask, mask);
1257
1258 if (!near_end_of_shader(bld, pc))
1259 lp_build_mask_check(bld->mask);
1260}
1261
1262
1263/**
1264 * Emit code which will dump the value of all the temporary registers
1265 * to stdout.
1266 */
/*
 * Debug aid: generates lp_build_printf calls so the *compiled shader*
 * prints every TEMP register (4 channels x 4 lanes) at run time.
 * Only reached from the disabled if(0) branch at TGSI_OPCODE_END.
 */
1267 static void
1268 emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
1269 {
1270 struct gallivm_state *gallivm = bld->base.gallivm;
1271 LLVMBuilderRef builder = gallivm->builder;
1272 LLVMValueRef temp_ptr;
1273 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
1274 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
1275 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
1276 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
1277 int index;
/* NOTE(review): file_max appears to be the highest used register index,
 * so "index < n" would skip the last temp; confirm whether the bound
 * should be inclusive. */
1278 int n = bld->info->file_max[TGSI_FILE_TEMPORARY];
1279
1280 for (index = 0; index < n; index++) {
1281 LLVMValueRef idx = lp_build_const_int32(gallivm, index);
1282 LLVMValueRef v[4][4], res;
1283 int chan;
1284
1285 lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
1286
/* Load each channel vector and break it into its 4 lane scalars. */
1287 for (chan = 0; chan < 4; chan++) {
1288 temp_ptr = get_temp_ptr(bld, index, chan);
1289 res = LLVMBuildLoad(builder, temp_ptr, "");
1290 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
1291 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
1292 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
1293 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
1294 }
1295
1296 lp_build_printf(gallivm, " X: %f %f %f %f\n",
1297 v[0][0], v[0][1], v[0][2], v[0][3]);
1298 lp_build_printf(gallivm, " Y: %f %f %f %f\n",
1299 v[1][0], v[1][1], v[1][2], v[1][3]);
1300 lp_build_printf(gallivm, " Z: %f %f %f %f\n",
1301 v[2][0], v[2][1], v[2][2], v[2][3]);
1302 lp_build_printf(gallivm, " W: %f %f %f %f\n",
1303 v[3][0], v[3][1], v[3][2], v[3][3]);
1304 }
1305}
1306
1307
1308
/*
 * Allocate per-register storage (LLVM allocas) for a TGSI declaration.
 * Files accessed with indirect addressing are skipped here -- they live
 * in array storage handled elsewhere (see bld->indirect_files).
 */
1309 static void
1310 emit_declaration(
1311 struct lp_build_tgsi_soa_context *bld,
1312 const struct tgsi_full_declaration *decl)
1313 {
1314 struct gallivm_state *gallivm = bld->base.gallivm;
1315 LLVMTypeRef vec_type = bld->base.vec_type;
1316 const unsigned first = decl->Range.First;
1317 const unsigned last = decl->Range.Last;
1318 unsigned idx, i;
1319
1320 for (idx = first; idx <= last; ++idx) {
1321 assert(last <=
bld->info->file_max[decl->Declaration.File]); 1322 switch (decl->Declaration.File) { 1323 case TGSI_FILE_TEMPORARY: 1324 assert(idx < LP_MAX_TGSI_TEMPS); 1325 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { 1326 for (i = 0; i < NUM_CHANNELS; i++) 1327 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); 1328 } 1329 break; 1330 1331 case TGSI_FILE_OUTPUT: 1332 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 1333 for (i = 0; i < NUM_CHANNELS; i++) 1334 bld->outputs[idx][i] = lp_build_alloca(gallivm, 1335 vec_type, "output"); 1336 } 1337 break; 1338 1339 case TGSI_FILE_ADDRESS: 1340 assert(idx < LP_MAX_TGSI_ADDRS); 1341 for (i = 0; i < NUM_CHANNELS; i++) 1342 bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr"); 1343 break; 1344 1345 case TGSI_FILE_PREDICATE: 1346 assert(idx < LP_MAX_TGSI_PREDS); 1347 for (i = 0; i < NUM_CHANNELS; i++) 1348 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, 1349 "predicate"); 1350 break; 1351 1352 default: 1353 /* don't need to declare other vars */ 1354 break; 1355 } 1356 } 1357} 1358 1359 1360/** 1361 * Emit LLVM for one TGSI instruction. 1362 * \param return TRUE for success, FALSE otherwise 1363 */ 1364static boolean 1365emit_instruction( 1366 struct lp_build_tgsi_soa_context *bld, 1367 const struct tgsi_full_instruction *inst, 1368 const struct tgsi_opcode_info *info, 1369 int *pc) 1370{ 1371 unsigned chan_index; 1372 LLVMValueRef src0, src1, src2; 1373 LLVMValueRef tmp0, tmp1, tmp2; 1374 LLVMValueRef tmp3 = NULL; 1375 LLVMValueRef tmp4 = NULL; 1376 LLVMValueRef tmp5 = NULL; 1377 LLVMValueRef tmp6 = NULL; 1378 LLVMValueRef tmp7 = NULL; 1379 LLVMValueRef res; 1380 LLVMValueRef dst0[NUM_CHANNELS]; 1381 1382 /* 1383 * Stores and write masks are handled in a general fashion after the long 1384 * instruction opcode switch statement. 1385 * 1386 * Although not stricitly necessary, we avoid generating instructions for 1387 * channels which won't be stored, in cases where's that easy. 
For some 1388 * complex instructions, like texture sampling, it is more convenient to 1389 * assume a full writemask and then let LLVM optimization passes eliminate 1390 * redundant code. 1391 */ 1392 1393 (*pc)++; 1394 1395 assert(info->num_dst <= 1); 1396 if (info->num_dst) { 1397 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1398 dst0[chan_index] = bld->base.undef; 1399 } 1400 } 1401 1402 switch (inst->Instruction.Opcode) { 1403 case TGSI_OPCODE_ARL: 1404 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1405 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1406 tmp0 = lp_build_floor(&bld->base, tmp0); 1407 dst0[chan_index] = tmp0; 1408 } 1409 break; 1410 1411 case TGSI_OPCODE_MOV: 1412 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1413 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1414 } 1415 break; 1416 1417 case TGSI_OPCODE_LIT: 1418 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 1419 dst0[CHAN_X] = bld->base.one; 1420 } 1421 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1422 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1423 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1424 } 1425 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1426 /* XMM[1] = SrcReg[0].yyyy */ 1427 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1428 /* XMM[1] = max(XMM[1], 0) */ 1429 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1430 /* XMM[2] = SrcReg[0].wwww */ 1431 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1432 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1433 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1434 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1435 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1436 } 1437 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1438 dst0[CHAN_W] = bld->base.one; 1439 } 1440 break; 1441 1442 case TGSI_OPCODE_RCP: 1443 /* TGSI_OPCODE_RECIP */ 1444 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1445 res = lp_build_rcp(&bld->base, src0); 1446 
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1447 dst0[chan_index] = res; 1448 } 1449 break; 1450 1451 case TGSI_OPCODE_RSQ: 1452 /* TGSI_OPCODE_RECIPSQRT */ 1453 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1454 src0 = lp_build_abs(&bld->base, src0); 1455 res = lp_build_rsqrt(&bld->base, src0); 1456 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1457 dst0[chan_index] = res; 1458 } 1459 break; 1460 1461 case TGSI_OPCODE_EXP: 1462 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1463 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1464 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1465 LLVMValueRef *p_exp2_int_part = NULL; 1466 LLVMValueRef *p_frac_part = NULL; 1467 LLVMValueRef *p_exp2 = NULL; 1468 1469 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1470 1471 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1472 p_exp2_int_part = &tmp0; 1473 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1474 p_frac_part = &tmp1; 1475 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1476 p_exp2 = &tmp2; 1477 1478 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1479 1480 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1481 dst0[CHAN_X] = tmp0; 1482 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1483 dst0[CHAN_Y] = tmp1; 1484 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1485 dst0[CHAN_Z] = tmp2; 1486 } 1487 /* dst.w = 1.0 */ 1488 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1489 dst0[CHAN_W] = bld->base.one; 1490 } 1491 break; 1492 1493 case TGSI_OPCODE_LOG: 1494 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1495 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1496 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1497 LLVMValueRef *p_floor_log2 = NULL; 1498 LLVMValueRef *p_exp = NULL; 1499 LLVMValueRef *p_log2 = NULL; 1500 1501 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1502 src0 = lp_build_abs( &bld->base, src0 ); 1503 1504 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1505 p_floor_log2 = &tmp0; 1506 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1507 p_exp = &tmp1; 1508 if 
(IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1509 p_log2 = &tmp2; 1510 1511 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1512 1513 /* dst.x = floor(lg2(abs(src.x))) */ 1514 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1515 dst0[CHAN_X] = tmp0; 1516 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1517 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1518 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1519 } 1520 /* dst.z = lg2(abs(src.x)) */ 1521 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1522 dst0[CHAN_Z] = tmp2; 1523 } 1524 /* dst.w = 1.0 */ 1525 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1526 dst0[CHAN_W] = bld->base.one; 1527 } 1528 break; 1529 1530 case TGSI_OPCODE_MUL: 1531 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1532 src0 = emit_fetch( bld, inst, 0, chan_index ); 1533 src1 = emit_fetch( bld, inst, 1, chan_index ); 1534 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1535 } 1536 break; 1537 1538 case TGSI_OPCODE_ADD: 1539 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1540 src0 = emit_fetch( bld, inst, 0, chan_index ); 1541 src1 = emit_fetch( bld, inst, 1, chan_index ); 1542 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1543 } 1544 break; 1545 1546 case TGSI_OPCODE_DP3: 1547 /* TGSI_OPCODE_DOT3 */ 1548 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1549 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1550 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1551 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1552 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1553 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1554 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1555 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1556 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1557 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1558 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1559 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1560 dst0[chan_index] = tmp0; 1561 } 1562 break; 1563 1564 case TGSI_OPCODE_DP4: 1565 /* TGSI_OPCODE_DOT4 
*/ 1566 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1567 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1568 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1569 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1570 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1571 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1572 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1573 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1574 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1575 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1576 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1577 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1578 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1579 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1580 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1581 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1582 dst0[chan_index] = tmp0; 1583 } 1584 break; 1585 1586 case TGSI_OPCODE_DST: 1587 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1588 dst0[CHAN_X] = bld->base.one; 1589 } 1590 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1591 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1592 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1593 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1594 } 1595 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1596 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1597 } 1598 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1599 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1600 } 1601 break; 1602 1603 case TGSI_OPCODE_MIN: 1604 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1605 src0 = emit_fetch( bld, inst, 0, chan_index ); 1606 src1 = emit_fetch( bld, inst, 1, chan_index ); 1607 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1608 } 1609 break; 1610 1611 case TGSI_OPCODE_MAX: 1612 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1613 src0 = emit_fetch( bld, inst, 0, chan_index ); 1614 src1 = emit_fetch( bld, inst, 1, chan_index ); 1615 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1616 } 1617 break; 1618 1619 case 
TGSI_OPCODE_SLT: 1620 /* TGSI_OPCODE_SETLT */ 1621 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1622 src0 = emit_fetch( bld, inst, 0, chan_index ); 1623 src1 = emit_fetch( bld, inst, 1, chan_index ); 1624 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1625 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1626 } 1627 break; 1628 1629 case TGSI_OPCODE_SGE: 1630 /* TGSI_OPCODE_SETGE */ 1631 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1632 src0 = emit_fetch( bld, inst, 0, chan_index ); 1633 src1 = emit_fetch( bld, inst, 1, chan_index ); 1634 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1635 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1636 } 1637 break; 1638 1639 case TGSI_OPCODE_MAD: 1640 /* TGSI_OPCODE_MADD */ 1641 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1642 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1643 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1644 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1645 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1646 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1647 dst0[chan_index] = tmp0; 1648 } 1649 break; 1650 1651 case TGSI_OPCODE_SUB: 1652 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1653 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1654 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1655 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1656 } 1657 break; 1658 1659 case TGSI_OPCODE_LRP: 1660 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1661 src0 = emit_fetch( bld, inst, 0, chan_index ); 1662 src1 = emit_fetch( bld, inst, 1, chan_index ); 1663 src2 = emit_fetch( bld, inst, 2, chan_index ); 1664 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1665 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1666 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1667 } 1668 break; 1669 1670 case TGSI_OPCODE_CND: 1671 FOR_EACH_DST0_ENABLED_CHANNEL( inst, 
chan_index ) { 1672 src0 = emit_fetch( bld, inst, 0, chan_index ); 1673 src1 = emit_fetch( bld, inst, 1, chan_index ); 1674 src2 = emit_fetch( bld, inst, 2, chan_index ); 1675 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); 1676 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1677 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1678 } 1679 break; 1680 1681 case TGSI_OPCODE_DP2A: 1682 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1683 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1684 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1685 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1686 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1687 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1688 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1689 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1690 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1691 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1692 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1693 } 1694 break; 1695 1696 case TGSI_OPCODE_FRC: 1697 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1698 src0 = emit_fetch( bld, inst, 0, chan_index ); 1699 tmp0 = lp_build_floor(&bld->base, src0); 1700 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1701 dst0[chan_index] = tmp0; 1702 } 1703 break; 1704 1705 case TGSI_OPCODE_CLAMP: 1706 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1707 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1708 src1 = emit_fetch( bld, inst, 1, chan_index ); 1709 src2 = emit_fetch( bld, inst, 2, chan_index ); 1710 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1711 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1712 dst0[chan_index] = tmp0; 1713 } 1714 break; 1715 1716 case TGSI_OPCODE_FLR: 1717 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1718 
tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1719 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1720 } 1721 break; 1722 1723 case TGSI_OPCODE_ROUND: 1724 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1725 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1726 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1727 } 1728 break; 1729 1730 case TGSI_OPCODE_EX2: { 1731 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1732 tmp0 = lp_build_exp2( &bld->base, tmp0); 1733 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1734 dst0[chan_index] = tmp0; 1735 } 1736 break; 1737 } 1738 1739 case TGSI_OPCODE_LG2: 1740 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1741 tmp0 = lp_build_log2( &bld->base, tmp0); 1742 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1743 dst0[chan_index] = tmp0; 1744 } 1745 break; 1746 1747 case TGSI_OPCODE_POW: 1748 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1749 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1750 res = lp_build_pow( &bld->base, src0, src1 ); 1751 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1752 dst0[chan_index] = res; 1753 } 1754 break; 1755 1756 case TGSI_OPCODE_XPD: 1757 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1758 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1759 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1760 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1761 } 1762 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1763 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1764 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1765 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1766 } 1767 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1768 tmp2 = tmp0; 1769 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1770 tmp5 = tmp3; 1771 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1772 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1773 dst0[CHAN_X] = tmp2; 1774 } 1775 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1776 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1777 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1778 tmp5 = emit_fetch( bld, inst, 0, 
CHAN_X ); 1779 } 1780 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1781 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1782 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1783 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 1784 dst0[CHAN_Y] = tmp3; 1785 } 1786 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1787 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1788 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1789 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1790 dst0[CHAN_Z] = tmp5; 1791 } 1792 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1793 dst0[CHAN_W] = bld->base.one; 1794 } 1795 break; 1796 1797 case TGSI_OPCODE_ABS: 1798 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1799 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1800 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1801 } 1802 break; 1803 1804 case TGSI_OPCODE_RCC: 1805 /* deprecated? */ 1806 assert(0); 1807 return FALSE; 1808 1809 case TGSI_OPCODE_DPH: 1810 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1811 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1812 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1813 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1814 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1815 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1816 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1817 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1818 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1819 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1820 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1821 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1822 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1823 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1824 dst0[chan_index] = tmp0; 1825 } 1826 break; 1827 1828 case TGSI_OPCODE_COS: 1829 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1830 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1831 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1832 dst0[chan_index] = tmp0; 1833 } 1834 break; 1835 1836 case TGSI_OPCODE_DDX: 1837 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) 
{ 1838 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1839 } 1840 break; 1841 1842 case TGSI_OPCODE_DDY: 1843 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1844 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1845 } 1846 break; 1847 1848 case TGSI_OPCODE_KILP: 1849 /* predicated kill */ 1850 emit_kilp( bld, inst, (*pc)-1 ); 1851 break; 1852 1853 case TGSI_OPCODE_KIL: 1854 /* conditional kill */ 1855 emit_kil( bld, inst, (*pc)-1 ); 1856 break; 1857 1858 case TGSI_OPCODE_PK2H: 1859 return FALSE; 1860 break; 1861 1862 case TGSI_OPCODE_PK2US: 1863 return FALSE; 1864 break; 1865 1866 case TGSI_OPCODE_PK4B: 1867 return FALSE; 1868 break; 1869 1870 case TGSI_OPCODE_PK4UB: 1871 return FALSE; 1872 break; 1873 1874 case TGSI_OPCODE_RFL: 1875 return FALSE; 1876 break; 1877 1878 case TGSI_OPCODE_SEQ: 1879 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1880 src0 = emit_fetch( bld, inst, 0, chan_index ); 1881 src1 = emit_fetch( bld, inst, 1, chan_index ); 1882 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1883 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1884 } 1885 break; 1886 1887 case TGSI_OPCODE_SFL: 1888 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1889 dst0[chan_index] = bld->base.zero; 1890 } 1891 break; 1892 1893 case TGSI_OPCODE_SGT: 1894 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1895 src0 = emit_fetch( bld, inst, 0, chan_index ); 1896 src1 = emit_fetch( bld, inst, 1, chan_index ); 1897 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1898 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1899 } 1900 break; 1901 1902 case TGSI_OPCODE_SIN: 1903 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1904 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1905 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1906 dst0[chan_index] = tmp0; 1907 } 1908 break; 1909 1910 case TGSI_OPCODE_SLE: 1911 
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1912 src0 = emit_fetch( bld, inst, 0, chan_index ); 1913 src1 = emit_fetch( bld, inst, 1, chan_index ); 1914 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 1915 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1916 } 1917 break; 1918 1919 case TGSI_OPCODE_SNE: 1920 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1921 src0 = emit_fetch( bld, inst, 0, chan_index ); 1922 src1 = emit_fetch( bld, inst, 1, chan_index ); 1923 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1924 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1925 } 1926 break; 1927 1928 case TGSI_OPCODE_STR: 1929 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1930 dst0[chan_index] = bld->base.one; 1931 } 1932 break; 1933 1934 case TGSI_OPCODE_TEX: 1935 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); 1936 break; 1937 1938 case TGSI_OPCODE_TXD: 1939 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1940 break; 1941 1942 case TGSI_OPCODE_UP2H: 1943 /* deprecated */ 1944 assert (0); 1945 return FALSE; 1946 break; 1947 1948 case TGSI_OPCODE_UP2US: 1949 /* deprecated */ 1950 assert(0); 1951 return FALSE; 1952 break; 1953 1954 case TGSI_OPCODE_UP4B: 1955 /* deprecated */ 1956 assert(0); 1957 return FALSE; 1958 break; 1959 1960 case TGSI_OPCODE_UP4UB: 1961 /* deprecated */ 1962 assert(0); 1963 return FALSE; 1964 break; 1965 1966 case TGSI_OPCODE_X2D: 1967 /* deprecated? 
*/ 1968 assert(0); 1969 return FALSE; 1970 break; 1971 1972 case TGSI_OPCODE_ARA: 1973 /* deprecated */ 1974 assert(0); 1975 return FALSE; 1976 break; 1977 1978 case TGSI_OPCODE_ARR: 1979 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1980 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1981 tmp0 = lp_build_round(&bld->base, tmp0); 1982 dst0[chan_index] = tmp0; 1983 } 1984 break; 1985 1986 case TGSI_OPCODE_BRA: 1987 /* deprecated */ 1988 assert(0); 1989 return FALSE; 1990 break; 1991 1992 case TGSI_OPCODE_CAL: 1993 lp_exec_mask_call(&bld->exec_mask, 1994 inst->Label.Label, 1995 pc); 1996 1997 break; 1998 1999 case TGSI_OPCODE_RET: 2000 lp_exec_mask_ret(&bld->exec_mask, pc); 2001 break; 2002 2003 case TGSI_OPCODE_END: 2004 if (0) { 2005 /* for debugging */ 2006 emit_dump_temps(bld); 2007 } 2008 *pc = -1; 2009 break; 2010 2011 case TGSI_OPCODE_SSG: 2012 /* TGSI_OPCODE_SGN */ 2013 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2014 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2015 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 2016 } 2017 break; 2018 2019 case TGSI_OPCODE_CMP: 2020 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2021 src0 = emit_fetch( bld, inst, 0, chan_index ); 2022 src1 = emit_fetch( bld, inst, 1, chan_index ); 2023 src2 = emit_fetch( bld, inst, 2, chan_index ); 2024 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 2025 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 2026 } 2027 break; 2028 2029 case TGSI_OPCODE_SCS: 2030 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 2031 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 2032 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 2033 } 2034 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 2035 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 2036 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 2037 } 2038 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 2039 dst0[CHAN_Z] = bld->base.zero; 2040 } 2041 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 2042 
dst0[CHAN_W] = bld->base.one; 2043 } 2044 break; 2045 2046 case TGSI_OPCODE_TXB: 2047 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); 2048 break; 2049 2050 case TGSI_OPCODE_NRM: 2051 /* fall-through */ 2052 case TGSI_OPCODE_NRM4: 2053 /* 3 or 4-component normalization */ 2054 { 2055 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 2056 2057 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 2058 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 2059 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 2060 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 2061 2062 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 2063 2064 /* xmm4 = src.x */ 2065 /* xmm0 = src.x * src.x */ 2066 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2067 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2068 tmp4 = tmp0; 2069 } 2070 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 2071 2072 /* xmm5 = src.y */ 2073 /* xmm0 = xmm0 + src.y * src.y */ 2074 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 2075 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2076 tmp5 = tmp1; 2077 } 2078 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2079 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2080 2081 /* xmm6 = src.z */ 2082 /* xmm0 = xmm0 + src.z * src.z */ 2083 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 2084 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2085 tmp6 = tmp1; 2086 } 2087 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2088 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2089 2090 if (dims == 4) { 2091 /* xmm7 = src.w */ 2092 /* xmm0 = xmm0 + src.w * src.w */ 2093 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 2094 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 2095 tmp7 = tmp1; 2096 } 2097 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2098 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2099 } 2100 2101 /* xmm1 = 1 / sqrt(xmm0) */ 2102 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 2103 2104 /* dst.x = xmm1 * src.x */ 2105 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2106 dst0[CHAN_X] = lp_build_mul( &bld->base, 
tmp4, tmp1); 2107 } 2108 2109 /* dst.y = xmm1 * src.y */ 2110 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2111 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 2112 } 2113 2114 /* dst.z = xmm1 * src.z */ 2115 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2116 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 2117 } 2118 2119 /* dst.w = xmm1 * src.w */ 2120 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 2121 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 2122 } 2123 } 2124 2125 /* dst.w = 1.0 */ 2126 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 2127 dst0[CHAN_W] = bld->base.one; 2128 } 2129 } 2130 break; 2131 2132 case TGSI_OPCODE_DIV: 2133 /* deprecated */ 2134 assert( 0 ); 2135 return FALSE; 2136 break; 2137 2138 case TGSI_OPCODE_DP2: 2139 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 2140 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 2141 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 2142 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 2143 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 2144 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 2145 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 2146 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2147 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 2148 } 2149 break; 2150 2151 case TGSI_OPCODE_TXL: 2152 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 2153 break; 2154 2155 case TGSI_OPCODE_TXP: 2156 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); 2157 break; 2158 2159 case TGSI_OPCODE_BRK: 2160 lp_exec_break(&bld->exec_mask); 2161 break; 2162 2163 case TGSI_OPCODE_IF: 2164 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2165 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 2166 tmp0, bld->base.zero); 2167 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 2168 break; 2169 2170 case TGSI_OPCODE_BGNLOOP: 2171 
lp_exec_bgnloop(&bld->exec_mask); 2172 break; 2173 2174 case TGSI_OPCODE_BGNSUB: 2175 lp_exec_mask_bgnsub(&bld->exec_mask); 2176 break; 2177 2178 case TGSI_OPCODE_ELSE: 2179 lp_exec_mask_cond_invert(&bld->exec_mask); 2180 break; 2181 2182 case TGSI_OPCODE_ENDIF: 2183 lp_exec_mask_cond_pop(&bld->exec_mask); 2184 break; 2185 2186 case TGSI_OPCODE_ENDLOOP: 2187 lp_exec_endloop(bld->base.gallivm, &bld->exec_mask); 2188 break; 2189 2190 case TGSI_OPCODE_ENDSUB: 2191 lp_exec_mask_endsub(&bld->exec_mask, pc); 2192 break; 2193 2194 case TGSI_OPCODE_PUSHA: 2195 /* deprecated? */ 2196 assert(0); 2197 return FALSE; 2198 break; 2199 2200 case TGSI_OPCODE_POPA: 2201 /* deprecated? */ 2202 assert(0); 2203 return FALSE; 2204 break; 2205 2206 case TGSI_OPCODE_CEIL: 2207 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2208 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2209 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 2210 } 2211 break; 2212 2213 case TGSI_OPCODE_I2F: 2214 /* deprecated? */ 2215 assert(0); 2216 return FALSE; 2217 break; 2218 2219 case TGSI_OPCODE_NOT: 2220 /* deprecated? */ 2221 assert(0); 2222 return FALSE; 2223 break; 2224 2225 case TGSI_OPCODE_TRUNC: 2226 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2227 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2228 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 2229 } 2230 break; 2231 2232 case TGSI_OPCODE_SHL: 2233 /* deprecated? */ 2234 assert(0); 2235 return FALSE; 2236 break; 2237 2238 case TGSI_OPCODE_ISHR: 2239 /* deprecated? */ 2240 assert(0); 2241 return FALSE; 2242 break; 2243 2244 case TGSI_OPCODE_AND: 2245 /* deprecated? */ 2246 assert(0); 2247 return FALSE; 2248 break; 2249 2250 case TGSI_OPCODE_OR: 2251 /* deprecated? */ 2252 assert(0); 2253 return FALSE; 2254 break; 2255 2256 case TGSI_OPCODE_MOD: 2257 /* deprecated? */ 2258 assert(0); 2259 return FALSE; 2260 break; 2261 2262 case TGSI_OPCODE_XOR: 2263 /* deprecated? 
*/ 2264 assert(0); 2265 return FALSE; 2266 break; 2267 2268 case TGSI_OPCODE_SAD: 2269 /* deprecated? */ 2270 assert(0); 2271 return FALSE; 2272 break; 2273 2274 case TGSI_OPCODE_TXF: 2275 /* deprecated? */ 2276 assert(0); 2277 return FALSE; 2278 break; 2279 2280 case TGSI_OPCODE_TXQ: 2281 /* deprecated? */ 2282 assert(0); 2283 return FALSE; 2284 break; 2285 2286 case TGSI_OPCODE_CONT: 2287 lp_exec_continue(&bld->exec_mask); 2288 break; 2289 2290 case TGSI_OPCODE_EMIT: 2291 return FALSE; 2292 break; 2293 2294 case TGSI_OPCODE_ENDPRIM: 2295 return FALSE; 2296 break; 2297 2298 case TGSI_OPCODE_NOP: 2299 break; 2300 2301 default: 2302 return FALSE; 2303 } 2304 2305 if(info->num_dst) { 2306 LLVMValueRef pred[NUM_CHANNELS]; 2307 2308 emit_fetch_predicate( bld, inst, pred ); 2309 2310 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2311 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]); 2312 } 2313 } 2314 2315 return TRUE; 2316} 2317 2318 2319void 2320lp_build_tgsi_soa(struct gallivm_state *gallivm, 2321 const struct tgsi_token *tokens, 2322 struct lp_type type, 2323 struct lp_build_mask_context *mask, 2324 LLVMValueRef consts_ptr, 2325 const LLVMValueRef *pos, 2326 const LLVMValueRef (*inputs)[NUM_CHANNELS], 2327 LLVMValueRef (*outputs)[NUM_CHANNELS], 2328 struct lp_build_sampler_soa *sampler, 2329 const struct tgsi_shader_info *info) 2330{ 2331 struct lp_build_tgsi_soa_context bld; 2332 struct tgsi_parse_context parse; 2333 uint num_immediates = 0; 2334 uint num_instructions = 0; 2335 unsigned i; 2336 int pc = 0; 2337 2338 struct lp_type res_type; 2339 2340 assert(type.length <= LP_MAX_VECTOR_LENGTH); 2341 memset(&res_type, 0, sizeof res_type); 2342 res_type.width = type.width; 2343 res_type.length = type.length; 2344 res_type.sign = 1; 2345 2346 /* Setup build context */ 2347 memset(&bld, 0, sizeof bld); 2348 lp_build_context_init(&bld.base, gallivm, type); 2349 lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type)); 2350 
lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); 2351 bld.mask = mask; 2352 bld.pos = pos; 2353 bld.inputs = inputs; 2354 bld.outputs = outputs; 2355 bld.consts_ptr = consts_ptr; 2356 bld.sampler = sampler; 2357 bld.info = info; 2358 bld.indirect_files = info->indirect_files; 2359 bld.instructions = (struct tgsi_full_instruction *) 2360 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); 2361 bld.max_instructions = LP_MAX_INSTRUCTIONS; 2362 2363 if (!bld.instructions) { 2364 return; 2365 } 2366 2367 lp_exec_mask_init(&bld.exec_mask, &bld.base); 2368 2369 if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 2370 LLVMValueRef array_size = 2371 lp_build_const_int32(gallivm, 2372 info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); 2373 bld.temps_array = lp_build_array_alloca(gallivm, 2374 bld.base.vec_type, array_size, 2375 "temp_array"); 2376 } 2377 2378 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2379 LLVMValueRef array_size = 2380 lp_build_const_int32(gallivm, 2381 info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); 2382 bld.outputs_array = lp_build_array_alloca(gallivm, 2383 bld.base.vec_type, array_size, 2384 "output_array"); 2385 } 2386 2387 /* If we have indirect addressing in inputs we need to copy them into 2388 * our alloca array to be able to iterate over them */ 2389 if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) { 2390 unsigned index, chan; 2391 LLVMTypeRef vec_type = bld.base.vec_type; 2392 LLVMValueRef array_size = 2393 lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4); 2394 bld.inputs_array = lp_build_array_alloca(gallivm, 2395 vec_type, array_size, 2396 "input_array"); 2397 2398 assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1); 2399 2400 for (index = 0; index < info->num_inputs; ++index) { 2401 for (chan = 0; chan < NUM_CHANNELS; ++chan) { 2402 LLVMValueRef lindex = 2403 lp_build_const_int32(gallivm, index * 4 + chan); 2404 LLVMValueRef input_ptr = 2405 LLVMBuildGEP(gallivm->builder, 
bld.inputs_array, 2406 &lindex, 1, ""); 2407 LLVMValueRef value = bld.inputs[index][chan]; 2408 if (value) 2409 LLVMBuildStore(gallivm->builder, value, input_ptr); 2410 } 2411 } 2412 } 2413 2414 tgsi_parse_init( &parse, tokens ); 2415 2416 while( !tgsi_parse_end_of_tokens( &parse ) ) { 2417 tgsi_parse_token( &parse ); 2418 2419 switch( parse.FullToken.Token.Type ) { 2420 case TGSI_TOKEN_TYPE_DECLARATION: 2421 /* Inputs already interpolated */ 2422 emit_declaration( &bld, &parse.FullToken.FullDeclaration ); 2423 break; 2424 2425 case TGSI_TOKEN_TYPE_INSTRUCTION: 2426 { 2427 /* save expanded instruction */ 2428 if (num_instructions == bld.max_instructions) { 2429 struct tgsi_full_instruction *instructions; 2430 instructions = REALLOC(bld.instructions, 2431 bld.max_instructions 2432 * sizeof(struct tgsi_full_instruction), 2433 (bld.max_instructions + LP_MAX_INSTRUCTIONS) 2434 * sizeof(struct tgsi_full_instruction)); 2435 if (!instructions) { 2436 break; 2437 } 2438 bld.instructions = instructions; 2439 bld.max_instructions += LP_MAX_INSTRUCTIONS; 2440 } 2441 2442 memcpy(bld.instructions + num_instructions, 2443 &parse.FullToken.FullInstruction, 2444 sizeof(bld.instructions[0])); 2445 2446 num_instructions++; 2447 } 2448 2449 break; 2450 2451 case TGSI_TOKEN_TYPE_IMMEDIATE: 2452 /* simply copy the immediate values into the next immediates[] slot */ 2453 { 2454 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 2455 assert(size <= 4); 2456 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 2457 for( i = 0; i < size; ++i ) 2458 bld.immediates[num_immediates][i] = 2459 lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float); 2460 for( i = size; i < 4; ++i ) 2461 bld.immediates[num_immediates][i] = bld.base.undef; 2462 num_immediates++; 2463 } 2464 break; 2465 2466 case TGSI_TOKEN_TYPE_PROPERTY: 2467 break; 2468 2469 default: 2470 assert( 0 ); 2471 } 2472 } 2473 2474 while (pc != -1) { 2475 struct tgsi_full_instruction *instr = 
bld.instructions + pc; 2476 const struct tgsi_opcode_info *opcode_info = 2477 tgsi_get_opcode_info(instr->Instruction.Opcode); 2478 if (!emit_instruction( &bld, instr, opcode_info, &pc )) 2479 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 2480 opcode_info->mnemonic); 2481 } 2482 2483 /* If we have indirect addressing in outputs we need to copy our alloca array 2484 * to the outputs slots specified by the called */ 2485 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2486 unsigned index, chan; 2487 assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1); 2488 for (index = 0; index < info->num_outputs; ++index) { 2489 for (chan = 0; chan < NUM_CHANNELS; ++chan) { 2490 bld.outputs[index][chan] = get_output_ptr(&bld, index, chan); 2491 } 2492 } 2493 } 2494 2495 if (0) { 2496 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); 2497 LLVMValueRef function = LLVMGetBasicBlockParent(block); 2498 debug_printf("11111111111111111111111111111 \n"); 2499 tgsi_dump(tokens, 0); 2500 lp_debug_dump_value(function); 2501 debug_printf("2222222222222222222222222222 \n"); 2502 } 2503 tgsi_parse_free( &parse ); 2504 2505 if (0) { 2506 LLVMModuleRef module = LLVMGetGlobalParent( 2507 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 2508 LLVMDumpModule(module); 2509 2510 } 2511 2512 FREE( bld.instructions ); 2513} 2514 2515