lp_bld_tgsi_soa.c revision efc82aef35a2aac5d2ed9774f6d28f2626796416
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"


/* Iterate CHAN over all four vector channels (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Test whether channel CHAN is enabled in the write mask of Dst[0]. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the channels enabled in Dst[0]'s write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Initial capacity of the instruction buffer (grown on demand). */
#define LP_MAX_INSTRUCTIONS 256


/**
 * Execution mask state for structured control flow (if/else, loops,
 * subroutine calls).  The combined exec_mask selects which SoA lanes
 * are currently active.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any of the cond/loop/call stacks is non-empty, i.e.
    * exec_mask may be partial and stores must be predicated. */
   boolean has_mask;

   /* Integer vector type matching bld->type, used for all masks. */
   LLVMTypeRef int_vec_type;

   /* Saved condition masks for nested if/else blocks. */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* Current innermost loop state; outer loops are saved in loop_stack. */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   LLVMValueRef break_var;   /* alloca carrying break_mask across iterations */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Lanes still live after RET inside a subroutine. */
   LLVMValueRef ret_mask;
   struct {
      int pc;                 /* return address (instruction index) */
      LLVMValueRef ret_mask;  /* caller's ret_mask to restore */
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* AND of cond/cont/break/ret masks; see lp_exec_mask_update(). */
   LLVMValueRef exec_mask;
};
/**
 * Private context for translating one TGSI shader to LLVM IR in SoA form.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for vector integer masks and indices */
   struct lp_build_context uint_bld;

   /* Builder for scalar elements of shader's data type (float) */
   struct lp_build_context elem_bld;

   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   const struct lp_build_sampler_soa *sampler;

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
    * set in the indirect_files field.
    * The outputs[] array above is unused then.
    */
   LLVMValueRef outputs_array;

   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
    * set in the indirect_files field.
    * The inputs[] array above is unused then.
    */
   LLVMValueRef inputs_array;

   const struct tgsi_shader_info *info;
   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};

/**
 * Initialize the execution mask: empty stacks, all lanes enabled
 * (all masks set to all-ones).
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->cond_stack_size = 0;
   mask->loop_stack_size = 0;
   mask->call_stack_size = 0;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
         LLVMConstAllOnes(mask->int_vec_type);
}

/**
 * Recompute exec_mask from the cond/cont/break/ret masks and refresh
 * has_mask.  Called after any of the component masks changes.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(mask->bld->builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (mask->call_stack_size) {
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}

/**
 * Enter an IF block: push the current condition mask and AND the new
 * condition 'val' into it.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask =
      LLVMBuildAnd(mask->bld->builder,
                   mask->cond_mask,
                   val,
                   "");
   lp_exec_mask_update(mask);
}

/**
 * ELSE: invert the current condition relative to the mask saved at the
 * matching IF (lanes active in the IF branch become inactive and vice
 * versa, within the enclosing mask).
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

/**
 * ENDIF: restore the condition mask saved at the matching IF.
 */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   assert(mask->cond_stack_size);
   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
   lp_exec_mask_update(mask);
}

/**
 * BGNLOOP: save the enclosing loop state, allocate a variable that
 * carries the break mask across iterations, and start a new basic
 * block for the loop body.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}

/**
 * BRK: permanently disable (until ENDLOOP) the lanes currently active.
 */
static void lp_exec_break(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "break");

   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
                                   mask->break_mask,
                                   exec_mask, "break_full");

   lp_exec_mask_update(mask);
}

/**
 * CONT: disable the currently active lanes for the remainder of this
 * iteration only (cont_mask is restored at ENDLOOP).
 */
static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


/**
 * ENDLOOP: loop back to the top while any lane is still active,
 * otherwise fall through and restore the enclosing loop's state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   /* Integer wide enough to hold the whole mask vector as one scalar,
    * so the "any lane active?" test is a single compare. */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. at least one lane still active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}

/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef real_val, dst_val;

      /* Read-modify-write: keep the old value in the disabled lanes. */
      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      LLVMBuildStore(mask->bld->builder, val, dst);
}

/**
 * CAL: push the return address and current ret_mask, then jump to the
 * subroutine by rewriting the interpreter's program counter.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
   mask->call_stack[mask->call_stack_size].pc = *pc;
   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
   mask->call_stack_size++;
   *pc = func;
}

/**
 * RET: in main() terminate execution (pc = -1); inside a subroutine,
 * disable the active lanes until the matching ENDSUB.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMValueRef exec_mask;

   if (mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }
   exec_mask = LLVMBuildNot(mask->bld->builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

/* BGNSUB: no state change needed; subroutine entry is handled by CAL. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static
/**
 * ENDSUB: pop the call stack, resume at the saved return address and
 * restore the caller's ret_mask.
 */
void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   assert(mask->call_stack_size);
   mask->call_stack_size--;
   *pc = mask->call_stack[mask->call_stack_size].pc;
   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
   lp_exec_mask_update(mask);
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned index,
             unsigned chan)
{
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
   }
   else {
      return bld->temps[index][chan];
   }
}

/**
 * Return pointer to a output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
static LLVMValueRef
get_output_ptr(struct lp_build_tgsi_soa_context *bld,
               unsigned index,
               unsigned chan)
{
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->base.gallivm,
                                                 index * 4 + chan);
      return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, "");
   }
   else {
      return bld->outputs[index][chan];
   }
}

/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_soa_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMValueRef res = bld->base.undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(bld->base.gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
                                             &index, 1, "gather_ptr");
      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
   }

   return res;
}


/**
 * Scatter/store vector.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   LLVMBuilderRef builder = bld->base.builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* Predicated lane: blend with the existing memory contents. */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_src_register *indirect_reg)
{
   struct lp_build_context *uint_bld = &bld->uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->SwizzleX;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   rel = LLVMBuildLoad(bld->base.builder,
                       bld->addr[indirect_reg->Index][swizzle],
                       "load addr reg");

   /* for indexing we want integers */
   rel = LLVMBuildFPToSI(bld->base.builder,
                         rel,
                         uint_bld->vec_type, "");

   index = lp_build_add(uint_bld, base, rel);

   /* Clamp to the declared size of the register file to avoid
    * out-of-bounds accesses. */
   max_index = lp_build_const_int_vec(bld->base.gallivm,
                                      uint_bld->type,
                                      bld->info->file_max[reg_file]);

   assert(!uint_bld->type.sign);
   index = lp_build_min(uint_bld, index, max_index);

   return index;
}


/**
 * Register fetch.
 * Load one channel of a source operand as an SoA vector, applying the
 * operand's swizzle, optional indirect addressing and sign modifiers.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   struct lp_build_context *uint_bld = &bld->uint_bld;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef indirect_index = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
         LLVMValueRef index_vec;  /* index into the const buffer */

         /* index_vec = indirect_index * 4 + swizzle */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         /* Constants are the same for all SoA lanes. */
         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the input array */
         LLVMValueRef inputs_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast inputs_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         inputs_array = LLVMBuildBitCast(uint_bld->builder, bld->inputs_array,
                                         float4_ptr_type, "");

         /* Gather values from the input register array */
         res = build_gather(bld, inputs_array, index_vec);
      } else {
         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
            /* Inputs live in the flat array; compute the element offset. */
            LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
            LLVMValueRef input_ptr = LLVMBuildGEP(bld->base.builder,
                                                  bld->inputs_array, &lindex, 1, "");
            res = LLVMBuildLoad(bld->base.builder, input_ptr, "");
         }
         else {
            res = bld->inputs[reg->Register.Index][swizzle];
         }
      }
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(bld->base.gallivm, uint_bld->type,
                                   bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the temp register array */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->base.gallivm->context), 0);
         temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* Apply the source modifier (abs / negate / both). */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}


/**
 * Register fetch with derivatives.
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   const unsigned chan_index,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   LLVMValueRef src;

   src = emit_fetch(bld, inst, index, chan_index);

   if(res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if(ddx)
      *ddx = lp_build_ddx(&bld->base, src);

   if(ddy)
      *ddy = lp_build_ddy(&bld->base, src);
}


/**
 * Predicate.
 * Fetch the per-channel predicate masks for a predicated instruction;
 * pred[chan] is NULL when the instruction is not predicated.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.gallivm,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}


/**
 * Register store.
 * Store 'value' into one channel of the destination register, applying
 * the saturate modifier, optional indirect addressing, the execution
 * mask and the optional per-channel predicate.
 */
static void
emit_store(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   struct gallivm_state *gallivm = bld->base.gallivm;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *uint_bld = &bld->uint_bld;
   LLVMValueRef indirect_index = NULL;

   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      value = lp_build_max(&bld->base, value, bld->base.zero);
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0));
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   default:
      assert(0);
   }

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      if (reg->Register.Indirect) {
         LLVMBuilderRef builder = bld->base.builder;
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
                                          float_ptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index,
                                               chan_index);
         lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMBuilderRef builder = bld->base.builder;
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type,
                                   bld->base.type.length);
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                        float_ptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index,
                                              chan_index);
         lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   case TGSI_FILE_PREDICATE:
      lp_exec_mask_store(&bld->exec_mask, pred, value,
                         bld->preds[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }
}


/**
 * High-level instruction translators.
 */

/**
 * Translate a TEX/TXB/TXL/TXP/TXD instruction: fetch coordinates,
 * derivatives and LOD controls, then delegate to the sampler callback.
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* LOD bias and explicit LOD both live in the W channel of src 0. */
   if (modifier ==
LP_BLD_TEX_MODIFIER_LOD_BIAS) { 1062 lod_bias = emit_fetch( bld, inst, 0, 3 ); 1063 explicit_lod = NULL; 1064 } 1065 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 1066 lod_bias = NULL; 1067 explicit_lod = emit_fetch( bld, inst, 0, 3 ); 1068 } 1069 else { 1070 lod_bias = NULL; 1071 explicit_lod = NULL; 1072 } 1073 1074 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 1075 oow = emit_fetch( bld, inst, 0, 3 ); 1076 oow = lp_build_rcp(&bld->base, oow); 1077 } 1078 1079 for (i = 0; i < num_coords; i++) { 1080 coords[i] = emit_fetch( bld, inst, 0, i ); 1081 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 1082 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 1083 } 1084 for (i = num_coords; i < 3; i++) { 1085 coords[i] = bld->base.undef; 1086 } 1087 1088 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 1089 LLVMValueRef index0 = lp_build_const_int32(bld->base.gallivm, 0); 1090 for (i = 0; i < num_coords; i++) { 1091 LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); 1092 LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); 1093 ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, ""); 1094 ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, ""); 1095 } 1096 unit = inst->Src[3].Register.Index; 1097 } else { 1098 for (i = 0; i < num_coords; i++) { 1099 ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); 1100 ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); 1101 } 1102 unit = inst->Src[1].Register.Index; 1103 } 1104 for (i = num_coords; i < 3; i++) { 1105 ddx[i] = LLVMGetUndef(bld->base.elem_type); 1106 ddy[i] = LLVMGetUndef(bld->base.elem_type); 1107 } 1108 1109 bld->sampler->emit_fetch_texel(bld->sampler, 1110 bld->base.gallivm, 1111 bld->base.type, 1112 unit, num_coords, coords, 1113 ddx, ddy, 1114 lod_bias, explicit_lod, 1115 texel); 1116} 1117 1118static boolean 1119near_end_of_shader(struct lp_build_tgsi_soa_context *bld, 1120 int pc) 1121{ 1122 int i; 1123 1124 for (i = 0; i < 5; i++) { 1125 unsigned opcode; 1126 
1127 if (pc + i >= bld->info->num_instructions) 1128 return TRUE; 1129 1130 opcode = bld->instructions[pc + i].Instruction.Opcode; 1131 1132 if (opcode == TGSI_OPCODE_END) 1133 return TRUE; 1134 1135 if (opcode == TGSI_OPCODE_TEX || 1136 opcode == TGSI_OPCODE_TXP || 1137 opcode == TGSI_OPCODE_TXD || 1138 opcode == TGSI_OPCODE_TXB || 1139 opcode == TGSI_OPCODE_TXL || 1140 opcode == TGSI_OPCODE_TXF || 1141 opcode == TGSI_OPCODE_TXQ || 1142 opcode == TGSI_OPCODE_CAL || 1143 opcode == TGSI_OPCODE_CALLNZ || 1144 opcode == TGSI_OPCODE_IF || 1145 opcode == TGSI_OPCODE_IFC || 1146 opcode == TGSI_OPCODE_BGNLOOP || 1147 opcode == TGSI_OPCODE_SWITCH) 1148 return FALSE; 1149 } 1150 1151 return TRUE; 1152} 1153 1154 1155 1156/** 1157 * Kill fragment if any of the src register values are negative. 1158 */ 1159static void 1160emit_kil( 1161 struct lp_build_tgsi_soa_context *bld, 1162 const struct tgsi_full_instruction *inst, 1163 int pc) 1164{ 1165 const struct tgsi_full_src_register *reg = &inst->Src[0]; 1166 LLVMValueRef terms[NUM_CHANNELS]; 1167 LLVMValueRef mask; 1168 unsigned chan_index; 1169 1170 memset(&terms, 0, sizeof terms); 1171 1172 FOR_EACH_CHANNEL( chan_index ) { 1173 unsigned swizzle; 1174 1175 /* Unswizzle channel */ 1176 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1177 1178 /* Check if the component has not been already tested. */ 1179 assert(swizzle < NUM_CHANNELS); 1180 if( !terms[swizzle] ) 1181 /* TODO: change the comparison operator instead of setting the sign */ 1182 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 1183 } 1184 1185 mask = NULL; 1186 FOR_EACH_CHANNEL( chan_index ) { 1187 if(terms[chan_index]) { 1188 LLVMValueRef chan_mask; 1189 1190 /* 1191 * If term < 0 then mask = 0 else mask = ~0. 
1192 */ 1193 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 1194 1195 if(mask) 1196 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); 1197 else 1198 mask = chan_mask; 1199 } 1200 } 1201 1202 if(mask) { 1203 lp_build_mask_update(bld->mask, mask); 1204 1205 if (!near_end_of_shader(bld, pc)) 1206 lp_build_mask_check(bld->mask); 1207 } 1208} 1209 1210 1211/** 1212 * Predicated fragment kill. 1213 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 1214 * The only predication is the execution mask which will apply if 1215 * we're inside a loop or conditional. 1216 */ 1217static void 1218emit_kilp(struct lp_build_tgsi_soa_context *bld, 1219 const struct tgsi_full_instruction *inst, 1220 int pc) 1221{ 1222 LLVMValueRef mask; 1223 1224 /* For those channels which are "alive", disable fragment shader 1225 * execution. 1226 */ 1227 if (bld->exec_mask.has_mask) { 1228 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); 1229 } 1230 else { 1231 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type); 1232 mask = zero; 1233 } 1234 1235 lp_build_mask_update(bld->mask, mask); 1236 1237 if (!near_end_of_shader(bld, pc)) 1238 lp_build_mask_check(bld->mask); 1239} 1240 1241 1242/** 1243 * Emit code which will dump the value of all the temporary registers 1244 * to stdout. 
1245 */ 1246static void 1247emit_dump_temps(struct lp_build_tgsi_soa_context *bld) 1248{ 1249 struct gallivm_state *gallivm = bld->base.gallivm; 1250 LLVMBuilderRef builder = gallivm->builder; 1251 LLVMValueRef temp_ptr; 1252 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); 1253 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1); 1254 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); 1255 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); 1256 int index; 1257 int n = bld->info->file_max[TGSI_FILE_TEMPORARY]; 1258 1259 for (index = 0; index < n; index++) { 1260 LLVMValueRef idx = lp_build_const_int32(gallivm, index); 1261 LLVMValueRef v[4][4], res; 1262 int chan; 1263 1264 lp_build_printf(gallivm, "TEMP[%d]:\n", idx); 1265 1266 for (chan = 0; chan < 4; chan++) { 1267 temp_ptr = get_temp_ptr(bld, index, chan); 1268 res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); 1269 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); 1270 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); 1271 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, ""); 1272 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, ""); 1273 } 1274 1275 lp_build_printf(gallivm, " X: %f %f %f %f\n", 1276 v[0][0], v[0][1], v[0][2], v[0][3]); 1277 lp_build_printf(gallivm, " Y: %f %f %f %f\n", 1278 v[1][0], v[1][1], v[1][2], v[1][3]); 1279 lp_build_printf(gallivm, " Z: %f %f %f %f\n", 1280 v[2][0], v[2][1], v[2][2], v[2][3]); 1281 lp_build_printf(gallivm, " W: %f %f %f %f\n", 1282 v[3][0], v[3][1], v[3][2], v[3][3]); 1283 } 1284} 1285 1286 1287 1288static void 1289emit_declaration( 1290 struct lp_build_tgsi_soa_context *bld, 1291 const struct tgsi_full_declaration *decl) 1292{ 1293 struct gallivm_state *gallivm = bld->base.gallivm; 1294 LLVMTypeRef vec_type = bld->base.vec_type; 1295 const unsigned first = decl->Range.First; 1296 const unsigned last = decl->Range.Last; 1297 unsigned idx, i; 1298 1299 for (idx = first; idx <= last; ++idx) { 1300 assert(last <= 
bld->info->file_max[decl->Declaration.File]); 1301 switch (decl->Declaration.File) { 1302 case TGSI_FILE_TEMPORARY: 1303 assert(idx < LP_MAX_TGSI_TEMPS); 1304 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { 1305 for (i = 0; i < NUM_CHANNELS; i++) 1306 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); 1307 } 1308 break; 1309 1310 case TGSI_FILE_OUTPUT: 1311 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 1312 for (i = 0; i < NUM_CHANNELS; i++) 1313 bld->outputs[idx][i] = lp_build_alloca(gallivm, 1314 vec_type, "output"); 1315 } 1316 break; 1317 1318 case TGSI_FILE_ADDRESS: 1319 assert(idx < LP_MAX_TGSI_ADDRS); 1320 for (i = 0; i < NUM_CHANNELS; i++) 1321 bld->addr[idx][i] = lp_build_alloca(gallivm, vec_type, "addr"); 1322 break; 1323 1324 case TGSI_FILE_PREDICATE: 1325 assert(idx < LP_MAX_TGSI_PREDS); 1326 for (i = 0; i < NUM_CHANNELS; i++) 1327 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, 1328 "predicate"); 1329 break; 1330 1331 default: 1332 /* don't need to declare other vars */ 1333 break; 1334 } 1335 } 1336} 1337 1338 1339/** 1340 * Emit LLVM for one TGSI instruction. 1341 * \param return TRUE for success, FALSE otherwise 1342 */ 1343static boolean 1344emit_instruction( 1345 struct lp_build_tgsi_soa_context *bld, 1346 const struct tgsi_full_instruction *inst, 1347 const struct tgsi_opcode_info *info, 1348 int *pc) 1349{ 1350 unsigned chan_index; 1351 LLVMValueRef src0, src1, src2; 1352 LLVMValueRef tmp0, tmp1, tmp2; 1353 LLVMValueRef tmp3 = NULL; 1354 LLVMValueRef tmp4 = NULL; 1355 LLVMValueRef tmp5 = NULL; 1356 LLVMValueRef tmp6 = NULL; 1357 LLVMValueRef tmp7 = NULL; 1358 LLVMValueRef res; 1359 LLVMValueRef dst0[NUM_CHANNELS]; 1360 1361 /* 1362 * Stores and write masks are handled in a general fashion after the long 1363 * instruction opcode switch statement. 1364 * 1365 * Although not stricitly necessary, we avoid generating instructions for 1366 * channels which won't be stored, in cases where's that easy. 
For some 1367 * complex instructions, like texture sampling, it is more convenient to 1368 * assume a full writemask and then let LLVM optimization passes eliminate 1369 * redundant code. 1370 */ 1371 1372 (*pc)++; 1373 1374 assert(info->num_dst <= 1); 1375 if (info->num_dst) { 1376 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1377 dst0[chan_index] = bld->base.undef; 1378 } 1379 } 1380 1381 switch (inst->Instruction.Opcode) { 1382 case TGSI_OPCODE_ARL: 1383 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1384 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1385 tmp0 = lp_build_floor(&bld->base, tmp0); 1386 dst0[chan_index] = tmp0; 1387 } 1388 break; 1389 1390 case TGSI_OPCODE_MOV: 1391 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1392 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1393 } 1394 break; 1395 1396 case TGSI_OPCODE_LIT: 1397 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 1398 dst0[CHAN_X] = bld->base.one; 1399 } 1400 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1401 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1402 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1403 } 1404 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1405 /* XMM[1] = SrcReg[0].yyyy */ 1406 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1407 /* XMM[1] = max(XMM[1], 0) */ 1408 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1409 /* XMM[2] = SrcReg[0].wwww */ 1410 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1411 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1412 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1413 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1414 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1415 } 1416 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1417 dst0[CHAN_W] = bld->base.one; 1418 } 1419 break; 1420 1421 case TGSI_OPCODE_RCP: 1422 /* TGSI_OPCODE_RECIP */ 1423 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1424 res = lp_build_rcp(&bld->base, src0); 1425 
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1426 dst0[chan_index] = res; 1427 } 1428 break; 1429 1430 case TGSI_OPCODE_RSQ: 1431 /* TGSI_OPCODE_RECIPSQRT */ 1432 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1433 src0 = lp_build_abs(&bld->base, src0); 1434 res = lp_build_rsqrt(&bld->base, src0); 1435 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1436 dst0[chan_index] = res; 1437 } 1438 break; 1439 1440 case TGSI_OPCODE_EXP: 1441 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1442 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1443 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1444 LLVMValueRef *p_exp2_int_part = NULL; 1445 LLVMValueRef *p_frac_part = NULL; 1446 LLVMValueRef *p_exp2 = NULL; 1447 1448 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1449 1450 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1451 p_exp2_int_part = &tmp0; 1452 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1453 p_frac_part = &tmp1; 1454 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1455 p_exp2 = &tmp2; 1456 1457 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1458 1459 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1460 dst0[CHAN_X] = tmp0; 1461 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1462 dst0[CHAN_Y] = tmp1; 1463 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1464 dst0[CHAN_Z] = tmp2; 1465 } 1466 /* dst.w = 1.0 */ 1467 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1468 dst0[CHAN_W] = bld->base.one; 1469 } 1470 break; 1471 1472 case TGSI_OPCODE_LOG: 1473 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1474 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1475 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1476 LLVMValueRef *p_floor_log2 = NULL; 1477 LLVMValueRef *p_exp = NULL; 1478 LLVMValueRef *p_log2 = NULL; 1479 1480 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1481 src0 = lp_build_abs( &bld->base, src0 ); 1482 1483 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1484 p_floor_log2 = &tmp0; 1485 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1486 p_exp = &tmp1; 1487 if 
(IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1488 p_log2 = &tmp2; 1489 1490 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1491 1492 /* dst.x = floor(lg2(abs(src.x))) */ 1493 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1494 dst0[CHAN_X] = tmp0; 1495 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1496 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1497 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1498 } 1499 /* dst.z = lg2(abs(src.x)) */ 1500 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1501 dst0[CHAN_Z] = tmp2; 1502 } 1503 /* dst.w = 1.0 */ 1504 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1505 dst0[CHAN_W] = bld->base.one; 1506 } 1507 break; 1508 1509 case TGSI_OPCODE_MUL: 1510 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1511 src0 = emit_fetch( bld, inst, 0, chan_index ); 1512 src1 = emit_fetch( bld, inst, 1, chan_index ); 1513 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1514 } 1515 break; 1516 1517 case TGSI_OPCODE_ADD: 1518 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1519 src0 = emit_fetch( bld, inst, 0, chan_index ); 1520 src1 = emit_fetch( bld, inst, 1, chan_index ); 1521 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1522 } 1523 break; 1524 1525 case TGSI_OPCODE_DP3: 1526 /* TGSI_OPCODE_DOT3 */ 1527 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1528 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1529 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1530 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1531 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1532 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1533 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1534 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1535 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1536 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1537 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1538 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1539 dst0[chan_index] = tmp0; 1540 } 1541 break; 1542 1543 case TGSI_OPCODE_DP4: 1544 /* TGSI_OPCODE_DOT4 
*/ 1545 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1546 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1547 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1548 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1549 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1550 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1551 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1552 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1553 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1554 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1555 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1556 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1557 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1558 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1559 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1560 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1561 dst0[chan_index] = tmp0; 1562 } 1563 break; 1564 1565 case TGSI_OPCODE_DST: 1566 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1567 dst0[CHAN_X] = bld->base.one; 1568 } 1569 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1570 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1571 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1572 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1573 } 1574 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1575 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1576 } 1577 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1578 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1579 } 1580 break; 1581 1582 case TGSI_OPCODE_MIN: 1583 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1584 src0 = emit_fetch( bld, inst, 0, chan_index ); 1585 src1 = emit_fetch( bld, inst, 1, chan_index ); 1586 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1587 } 1588 break; 1589 1590 case TGSI_OPCODE_MAX: 1591 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1592 src0 = emit_fetch( bld, inst, 0, chan_index ); 1593 src1 = emit_fetch( bld, inst, 1, chan_index ); 1594 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1595 } 1596 break; 1597 1598 case 
TGSI_OPCODE_SLT: 1599 /* TGSI_OPCODE_SETLT */ 1600 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1601 src0 = emit_fetch( bld, inst, 0, chan_index ); 1602 src1 = emit_fetch( bld, inst, 1, chan_index ); 1603 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1604 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1605 } 1606 break; 1607 1608 case TGSI_OPCODE_SGE: 1609 /* TGSI_OPCODE_SETGE */ 1610 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1611 src0 = emit_fetch( bld, inst, 0, chan_index ); 1612 src1 = emit_fetch( bld, inst, 1, chan_index ); 1613 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1614 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1615 } 1616 break; 1617 1618 case TGSI_OPCODE_MAD: 1619 /* TGSI_OPCODE_MADD */ 1620 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1621 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1622 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1623 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1624 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1625 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1626 dst0[chan_index] = tmp0; 1627 } 1628 break; 1629 1630 case TGSI_OPCODE_SUB: 1631 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1632 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1633 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1634 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1635 } 1636 break; 1637 1638 case TGSI_OPCODE_LRP: 1639 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1640 src0 = emit_fetch( bld, inst, 0, chan_index ); 1641 src1 = emit_fetch( bld, inst, 1, chan_index ); 1642 src2 = emit_fetch( bld, inst, 2, chan_index ); 1643 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1644 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1645 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1646 } 1647 break; 1648 1649 case TGSI_OPCODE_CND: 1650 FOR_EACH_DST0_ENABLED_CHANNEL( inst, 
chan_index ) { 1651 src0 = emit_fetch( bld, inst, 0, chan_index ); 1652 src1 = emit_fetch( bld, inst, 1, chan_index ); 1653 src2 = emit_fetch( bld, inst, 2, chan_index ); 1654 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); 1655 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1656 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1657 } 1658 break; 1659 1660 case TGSI_OPCODE_DP2A: 1661 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1662 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1663 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1664 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1665 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1666 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1667 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1668 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1669 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1670 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1671 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1672 } 1673 break; 1674 1675 case TGSI_OPCODE_FRC: 1676 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1677 src0 = emit_fetch( bld, inst, 0, chan_index ); 1678 tmp0 = lp_build_floor(&bld->base, src0); 1679 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1680 dst0[chan_index] = tmp0; 1681 } 1682 break; 1683 1684 case TGSI_OPCODE_CLAMP: 1685 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1686 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1687 src1 = emit_fetch( bld, inst, 1, chan_index ); 1688 src2 = emit_fetch( bld, inst, 2, chan_index ); 1689 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1690 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1691 dst0[chan_index] = tmp0; 1692 } 1693 break; 1694 1695 case TGSI_OPCODE_FLR: 1696 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1697 
tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1698 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1699 } 1700 break; 1701 1702 case TGSI_OPCODE_ROUND: 1703 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1704 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1705 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1706 } 1707 break; 1708 1709 case TGSI_OPCODE_EX2: { 1710 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1711 tmp0 = lp_build_exp2( &bld->base, tmp0); 1712 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1713 dst0[chan_index] = tmp0; 1714 } 1715 break; 1716 } 1717 1718 case TGSI_OPCODE_LG2: 1719 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1720 tmp0 = lp_build_log2( &bld->base, tmp0); 1721 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1722 dst0[chan_index] = tmp0; 1723 } 1724 break; 1725 1726 case TGSI_OPCODE_POW: 1727 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1728 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1729 res = lp_build_pow( &bld->base, src0, src1 ); 1730 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1731 dst0[chan_index] = res; 1732 } 1733 break; 1734 1735 case TGSI_OPCODE_XPD: 1736 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1737 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1738 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1739 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1740 } 1741 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1742 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1743 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1744 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1745 } 1746 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1747 tmp2 = tmp0; 1748 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1749 tmp5 = tmp3; 1750 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1751 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1752 dst0[CHAN_X] = tmp2; 1753 } 1754 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1755 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1756 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1757 tmp5 = emit_fetch( bld, inst, 0, 
CHAN_X ); 1758 } 1759 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1760 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1761 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1762 tmp3 = lp_build_sub( &bld->base, tmp3, tmp1); 1763 dst0[CHAN_Y] = tmp3; 1764 } 1765 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1766 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1767 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1768 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1769 dst0[CHAN_Z] = tmp5; 1770 } 1771 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1772 dst0[CHAN_W] = bld->base.one; 1773 } 1774 break; 1775 1776 case TGSI_OPCODE_ABS: 1777 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1778 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1779 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1780 } 1781 break; 1782 1783 case TGSI_OPCODE_RCC: 1784 /* deprecated? */ 1785 assert(0); 1786 return FALSE; 1787 1788 case TGSI_OPCODE_DPH: 1789 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1790 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1791 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1792 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1793 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1794 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1795 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1796 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1797 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1798 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1799 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1800 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1801 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1802 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1803 dst0[chan_index] = tmp0; 1804 } 1805 break; 1806 1807 case TGSI_OPCODE_COS: 1808 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1809 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1810 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1811 dst0[chan_index] = tmp0; 1812 } 1813 break; 1814 1815 case TGSI_OPCODE_DDX: 1816 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) 
{ 1817 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1818 } 1819 break; 1820 1821 case TGSI_OPCODE_DDY: 1822 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1823 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1824 } 1825 break; 1826 1827 case TGSI_OPCODE_KILP: 1828 /* predicated kill */ 1829 emit_kilp( bld, inst, (*pc)-1 ); 1830 break; 1831 1832 case TGSI_OPCODE_KIL: 1833 /* conditional kill */ 1834 emit_kil( bld, inst, (*pc)-1 ); 1835 break; 1836 1837 case TGSI_OPCODE_PK2H: 1838 return FALSE; 1839 break; 1840 1841 case TGSI_OPCODE_PK2US: 1842 return FALSE; 1843 break; 1844 1845 case TGSI_OPCODE_PK4B: 1846 return FALSE; 1847 break; 1848 1849 case TGSI_OPCODE_PK4UB: 1850 return FALSE; 1851 break; 1852 1853 case TGSI_OPCODE_RFL: 1854 return FALSE; 1855 break; 1856 1857 case TGSI_OPCODE_SEQ: 1858 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1859 src0 = emit_fetch( bld, inst, 0, chan_index ); 1860 src1 = emit_fetch( bld, inst, 1, chan_index ); 1861 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1862 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1863 } 1864 break; 1865 1866 case TGSI_OPCODE_SFL: 1867 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1868 dst0[chan_index] = bld->base.zero; 1869 } 1870 break; 1871 1872 case TGSI_OPCODE_SGT: 1873 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1874 src0 = emit_fetch( bld, inst, 0, chan_index ); 1875 src1 = emit_fetch( bld, inst, 1, chan_index ); 1876 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1877 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1878 } 1879 break; 1880 1881 case TGSI_OPCODE_SIN: 1882 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1883 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1884 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1885 dst0[chan_index] = tmp0; 1886 } 1887 break; 1888 1889 case TGSI_OPCODE_SLE: 1890 
FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1891 src0 = emit_fetch( bld, inst, 0, chan_index ); 1892 src1 = emit_fetch( bld, inst, 1, chan_index ); 1893 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, src0, src1 ); 1894 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1895 } 1896 break; 1897 1898 case TGSI_OPCODE_SNE: 1899 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1900 src0 = emit_fetch( bld, inst, 0, chan_index ); 1901 src1 = emit_fetch( bld, inst, 1, chan_index ); 1902 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1903 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1904 } 1905 break; 1906 1907 case TGSI_OPCODE_STR: 1908 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1909 dst0[chan_index] = bld->base.one; 1910 } 1911 break; 1912 1913 case TGSI_OPCODE_TEX: 1914 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); 1915 break; 1916 1917 case TGSI_OPCODE_TXD: 1918 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1919 break; 1920 1921 case TGSI_OPCODE_UP2H: 1922 /* deprecated */ 1923 assert (0); 1924 return FALSE; 1925 break; 1926 1927 case TGSI_OPCODE_UP2US: 1928 /* deprecated */ 1929 assert(0); 1930 return FALSE; 1931 break; 1932 1933 case TGSI_OPCODE_UP4B: 1934 /* deprecated */ 1935 assert(0); 1936 return FALSE; 1937 break; 1938 1939 case TGSI_OPCODE_UP4UB: 1940 /* deprecated */ 1941 assert(0); 1942 return FALSE; 1943 break; 1944 1945 case TGSI_OPCODE_X2D: 1946 /* deprecated? 
*/ 1947 assert(0); 1948 return FALSE; 1949 break; 1950 1951 case TGSI_OPCODE_ARA: 1952 /* deprecated */ 1953 assert(0); 1954 return FALSE; 1955 break; 1956 1957 case TGSI_OPCODE_ARR: 1958 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1959 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1960 tmp0 = lp_build_round(&bld->base, tmp0); 1961 dst0[chan_index] = tmp0; 1962 } 1963 break; 1964 1965 case TGSI_OPCODE_BRA: 1966 /* deprecated */ 1967 assert(0); 1968 return FALSE; 1969 break; 1970 1971 case TGSI_OPCODE_CAL: 1972 lp_exec_mask_call(&bld->exec_mask, 1973 inst->Label.Label, 1974 pc); 1975 1976 break; 1977 1978 case TGSI_OPCODE_RET: 1979 lp_exec_mask_ret(&bld->exec_mask, pc); 1980 break; 1981 1982 case TGSI_OPCODE_END: 1983 if (0) { 1984 /* for debugging */ 1985 emit_dump_temps(bld); 1986 } 1987 *pc = -1; 1988 break; 1989 1990 case TGSI_OPCODE_SSG: 1991 /* TGSI_OPCODE_SGN */ 1992 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1993 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1994 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1995 } 1996 break; 1997 1998 case TGSI_OPCODE_CMP: 1999 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2000 src0 = emit_fetch( bld, inst, 0, chan_index ); 2001 src1 = emit_fetch( bld, inst, 1, chan_index ); 2002 src2 = emit_fetch( bld, inst, 2, chan_index ); 2003 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 2004 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 2005 } 2006 break; 2007 2008 case TGSI_OPCODE_SCS: 2009 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 2010 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 2011 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 2012 } 2013 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 2014 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 2015 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 2016 } 2017 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 2018 dst0[CHAN_Z] = bld->base.zero; 2019 } 2020 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 2021 
dst0[CHAN_W] = bld->base.one; 2022 } 2023 break; 2024 2025 case TGSI_OPCODE_TXB: 2026 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); 2027 break; 2028 2029 case TGSI_OPCODE_NRM: 2030 /* fall-through */ 2031 case TGSI_OPCODE_NRM4: 2032 /* 3 or 4-component normalization */ 2033 { 2034 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 2035 2036 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 2037 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 2038 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 2039 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 2040 2041 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 2042 2043 /* xmm4 = src.x */ 2044 /* xmm0 = src.x * src.x */ 2045 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2046 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2047 tmp4 = tmp0; 2048 } 2049 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 2050 2051 /* xmm5 = src.y */ 2052 /* xmm0 = xmm0 + src.y * src.y */ 2053 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 2054 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2055 tmp5 = tmp1; 2056 } 2057 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2058 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2059 2060 /* xmm6 = src.z */ 2061 /* xmm0 = xmm0 + src.z * src.z */ 2062 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 2063 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2064 tmp6 = tmp1; 2065 } 2066 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2067 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2068 2069 if (dims == 4) { 2070 /* xmm7 = src.w */ 2071 /* xmm0 = xmm0 + src.w * src.w */ 2072 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 2073 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 2074 tmp7 = tmp1; 2075 } 2076 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2077 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2078 } 2079 2080 /* xmm1 = 1 / sqrt(xmm0) */ 2081 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 2082 2083 /* dst.x = xmm1 * src.x */ 2084 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2085 dst0[CHAN_X] = lp_build_mul( &bld->base, 
tmp4, tmp1); 2086 } 2087 2088 /* dst.y = xmm1 * src.y */ 2089 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2090 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 2091 } 2092 2093 /* dst.z = xmm1 * src.z */ 2094 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2095 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 2096 } 2097 2098 /* dst.w = xmm1 * src.w */ 2099 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 2100 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 2101 } 2102 } 2103 2104 /* dst.w = 1.0 */ 2105 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 2106 dst0[CHAN_W] = bld->base.one; 2107 } 2108 } 2109 break; 2110 2111 case TGSI_OPCODE_DIV: 2112 /* deprecated */ 2113 assert( 0 ); 2114 return FALSE; 2115 break; 2116 2117 case TGSI_OPCODE_DP2: 2118 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 2119 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 2120 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 2121 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 2122 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 2123 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 2124 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 2125 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2126 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 2127 } 2128 break; 2129 2130 case TGSI_OPCODE_TXL: 2131 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 2132 break; 2133 2134 case TGSI_OPCODE_TXP: 2135 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); 2136 break; 2137 2138 case TGSI_OPCODE_BRK: 2139 lp_exec_break(&bld->exec_mask); 2140 break; 2141 2142 case TGSI_OPCODE_IF: 2143 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2144 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 2145 tmp0, bld->base.zero); 2146 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 2147 break; 2148 2149 case TGSI_OPCODE_BGNLOOP: 2150 
      lp_exec_bgnloop(&bld->exec_mask);
      break;

   case TGSI_OPCODE_BGNSUB:
      lp_exec_mask_bgnsub(&bld->exec_mask);
      break;

   case TGSI_OPCODE_ELSE:
      /* Invert the lane mask pushed by the matching IF. */
      lp_exec_mask_cond_invert(&bld->exec_mask);
      break;

   case TGSI_OPCODE_ENDIF:
      lp_exec_mask_cond_pop(&bld->exec_mask);
      break;

   case TGSI_OPCODE_ENDLOOP:
      lp_exec_endloop(bld->base.gallivm, &bld->exec_mask);
      break;

   case TGSI_OPCODE_ENDSUB:
      /* pc is handed to the helper so it can return to the caller's
       * saved program counter.
       */
      lp_exec_mask_endsub(&bld->exec_mask, pc);
      break;

   case TGSI_OPCODE_PUSHA:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_POPA:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CEIL:
      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
         tmp0 = emit_fetch( bld, inst, 0, chan_index );
         dst0[chan_index] = lp_build_ceil(&bld->base, tmp0);
      }
      break;

   case TGSI_OPCODE_I2F:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_NOT:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TRUNC:
      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
         tmp0 = emit_fetch( bld, inst, 0, chan_index );
         dst0[chan_index] = lp_build_trunc(&bld->base, tmp0);
      }
      break;

   case TGSI_OPCODE_SHL:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_ISHR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_AND:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_OR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_MOD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_XOR:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_SAD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXF:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXQ:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CONT:
      lp_exec_continue(&bld->exec_mask);
      break;

   case TGSI_OPCODE_EMIT:
      /* Geometry-shader opcode: not handled by this translator. */
      return FALSE;
      break;

   case TGSI_OPCODE_ENDPRIM:
      /* Geometry-shader opcode: not handled by this translator. */
      return FALSE;
      break;

   case TGSI_OPCODE_NOP:
      break;

   default:
      /* Unhandled opcode: report failure so the caller can warn. */
      return FALSE;
   }

   /* Store the computed channels into dst register 0, predicated per lane. */
   if(info->num_dst) {
      LLVMValueRef pred[NUM_CHANNELS];

      emit_fetch_predicate( bld, inst, pred );

      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
      }
   }

   return TRUE;
}


/**
 * Translate a TGSI token stream into LLVM IR, one vector lane per pixel (SoA).
 *
 * \param gallivm    LLVM context/builder wrapper used for all IR emission
 * \param tokens     TGSI shader to translate
 * \param type       vector type of each channel (width/length of the SoA lanes)
 * \param mask       execution mask controlling which lanes are live
 * \param consts_ptr pointer to the constant buffer
 * \param pos        fragment position values (may be unused by the shader)
 * \param inputs     interpolated shader inputs, indexed [attrib][channel]
 * \param outputs    receives pointers/values for shader outputs, [attrib][channel]
 * \param sampler    texture sampling code generator
 * \param info       scan results for the shader (file sizes, indirect usage, ...)
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_soa_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   uint num_instructions = 0;
   unsigned i;
   int pc = 0;           /* program counter for the emit loop below */

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* signed variant of the channel type */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, gallivm, type);
   lp_build_context_init(&bld.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.info = info;
   bld.indirect_files = info->indirect_files;
   /* Instructions are buffered (rather than emitted as parsed) so the pc-driven
    * loop below can jump around, e.g. for subroutine calls/returns.
    */
   bld.instructions = (struct tgsi_full_instruction *)
                      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
   bld.max_instructions = LP_MAX_INSTRUCTIONS;

   if (!bld.instructions) {
      return;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.base);

   /* Files accessed with indirect (relative) addressing get a real LLVM
    * alloca array so they can be indexed dynamically; file_max is the highest
    * register index, hence the "*4 + 4" channel count.
    */
   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld.temps_array = lp_build_array_alloca(gallivm,
                                              bld.base.vec_type, array_size,
                                              "temp_array");
   }

   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld.outputs_array = lp_build_array_alloca(gallivm,
                                                bld.base.vec_type, array_size,
                                                "output_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld.base.vec_type;
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm, info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld.inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < info->num_inputs; ++index) {
         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
            /* flat index: 4 channels per input attribute */
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(bld.base.builder, bld.inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld.inputs[index][chan];
            if (value)
               LLVMBuildStore(bld.base.builder, value, input_ptr);
         }
      }
   }

   /* First pass: walk the token stream, handling declarations/immediates
    * immediately and buffering instructions for the second pass.
    */
   tgsi_parse_init( &parse, tokens );

   while( !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            /* save expanded instruction */
            if (num_instructions == bld.max_instructions) {
               /* grow the buffer; on failure the instruction is dropped
                * (NOTE(review): silently — the shader will be truncated)
                */
               struct tgsi_full_instruction *instructions;
               instructions = REALLOC(bld.instructions,
                                      bld.max_instructions
                                      * sizeof(struct tgsi_full_instruction),
                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
                                      * sizeof(struct tgsi_full_instruction));
               if (!instructions) {
                  break;
               }
               bld.instructions = instructions;
               bld.max_instructions += LP_MAX_INSTRUCTIONS;
            }

            memcpy(bld.instructions + num_instructions,
                   &parse.FullToken.FullInstruction,
                   sizeof(bld.instructions[0]));

            num_instructions++;
         }

         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
            for( i = 0; i < size; ++i )
               bld.immediates[num_immediates][i] =
                  lp_build_const_vec(gallivm, type, parse.FullToken.FullImmediate.u[i].Float);
            /* pad unused channels with undef */
            for( i = size; i < 4; ++i )
               bld.immediates[num_immediates][i] = bld.base.undef;
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert( 0 );
      }
   }

   /* Second pass: emit LLVM IR for the buffered instructions.  pc is advanced
    * (or redirected, for subroutines) by emit_instruction; -1 terminates.
    */
   while (pc != -1) {
      struct tgsi_full_instruction *instr = bld.instructions + pc;
      const struct tgsi_opcode_info *opcode_info =
         tgsi_get_opcode_info(instr->Instruction.Opcode);
      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                       opcode_info->mnemonic);
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller */
   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      unsigned index, chan;
      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < info->num_outputs; ++index) {
         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
            bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
         }
      }
   }

   /* Debug aid: dump the TGSI and generated IR (disabled by default). */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }
   tgsi_parse_free( &parse );

   /* Debug aid: dump the whole module (disabled by default). */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
      LLVMDumpModule(module);

   }

   FREE( bld.instructions );
}