lp_bld_tgsi_soa.c revision c790c2c7598dea2d5a5b0bfbe47732956e1e89a6
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_exec.h" 46#include "tgsi/tgsi_info.h" 47#include "tgsi/tgsi_parse.h" 48#include "tgsi/tgsi_util.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_tgsi_action.h" 51#include "lp_bld_type.h" 52#include "lp_bld_const.h" 53#include "lp_bld_arit.h" 54#include "lp_bld_bitarit.h" 55#include "lp_bld_gather.h" 56#include "lp_bld_init.h" 57#include "lp_bld_logic.h" 58#include "lp_bld_swizzle.h" 59#include "lp_bld_flow.h" 60#include "lp_bld_quad.h" 61#include "lp_bld_tgsi.h" 62#include "lp_bld_limits.h" 63#include "lp_bld_debug.h" 64#include "lp_bld_printf.h" 65 66 67static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 68{ 69 LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context); 70 LLVMBuilderRef builder = bld->gallivm->builder; 71 72 mask->bld = bld; 73 mask->has_mask = FALSE; 74 mask->cond_stack_size = 0; 75 mask->loop_stack_size = 0; 76 mask->call_stack_size = 0; 77 78 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type); 79 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = 80 LLVMConstAllOnes(mask->int_vec_type); 81 82 mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter"); 83 84 LLVMBuildStore( 85 builder, 86 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false), 87 mask->loop_limiter); 88} 89 90static void lp_exec_mask_update(struct lp_exec_mask *mask) 91{ 92 LLVMBuilderRef builder = mask->bld->gallivm->builder; 93 94 if (mask->loop_stack_size) { 95 /*for loops we need to update the entire mask at runtime */ 96 LLVMValueRef tmp; 97 assert(mask->break_mask); 98 tmp = LLVMBuildAnd(builder, 99 mask->cont_mask, 100 mask->break_mask, 101 "maskcb"); 102 mask->exec_mask = LLVMBuildAnd(builder, 103 mask->cond_mask, 104 tmp, 105 
"maskfull"); 106 } else 107 mask->exec_mask = mask->cond_mask; 108 109 if (mask->call_stack_size) { 110 mask->exec_mask = LLVMBuildAnd(builder, 111 mask->exec_mask, 112 mask->ret_mask, 113 "callmask"); 114 } 115 116 mask->has_mask = (mask->cond_stack_size > 0 || 117 mask->loop_stack_size > 0 || 118 mask->call_stack_size > 0); 119} 120 121static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 122 LLVMValueRef val) 123{ 124 LLVMBuilderRef builder = mask->bld->gallivm->builder; 125 126 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 127 if (mask->cond_stack_size == 0) { 128 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); 129 } 130 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 131 assert(LLVMTypeOf(val) == mask->int_vec_type); 132 mask->cond_mask = LLVMBuildAnd(builder, 133 mask->cond_mask, 134 val, 135 ""); 136 lp_exec_mask_update(mask); 137} 138 139static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 140{ 141 LLVMBuilderRef builder = mask->bld->gallivm->builder; 142 LLVMValueRef prev_mask; 143 LLVMValueRef inv_mask; 144 145 assert(mask->cond_stack_size); 146 prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 147 if (mask->cond_stack_size == 1) { 148 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); 149 } 150 151 inv_mask = LLVMBuildNot(builder, mask->cond_mask, ""); 152 153 mask->cond_mask = LLVMBuildAnd(builder, 154 inv_mask, 155 prev_mask, ""); 156 lp_exec_mask_update(mask); 157} 158 159static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 160{ 161 assert(mask->cond_stack_size); 162 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 163 lp_exec_mask_update(mask); 164} 165 166static void lp_exec_bgnloop(struct lp_exec_mask *mask) 167{ 168 LLVMBuilderRef builder = mask->bld->gallivm->builder; 169 170 if (mask->loop_stack_size == 0) { 171 assert(mask->loop_block == NULL); 172 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); 173 assert(mask->break_mask == 
LLVMConstAllOnes(mask->int_vec_type)); 174 assert(mask->break_var == NULL); 175 } 176 177 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING); 178 179 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block; 180 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask; 181 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask; 182 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; 183 ++mask->loop_stack_size; 184 185 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, ""); 186 LLVMBuildStore(builder, mask->break_mask, mask->break_var); 187 188 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop"); 189 190 LLVMBuildBr(builder, mask->loop_block); 191 LLVMPositionBuilderAtEnd(builder, mask->loop_block); 192 193 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, ""); 194 195 lp_exec_mask_update(mask); 196} 197 198static void lp_exec_break(struct lp_exec_mask *mask) 199{ 200 LLVMBuilderRef builder = mask->bld->gallivm->builder; 201 LLVMValueRef exec_mask = LLVMBuildNot(builder, 202 mask->exec_mask, 203 "break"); 204 205 mask->break_mask = LLVMBuildAnd(builder, 206 mask->break_mask, 207 exec_mask, "break_full"); 208 209 lp_exec_mask_update(mask); 210} 211 212static void lp_exec_continue(struct lp_exec_mask *mask) 213{ 214 LLVMBuilderRef builder = mask->bld->gallivm->builder; 215 LLVMValueRef exec_mask = LLVMBuildNot(builder, 216 mask->exec_mask, 217 ""); 218 219 mask->cont_mask = LLVMBuildAnd(builder, 220 mask->cont_mask, 221 exec_mask, ""); 222 223 lp_exec_mask_update(mask); 224} 225 226 227static void lp_exec_endloop(struct gallivm_state *gallivm, 228 struct lp_exec_mask *mask) 229{ 230 LLVMBuilderRef builder = mask->bld->gallivm->builder; 231 LLVMBasicBlockRef endloop; 232 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context); 233 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context, 234 mask->bld->type.width * 235 
mask->bld->type.length); 236 LLVMValueRef i1cond, i2cond, icond, limiter; 237 238 assert(mask->break_mask); 239 240 /* 241 * Restore the cont_mask, but don't pop 242 */ 243 assert(mask->loop_stack_size); 244 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask; 245 lp_exec_mask_update(mask); 246 247 /* 248 * Unlike the continue mask, the break_mask must be preserved across loop 249 * iterations 250 */ 251 LLVMBuildStore(builder, mask->break_mask, mask->break_var); 252 253 /* Decrement the loop limiter */ 254 limiter = LLVMBuildLoad(builder, mask->loop_limiter, ""); 255 256 limiter = LLVMBuildSub( 257 builder, 258 limiter, 259 LLVMConstInt(int_type, 1, false), 260 ""); 261 262 LLVMBuildStore(builder, limiter, mask->loop_limiter); 263 264 /* i1cond = (mask != 0) */ 265 i1cond = LLVMBuildICmp( 266 builder, 267 LLVMIntNE, 268 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""), 269 LLVMConstNull(reg_type), ""); 270 271 /* i2cond = (looplimiter > 0) */ 272 i2cond = LLVMBuildICmp( 273 builder, 274 LLVMIntSGT, 275 limiter, 276 LLVMConstNull(int_type), ""); 277 278 /* if( i1cond && i2cond ) */ 279 icond = LLVMBuildAnd(builder, i1cond, i2cond, ""); 280 281 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop"); 282 283 LLVMBuildCondBr(builder, 284 icond, mask->loop_block, endloop); 285 286 LLVMPositionBuilderAtEnd(builder, endloop); 287 288 assert(mask->loop_stack_size); 289 --mask->loop_stack_size; 290 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block; 291 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask; 292 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask; 293 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var; 294 295 lp_exec_mask_update(mask); 296} 297 298/* stores val into an address pointed to by dst. 299 * mask->exec_mask is used to figure out which bits of val 300 * should be stored into the address 301 * (0 means don't store this bit, 1 means do store). 
302 */ 303static void lp_exec_mask_store(struct lp_exec_mask *mask, 304 struct lp_build_context *bld_store, 305 LLVMValueRef pred, 306 LLVMValueRef val, 307 LLVMValueRef dst) 308{ 309 LLVMBuilderRef builder = mask->bld->gallivm->builder; 310 311 /* Mix the predicate and execution mask */ 312 if (mask->has_mask) { 313 if (pred) { 314 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); 315 } else { 316 pred = mask->exec_mask; 317 } 318 } 319 320 if (pred) { 321 LLVMValueRef real_val, dst_val; 322 323 dst_val = LLVMBuildLoad(builder, dst, ""); 324 real_val = lp_build_select(bld_store, 325 pred, 326 val, dst_val); 327 328 LLVMBuildStore(builder, real_val, dst); 329 } else 330 LLVMBuildStore(builder, val, dst); 331} 332 333static void lp_exec_mask_call(struct lp_exec_mask *mask, 334 int func, 335 int *pc) 336{ 337 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING); 338 mask->call_stack[mask->call_stack_size].pc = *pc; 339 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask; 340 mask->call_stack_size++; 341 *pc = func; 342} 343 344static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) 345{ 346 LLVMBuilderRef builder = mask->bld->gallivm->builder; 347 LLVMValueRef exec_mask; 348 349 if (mask->call_stack_size == 0) { 350 /* returning from main() */ 351 *pc = -1; 352 return; 353 } 354 exec_mask = LLVMBuildNot(builder, 355 mask->exec_mask, 356 "ret"); 357 358 mask->ret_mask = LLVMBuildAnd(builder, 359 mask->ret_mask, 360 exec_mask, "ret_full"); 361 362 lp_exec_mask_update(mask); 363} 364 365static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) 366{ 367} 368 369static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) 370{ 371 assert(mask->call_stack_size); 372 mask->call_stack_size--; 373 *pc = mask->call_stack[mask->call_stack_size].pc; 374 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask; 375 lp_exec_mask_update(mask); 376} 377 378 379/** 380 * Return pointer to a temporary register channel (src or dest). 
381 * Note that indirect addressing cannot be handled here. 382 * \param index which temporary register 383 * \param chan which channel of the temp register. 384 */ 385LLVMValueRef 386lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld, 387 unsigned index, 388 unsigned chan) 389{ 390 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 391 assert(chan < 4); 392 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 393 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan); 394 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, ""); 395 } 396 else { 397 return bld->temps[index][chan]; 398 } 399} 400 401/** 402 * Return pointer to a output register channel (src or dest). 403 * Note that indirect addressing cannot be handled here. 404 * \param index which output register 405 * \param chan which channel of the output register. 406 */ 407LLVMValueRef 408lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld, 409 unsigned index, 410 unsigned chan) 411{ 412 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 413 assert(chan < 4); 414 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 415 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, 416 index * 4 + chan); 417 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, ""); 418 } 419 else { 420 return bld->outputs[index][chan]; 421 } 422} 423 424/** 425 * Gather vector. 426 * XXX the lp_build_gather() function should be capable of doing this 427 * with a little work. 428 */ 429static LLVMValueRef 430build_gather(struct lp_build_context *bld, 431 LLVMValueRef base_ptr, 432 LLVMValueRef indexes) 433{ 434 LLVMBuilderRef builder = bld->gallivm->builder; 435 LLVMValueRef res = bld->undef; 436 unsigned i; 437 438 /* 439 * Loop over elements of index_vec, load scalar value, insert it into 'res'. 
440 */ 441 for (i = 0; i < bld->type.length; i++) { 442 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i); 443 LLVMValueRef index = LLVMBuildExtractElement(builder, 444 indexes, ii, ""); 445 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, 446 &index, 1, "gather_ptr"); 447 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 448 449 res = LLVMBuildInsertElement(builder, res, scalar, ii, ""); 450 } 451 452 return res; 453} 454 455 456/** 457 * Scatter/store vector. 458 */ 459static void 460emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, 461 LLVMValueRef base_ptr, 462 LLVMValueRef indexes, 463 LLVMValueRef values, 464 struct lp_exec_mask *mask, 465 LLVMValueRef pred) 466{ 467 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 468 LLVMBuilderRef builder = gallivm->builder; 469 unsigned i; 470 471 /* Mix the predicate and execution mask */ 472 if (mask->has_mask) { 473 if (pred) { 474 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); 475 } 476 else { 477 pred = mask->exec_mask; 478 } 479 } 480 481 /* 482 * Loop over elements of index_vec, store scalar value. 483 */ 484 for (i = 0; i < bld->bld_base.base.type.length; i++) { 485 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 486 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); 487 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); 488 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); 489 LLVMValueRef scalar_pred = pred ? 
490 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; 491 492 if (0) 493 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n", 494 ii, val, index, scalar_ptr); 495 496 if (scalar_pred) { 497 LLVMValueRef real_val, dst_val; 498 dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); 499 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); 500 LLVMBuildStore(builder, real_val, scalar_ptr); 501 } 502 else { 503 LLVMBuildStore(builder, val, scalar_ptr); 504 } 505 } 506} 507 508 509/** 510 * Read the current value of the ADDR register, convert the floats to 511 * ints, add the base index and return the vector of offsets. 512 * The offsets will be used to index into the constant buffer or 513 * temporary register file. 514 */ 515static LLVMValueRef 516get_indirect_index(struct lp_build_tgsi_soa_context *bld, 517 unsigned reg_file, unsigned reg_index, 518 const struct tgsi_src_register *indirect_reg) 519{ 520 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 521 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; 522 /* always use X component of address register */ 523 unsigned swizzle = indirect_reg->SwizzleX; 524 LLVMValueRef base; 525 LLVMValueRef rel; 526 LLVMValueRef max_index; 527 LLVMValueRef index; 528 529 assert(bld->indirect_files & (1 << reg_file)); 530 531 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index); 532 533 assert(swizzle < 4); 534 rel = LLVMBuildLoad(builder, 535 bld->addr[indirect_reg->Index][swizzle], 536 "load addr reg"); 537 538 index = lp_build_add(uint_bld, base, rel); 539 540 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm, 541 uint_bld->type, 542 bld->bld_base.info->file_max[reg_file]); 543 544 assert(!uint_bld->type.sign); 545 index = lp_build_min(uint_bld, index, max_index); 546 547 return index; 548} 549 550static struct lp_build_context * 551stype_to_fetch(struct lp_build_tgsi_context * bld_base, 552 enum tgsi_opcode_type stype) 553{ 554 
struct lp_build_context *bld_fetch; 555 556 switch (stype) { 557 case TGSI_TYPE_FLOAT: 558 case TGSI_TYPE_UNTYPED: 559 bld_fetch = &bld_base->base; 560 break; 561 case TGSI_TYPE_UNSIGNED: 562 bld_fetch = &bld_base->uint_bld; 563 break; 564 case TGSI_TYPE_SIGNED: 565 bld_fetch = &bld_base->int_bld; 566 break; 567 case TGSI_TYPE_VOID: 568 case TGSI_TYPE_DOUBLE: 569 default: 570 assert(0); 571 bld_fetch = NULL; 572 break; 573 } 574 return bld_fetch; 575} 576 577static LLVMValueRef 578emit_fetch_constant( 579 struct lp_build_tgsi_context * bld_base, 580 const struct tgsi_full_src_register * reg, 581 enum tgsi_opcode_type stype, 582 unsigned swizzle) 583{ 584 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 585 struct gallivm_state *gallivm = bld_base->base.gallivm; 586 LLVMBuilderRef builder = gallivm->builder; 587 struct lp_build_context *uint_bld = &bld_base->uint_bld; 588 LLVMValueRef indirect_index = NULL; 589 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 590 591 /* XXX: Handle fetching xyzw components as a vector */ 592 assert(swizzle != ~0); 593 594 if (reg->Register.Indirect) { 595 indirect_index = get_indirect_index(bld, 596 reg->Register.File, 597 reg->Register.Index, 598 ®->Indirect); 599 } 600 601 if (reg->Register.Indirect) { 602 LLVMValueRef swizzle_vec = 603 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle); 604 LLVMValueRef index_vec; /* index into the const buffer */ 605 606 /* index_vec = indirect_index * 4 + swizzle */ 607 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 608 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 609 610 /* Gather values from the constant buffer */ 611 return build_gather(bld_fetch, bld->consts_ptr, index_vec); 612 } 613 else { 614 LLVMValueRef index; /* index into the const buffer */ 615 LLVMValueRef scalar, scalar_ptr; 616 617 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle); 618 619 scalar_ptr = 
LLVMBuildGEP(builder, bld->consts_ptr, 620 &index, 1, ""); 621 622 if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) { 623 LLVMTypeRef ivtype = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); 624 LLVMValueRef temp_ptr; 625 temp_ptr = LLVMBuildBitCast(builder, scalar_ptr, ivtype, ""); 626 scalar = LLVMBuildLoad(builder, temp_ptr, ""); 627 } else 628 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 629 630 return lp_build_broadcast_scalar(bld_fetch, scalar); 631 } 632} 633 634static LLVMValueRef 635emit_fetch_immediate( 636 struct lp_build_tgsi_context * bld_base, 637 const struct tgsi_full_src_register * reg, 638 enum tgsi_opcode_type stype, 639 unsigned swizzle) 640{ 641 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 642 LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle]; 643 assert(res); 644 645 if (stype == TGSI_TYPE_UNSIGNED) { 646 res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type); 647 } else if (stype == TGSI_TYPE_SIGNED) { 648 res = LLVMConstBitCast(res, bld_base->int_bld.vec_type); 649 } 650 return res; 651} 652 653static LLVMValueRef 654emit_fetch_input( 655 struct lp_build_tgsi_context * bld_base, 656 const struct tgsi_full_src_register * reg, 657 enum tgsi_opcode_type stype, 658 unsigned swizzle) 659{ 660 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 661 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 662 LLVMBuilderRef builder = gallivm->builder; 663 struct lp_build_context *uint_bld = &bld_base->uint_bld; 664 LLVMValueRef indirect_index = NULL; 665 LLVMValueRef res; 666 667 if (reg->Register.Indirect) { 668 indirect_index = get_indirect_index(bld, 669 reg->Register.File, 670 reg->Register.Index, 671 ®->Indirect); 672 } 673 674 if (reg->Register.Indirect) { 675 LLVMValueRef swizzle_vec = 676 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); 677 LLVMValueRef length_vec = 678 lp_build_const_int_vec(gallivm, uint_bld->type, 
bld->bld_base.base.type.length); 679 LLVMValueRef index_vec; /* index into the const buffer */ 680 LLVMValueRef inputs_array; 681 LLVMTypeRef float4_ptr_type; 682 683 /* index_vec = (indirect_index * 4 + swizzle) * length */ 684 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 685 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 686 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 687 688 /* cast inputs_array pointer to float* */ 689 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 690 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, 691 float4_ptr_type, ""); 692 693 /* Gather values from the temporary register array */ 694 res = build_gather(&bld_base->base, inputs_array, index_vec); 695 } else { 696 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { 697 LLVMValueRef lindex = lp_build_const_int32(gallivm, 698 reg->Register.Index * 4 + swizzle); 699 LLVMValueRef input_ptr = LLVMBuildGEP(builder, 700 bld->inputs_array, &lindex, 1, ""); 701 res = LLVMBuildLoad(builder, input_ptr, ""); 702 } 703 else { 704 res = bld->inputs[reg->Register.Index][swizzle]; 705 } 706 } 707 708 assert(res); 709 710 if (stype == TGSI_TYPE_UNSIGNED) { 711 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); 712 } else if (stype == TGSI_TYPE_SIGNED) { 713 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); 714 } 715 716 return res; 717} 718 719static LLVMValueRef 720emit_fetch_temporary( 721 struct lp_build_tgsi_context * bld_base, 722 const struct tgsi_full_src_register * reg, 723 enum tgsi_opcode_type stype, 724 unsigned swizzle) 725{ 726 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 727 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 728 LLVMBuilderRef builder = gallivm->builder; 729 struct lp_build_context *uint_bld = &bld_base->uint_bld; 730 LLVMValueRef indirect_index = NULL; 731 LLVMValueRef res; 732 733 if (reg->Register.Indirect) { 734 
indirect_index = get_indirect_index(bld, 735 reg->Register.File, 736 reg->Register.Index, 737 ®->Indirect); 738 } 739 740 if (reg->Register.Indirect) { 741 LLVMValueRef swizzle_vec = 742 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle); 743 LLVMValueRef length_vec = 744 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, 745 bld->bld_base.base.type.length); 746 LLVMValueRef index_vec; /* index into the const buffer */ 747 LLVMValueRef temps_array; 748 LLVMTypeRef float4_ptr_type; 749 750 /* index_vec = (indirect_index * 4 + swizzle) * length */ 751 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 752 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 753 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 754 755 /* cast temps_array pointer to float* */ 756 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0); 757 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 758 float4_ptr_type, ""); 759 760 /* Gather values from the temporary register array */ 761 res = build_gather(&bld_base->base, temps_array, index_vec); 762 } 763 else { 764 LLVMValueRef temp_ptr; 765 if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) { 766 LLVMTypeRef itype = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); 767 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, 768 swizzle); 769 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, itype, ""); 770 } else 771 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); 772 res = LLVMBuildLoad(builder, temp_ptr, ""); 773 if (!res) 774 return bld->bld_base.base.undef; 775 } 776 777 return res; 778} 779 780static LLVMValueRef 781emit_fetch_system_value( 782 struct lp_build_tgsi_context * bld_base, 783 const struct tgsi_full_src_register * reg, 784 enum tgsi_opcode_type stype, 785 unsigned swizzle) 786{ 787 struct lp_build_tgsi_soa_context * bld = 
lp_soa_context(bld_base); 788 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 789 const struct tgsi_shader_info *info = bld->bld_base.info; 790 LLVMBuilderRef builder = gallivm->builder; 791 LLVMValueRef res; 792 enum tgsi_opcode_type atype; // Actual type of the value 793 794 assert(!reg->Register.Indirect); 795 796 switch (info->system_value_semantic_name[reg->Register.Index]) { 797 case TGSI_SEMANTIC_INSTANCEID: 798 res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id); 799 atype = TGSI_TYPE_UNSIGNED; 800 break; 801 802 case TGSI_SEMANTIC_VERTEXID: 803 res = bld->system_values.vertex_id; 804 atype = TGSI_TYPE_UNSIGNED; 805 break; 806 807 default: 808 assert(!"unexpected semantic in emit_fetch_system_value"); 809 res = bld_base->base.zero; 810 atype = TGSI_TYPE_FLOAT; 811 break; 812 } 813 814 if (atype != stype) { 815 if (stype == TGSI_TYPE_FLOAT) { 816 res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, ""); 817 } else if (stype == TGSI_TYPE_UNSIGNED) { 818 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); 819 } else if (stype == TGSI_TYPE_SIGNED) { 820 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); 821 } 822 } 823 824 return res; 825} 826 827/** 828 * Register fetch with derivatives. 829 */ 830static void 831emit_fetch_deriv( 832 struct lp_build_tgsi_soa_context *bld, 833 LLVMValueRef src, 834 LLVMValueRef *res, 835 LLVMValueRef *ddx, 836 LLVMValueRef *ddy) 837{ 838 if(res) 839 *res = src; 840 841 /* TODO: use interpolation coeffs for inputs */ 842 843 if(ddx) 844 *ddx = lp_build_ddx(&bld->bld_base.base, src); 845 846 if(ddy) 847 *ddy = lp_build_ddy(&bld->bld_base.base, src); 848} 849 850 851/** 852 * Predicate. 
853 */ 854static void 855emit_fetch_predicate( 856 struct lp_build_tgsi_soa_context *bld, 857 const struct tgsi_full_instruction *inst, 858 LLVMValueRef *pred) 859{ 860 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 861 unsigned index; 862 unsigned char swizzles[4]; 863 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; 864 LLVMValueRef value; 865 unsigned chan; 866 867 if (!inst->Instruction.Predicate) { 868 TGSI_FOR_EACH_CHANNEL( chan ) { 869 pred[chan] = NULL; 870 } 871 return; 872 } 873 874 swizzles[0] = inst->Predicate.SwizzleX; 875 swizzles[1] = inst->Predicate.SwizzleY; 876 swizzles[2] = inst->Predicate.SwizzleZ; 877 swizzles[3] = inst->Predicate.SwizzleW; 878 879 index = inst->Predicate.Index; 880 assert(index < LP_MAX_TGSI_PREDS); 881 882 TGSI_FOR_EACH_CHANNEL( chan ) { 883 unsigned swizzle = swizzles[chan]; 884 885 /* 886 * Only fetch the predicate register channels that are actually listed 887 * in the swizzles 888 */ 889 if (!unswizzled[swizzle]) { 890 value = LLVMBuildLoad(builder, 891 bld->preds[index][swizzle], ""); 892 893 /* 894 * Convert the value to an integer mask. 895 * 896 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions 897 * is needlessly causing two comparisons due to storing the intermediate 898 * result as float vector instead of an integer mask vector. 899 */ 900 value = lp_build_compare(bld->bld_base.base.gallivm, 901 bld->bld_base.base.type, 902 PIPE_FUNC_NOTEQUAL, 903 value, 904 bld->bld_base.base.zero); 905 if (inst->Predicate.Negate) { 906 value = LLVMBuildNot(builder, value, ""); 907 } 908 909 unswizzled[swizzle] = value; 910 } else { 911 value = unswizzled[swizzle]; 912 } 913 914 pred[chan] = value; 915 } 916} 917 918/** 919 * Register store. 
920 */ 921static void 922emit_store_chan( 923 struct lp_build_tgsi_context *bld_base, 924 const struct tgsi_full_instruction *inst, 925 unsigned index, 926 unsigned chan_index, 927 LLVMValueRef pred, 928 LLVMValueRef value) 929{ 930 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 931 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 932 LLVMBuilderRef builder = gallivm->builder; 933 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 934 struct lp_build_context *uint_bld = &bld_base->uint_bld; 935 LLVMValueRef indirect_index = NULL; 936 struct lp_build_context *bld_store; 937 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); 938 939 switch (dtype) { 940 default: 941 case TGSI_TYPE_FLOAT: 942 case TGSI_TYPE_UNTYPED: 943 bld_store = &bld_base->base; 944 break; 945 case TGSI_TYPE_UNSIGNED: 946 bld_store = &bld_base->uint_bld; 947 break; 948 case TGSI_TYPE_SIGNED: 949 bld_store = &bld_base->int_bld; 950 break; 951 case TGSI_TYPE_DOUBLE: 952 case TGSI_TYPE_VOID: 953 assert(0); 954 bld_store = NULL; 955 break; 956 } 957 958 switch( inst->Instruction.Saturate ) { 959 case TGSI_SAT_NONE: 960 break; 961 962 case TGSI_SAT_ZERO_ONE: 963 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero); 964 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 965 break; 966 967 case TGSI_SAT_MINUS_PLUS_ONE: 968 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0)); 969 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 970 break; 971 972 default: 973 assert(0); 974 } 975 976 if (reg->Register.Indirect) { 977 indirect_index = get_indirect_index(bld, 978 reg->Register.File, 979 reg->Register.Index, 980 ®->Indirect); 981 } else { 982 assert(reg->Register.Index <= 983 bld->bld_base.info->file_max[reg->Register.File]); 984 } 985 986 switch( reg->Register.File ) { 987 case 
TGSI_FILE_OUTPUT: 988 if (reg->Register.Indirect) { 989 LLVMValueRef chan_vec = 990 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); 991 LLVMValueRef length_vec = 992 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length); 993 LLVMValueRef index_vec; /* indexes into the temp registers */ 994 LLVMValueRef outputs_array; 995 LLVMValueRef pixel_offsets; 996 LLVMTypeRef float_ptr_type; 997 int i; 998 999 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 1000 pixel_offsets = uint_bld->undef; 1001 for (i = 0; i < bld->bld_base.base.type.length; i++) { 1002 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 1003 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 1004 ii, ii, ""); 1005 } 1006 1007 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 1008 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 1009 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 1010 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 1011 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 1012 1013 float_ptr_type = 1014 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1015 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, 1016 float_ptr_type, ""); 1017 1018 /* Scatter store values into temp registers */ 1019 emit_mask_scatter(bld, outputs_array, index_vec, value, 1020 &bld->exec_mask, pred); 1021 } 1022 else { 1023 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index, 1024 chan_index); 1025 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr); 1026 } 1027 break; 1028 1029 case TGSI_FILE_TEMPORARY: 1030 if (reg->Register.Indirect) { 1031 LLVMValueRef chan_vec = 1032 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); 1033 LLVMValueRef length_vec = 1034 lp_build_const_int_vec(gallivm, uint_bld->type, 1035 bld->bld_base.base.type.length); 1036 LLVMValueRef index_vec; /* indexes into the temp registers */ 1037 LLVMValueRef 
temps_array; 1038 LLVMValueRef pixel_offsets; 1039 LLVMTypeRef float_ptr_type; 1040 int i; 1041 1042 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 1043 pixel_offsets = uint_bld->undef; 1044 for (i = 0; i < bld->bld_base.base.type.length; i++) { 1045 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 1046 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 1047 ii, ii, ""); 1048 } 1049 1050 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 1051 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 1052 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 1053 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 1054 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 1055 1056 float_ptr_type = 1057 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1058 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 1059 float_ptr_type, ""); 1060 1061 /* Scatter store values into temp registers */ 1062 emit_mask_scatter(bld, temps_array, index_vec, value, 1063 &bld->exec_mask, pred); 1064 } 1065 else { 1066 LLVMValueRef temp_ptr; 1067 1068 switch (dtype) { 1069 case TGSI_TYPE_UNSIGNED: 1070 case TGSI_TYPE_SIGNED: { 1071 LLVMTypeRef itype = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4); 1072 LLVMTypeRef ivtype = LLVMPointerType(itype, 0); 1073 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, 1074 chan_index); 1075 LLVMValueRef temp_value_ptr; 1076 1077 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, ""); 1078 temp_value_ptr = LLVMBuildBitCast(builder, value, itype, ""); 1079 value = temp_value_ptr; 1080 break; 1081 } 1082 default: 1083 case TGSI_TYPE_FLOAT: 1084 case TGSI_TYPE_UNTYPED: 1085 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, 1086 chan_index); 1087 break; 1088 } 1089 1090 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr); 1091 } 1092 break; 1093 1094 case TGSI_FILE_ADDRESS: 1095 assert(dtype == TGSI_TYPE_SIGNED); 1096 
assert(LLVMTypeOf(value) == bld_base->base.int_vec_type); 1097 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, 1098 bld->addr[reg->Register.Index][chan_index]); 1099 break; 1100 1101 case TGSI_FILE_PREDICATE: 1102 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, 1103 bld->preds[reg->Register.Index][chan_index]); 1104 break; 1105 1106 default: 1107 assert( 0 ); 1108 } 1109} 1110 1111static void 1112emit_store( 1113 struct lp_build_tgsi_context * bld_base, 1114 const struct tgsi_full_instruction * inst, 1115 const struct tgsi_opcode_info * info, 1116 LLVMValueRef dst[4]) 1117 1118{ 1119 unsigned chan_index; 1120 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1121 1122 if(info->num_dst) { 1123 LLVMValueRef pred[TGSI_NUM_CHANNELS]; 1124 1125 emit_fetch_predicate( bld, inst, pred ); 1126 1127 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1128 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]); 1129 } 1130 } 1131} 1132 1133/** 1134 * High-level instruction translators. 
1135 */ 1136 1137static void 1138emit_tex( struct lp_build_tgsi_soa_context *bld, 1139 const struct tgsi_full_instruction *inst, 1140 enum lp_build_tex_modifier modifier, 1141 LLVMValueRef *texel) 1142{ 1143 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 1144 unsigned unit; 1145 LLVMValueRef lod_bias, explicit_lod; 1146 LLVMValueRef oow = NULL; 1147 LLVMValueRef coords[3]; 1148 LLVMValueRef ddx[3]; 1149 LLVMValueRef ddy[3]; 1150 unsigned num_coords; 1151 unsigned i; 1152 1153 if (!bld->sampler) { 1154 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 1155 for (i = 0; i < 4; i++) { 1156 texel[i] = bld->bld_base.base.undef; 1157 } 1158 return; 1159 } 1160 1161 switch (inst->Texture.Texture) { 1162 case TGSI_TEXTURE_1D: 1163 num_coords = 1; 1164 break; 1165 case TGSI_TEXTURE_1D_ARRAY: 1166 case TGSI_TEXTURE_2D: 1167 case TGSI_TEXTURE_RECT: 1168 num_coords = 2; 1169 break; 1170 case TGSI_TEXTURE_SHADOW1D: 1171 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1172 case TGSI_TEXTURE_SHADOW2D: 1173 case TGSI_TEXTURE_SHADOWRECT: 1174 case TGSI_TEXTURE_2D_ARRAY: 1175 case TGSI_TEXTURE_3D: 1176 case TGSI_TEXTURE_CUBE: 1177 num_coords = 3; 1178 break; 1179 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1180 num_coords = 4; 1181 break; 1182 default: 1183 assert(0); 1184 return; 1185 } 1186 1187 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 1188 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 ); 1189 explicit_lod = NULL; 1190 } 1191 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 1192 lod_bias = NULL; 1193 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 ); 1194 } 1195 else { 1196 lod_bias = NULL; 1197 explicit_lod = NULL; 1198 } 1199 1200 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 1201 oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 ); 1202 oow = lp_build_rcp(&bld->bld_base.base, oow); 1203 } 1204 1205 for (i = 0; i < num_coords; i++) { 1206 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i ); 
1207 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 1208 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow); 1209 } 1210 for (i = num_coords; i < 3; i++) { 1211 coords[i] = bld->bld_base.base.undef; 1212 } 1213 1214 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 1215 LLVMValueRef index0 = lp_build_const_int32(bld->bld_base.base.gallivm, 0); 1216 for (i = 0; i < num_coords; i++) { 1217 LLVMValueRef src1 = lp_build_emit_fetch( &bld->bld_base, inst, 1, i ); 1218 LLVMValueRef src2 = lp_build_emit_fetch( &bld->bld_base, inst, 2, i ); 1219 ddx[i] = LLVMBuildExtractElement(builder, src1, index0, ""); 1220 ddy[i] = LLVMBuildExtractElement(builder, src2, index0, ""); 1221 } 1222 unit = inst->Src[3].Register.Index; 1223 } else { 1224 for (i = 0; i < num_coords; i++) { 1225 ddx[i] = lp_build_scalar_ddx( &bld->bld_base.base, coords[i] ); 1226 ddy[i] = lp_build_scalar_ddy( &bld->bld_base.base, coords[i] ); 1227 } 1228 unit = inst->Src[1].Register.Index; 1229 } 1230 for (i = num_coords; i < 3; i++) { 1231 ddx[i] = LLVMGetUndef(bld->bld_base.base.elem_type); 1232 ddy[i] = LLVMGetUndef(bld->bld_base.base.elem_type); 1233 } 1234 1235 bld->sampler->emit_fetch_texel(bld->sampler, 1236 bld->bld_base.base.gallivm, 1237 bld->bld_base.base.type, 1238 unit, num_coords, coords, 1239 ddx, ddy, 1240 lod_bias, explicit_lod, 1241 texel); 1242} 1243 1244static void 1245emit_txq( struct lp_build_tgsi_soa_context *bld, 1246 const struct tgsi_full_instruction *inst, 1247 LLVMValueRef *sizes_out) 1248{ 1249 LLVMValueRef explicit_lod; 1250 unsigned num_coords, has_lod; 1251 unsigned i; 1252 1253 switch (inst->Texture.Texture) { 1254 case TGSI_TEXTURE_1D: 1255 case TGSI_TEXTURE_SHADOW1D: 1256 case TGSI_TEXTURE_SHADOW2D: 1257 case TGSI_TEXTURE_SHADOWCUBE: 1258 num_coords = 1; 1259 has_lod = 1; 1260 break; 1261 case TGSI_TEXTURE_2D: 1262 case TGSI_TEXTURE_CUBE: 1263 case TGSI_TEXTURE_1D_ARRAY: 1264 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1265 num_coords = 2; 1266 has_lod = 1; 1267 break; 
1268 case TGSI_TEXTURE_3D: 1269// case TGSI_TEXTURE_CUBE_ARRAY: 1270// case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 1271 case TGSI_TEXTURE_2D_ARRAY: 1272 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1273 num_coords = 3; 1274 has_lod = 1; 1275 break; 1276 1277 case TGSI_TEXTURE_BUFFER: 1278 num_coords = 1; 1279 has_lod = 0; 1280 break; 1281 1282 case TGSI_TEXTURE_RECT: 1283 case TGSI_TEXTURE_SHADOWRECT: 1284// case TGSI_TEXTURE_2D_MS: 1285 num_coords = 2; 1286 has_lod = 0; 1287 break; 1288 1289// case TGSI_TEXTURE_2D_MS_ARRAY: 1290// num_coords = 3; 1291// has_lod = 0; 1292// break; 1293 1294 default: 1295 assert(0); 1296 return; 1297 } 1298 1299 if (!bld->sampler) { 1300 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n"); 1301 for (i = 0; i < num_coords; i++) 1302 sizes_out[i] = bld->bld_base.base.undef; 1303 return; 1304 } 1305 1306 if (has_lod) 1307 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 2 ); 1308 else 1309 explicit_lod = NULL; 1310 1311 bld->sampler->emit_size_query(bld->sampler, 1312 bld->bld_base.base.gallivm, 1313 inst->Src[1].Register.Index, 1314 explicit_lod, 1315 sizes_out); 1316} 1317 1318static boolean 1319near_end_of_shader(struct lp_build_tgsi_soa_context *bld, 1320 int pc) 1321{ 1322 int i; 1323 1324 for (i = 0; i < 5; i++) { 1325 unsigned opcode; 1326 1327 if (pc + i >= bld->bld_base.info->num_instructions) 1328 return TRUE; 1329 1330 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode; 1331 1332 if (opcode == TGSI_OPCODE_END) 1333 return TRUE; 1334 1335 if (opcode == TGSI_OPCODE_TEX || 1336 opcode == TGSI_OPCODE_TXP || 1337 opcode == TGSI_OPCODE_TXD || 1338 opcode == TGSI_OPCODE_TXB || 1339 opcode == TGSI_OPCODE_TXL || 1340 opcode == TGSI_OPCODE_TXF || 1341 opcode == TGSI_OPCODE_TXQ || 1342 opcode == TGSI_OPCODE_CAL || 1343 opcode == TGSI_OPCODE_CALLNZ || 1344 opcode == TGSI_OPCODE_IF || 1345 opcode == TGSI_OPCODE_IFC || 1346 opcode == TGSI_OPCODE_BGNLOOP || 1347 opcode == TGSI_OPCODE_SWITCH) 
1348 return FALSE; 1349 } 1350 1351 return TRUE; 1352} 1353 1354 1355 1356/** 1357 * Kill fragment if any of the src register values are negative. 1358 */ 1359static void 1360emit_kil( 1361 struct lp_build_tgsi_soa_context *bld, 1362 const struct tgsi_full_instruction *inst, 1363 int pc) 1364{ 1365 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 1366 const struct tgsi_full_src_register *reg = &inst->Src[0]; 1367 LLVMValueRef terms[TGSI_NUM_CHANNELS]; 1368 LLVMValueRef mask; 1369 unsigned chan_index; 1370 1371 memset(&terms, 0, sizeof terms); 1372 1373 TGSI_FOR_EACH_CHANNEL( chan_index ) { 1374 unsigned swizzle; 1375 1376 /* Unswizzle channel */ 1377 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1378 1379 /* Check if the component has not been already tested. */ 1380 assert(swizzle < TGSI_NUM_CHANNELS); 1381 if( !terms[swizzle] ) 1382 /* TODO: change the comparison operator instead of setting the sign */ 1383 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index ); 1384 } 1385 1386 mask = NULL; 1387 TGSI_FOR_EACH_CHANNEL( chan_index ) { 1388 if(terms[chan_index]) { 1389 LLVMValueRef chan_mask; 1390 1391 /* 1392 * If term < 0 then mask = 0 else mask = ~0. 1393 */ 1394 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero); 1395 1396 if(mask) 1397 mask = LLVMBuildAnd(builder, mask, chan_mask, ""); 1398 else 1399 mask = chan_mask; 1400 } 1401 } 1402 1403 if(mask) { 1404 lp_build_mask_update(bld->mask, mask); 1405 1406 if (!near_end_of_shader(bld, pc)) 1407 lp_build_mask_check(bld->mask); 1408 } 1409} 1410 1411 1412/** 1413 * Predicated fragment kill. 1414 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 1415 * The only predication is the execution mask which will apply if 1416 * we're inside a loop or conditional. 
1417 */ 1418static void 1419emit_kilp(struct lp_build_tgsi_soa_context *bld, 1420 int pc) 1421{ 1422 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 1423 LLVMValueRef mask; 1424 1425 /* For those channels which are "alive", disable fragment shader 1426 * execution. 1427 */ 1428 if (bld->exec_mask.has_mask) { 1429 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); 1430 } 1431 else { 1432 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type); 1433 mask = zero; 1434 } 1435 1436 lp_build_mask_update(bld->mask, mask); 1437 1438 if (!near_end_of_shader(bld, pc)) 1439 lp_build_mask_check(bld->mask); 1440} 1441 1442 1443/** 1444 * Emit code which will dump the value of all the temporary registers 1445 * to stdout. 1446 */ 1447static void 1448emit_dump_temps(struct lp_build_tgsi_soa_context *bld) 1449{ 1450 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1451 LLVMBuilderRef builder = gallivm->builder; 1452 LLVMValueRef temp_ptr; 1453 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); 1454 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1); 1455 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); 1456 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); 1457 int index; 1458 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY]; 1459 1460 for (index = 0; index < n; index++) { 1461 LLVMValueRef idx = lp_build_const_int32(gallivm, index); 1462 LLVMValueRef v[4][4], res; 1463 int chan; 1464 1465 lp_build_printf(gallivm, "TEMP[%d]:\n", idx); 1466 1467 for (chan = 0; chan < 4; chan++) { 1468 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan); 1469 res = LLVMBuildLoad(builder, temp_ptr, ""); 1470 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); 1471 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); 1472 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, ""); 1473 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, ""); 1474 } 1475 1476 lp_build_printf(gallivm, " X: %f %f %f %f\n", 1477 v[0][0], 
v[0][1], v[0][2], v[0][3]); 1478 lp_build_printf(gallivm, " Y: %f %f %f %f\n", 1479 v[1][0], v[1][1], v[1][2], v[1][3]); 1480 lp_build_printf(gallivm, " Z: %f %f %f %f\n", 1481 v[2][0], v[2][1], v[2][2], v[2][3]); 1482 lp_build_printf(gallivm, " W: %f %f %f %f\n", 1483 v[3][0], v[3][1], v[3][2], v[3][3]); 1484 } 1485} 1486 1487 1488 1489void 1490lp_emit_declaration_soa( 1491 struct lp_build_tgsi_context *bld_base, 1492 const struct tgsi_full_declaration *decl) 1493{ 1494 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 1495 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1496 LLVMTypeRef vec_type = bld->bld_base.base.vec_type; 1497 const unsigned first = decl->Range.First; 1498 const unsigned last = decl->Range.Last; 1499 unsigned idx, i; 1500 1501 for (idx = first; idx <= last; ++idx) { 1502 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]); 1503 switch (decl->Declaration.File) { 1504 case TGSI_FILE_TEMPORARY: 1505 assert(idx < LP_MAX_TGSI_TEMPS); 1506 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { 1507 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 1508 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); 1509 } 1510 break; 1511 1512 case TGSI_FILE_OUTPUT: 1513 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 1514 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 1515 bld->outputs[idx][i] = lp_build_alloca(gallivm, 1516 vec_type, "output"); 1517 } 1518 break; 1519 1520 case TGSI_FILE_ADDRESS: 1521 /* ADDR registers are the only allocated with an integer LLVM IR type, 1522 * as they are guaranteed to always have integers. 1523 * XXX: Not sure if this exception is worthwhile (or the whole idea of 1524 * an ADDR register for that matter). 
1525 */ 1526 assert(idx < LP_MAX_TGSI_ADDRS); 1527 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 1528 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr"); 1529 break; 1530 1531 case TGSI_FILE_PREDICATE: 1532 assert(idx < LP_MAX_TGSI_PREDS); 1533 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 1534 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, 1535 "predicate"); 1536 break; 1537 1538 default: 1539 /* don't need to declare other vars */ 1540 break; 1541 } 1542 } 1543} 1544 1545 1546void lp_emit_immediate_soa( 1547 struct lp_build_tgsi_context *bld_base, 1548 const struct tgsi_full_immediate *imm) 1549{ 1550 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 1551 struct gallivm_state * gallivm = bld_base->base.gallivm; 1552 1553 /* simply copy the immediate values into the next immediates[] slot */ 1554 unsigned i; 1555 const uint size = imm->Immediate.NrTokens - 1; 1556 assert(size <= 4); 1557 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES); 1558 switch (imm->Immediate.DataType) { 1559 case TGSI_IMM_FLOAT32: 1560 for( i = 0; i < size; ++i ) 1561 bld->immediates[bld->num_immediates][i] = 1562 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); 1563 1564 break; 1565 case TGSI_IMM_UINT32: 1566 for( i = 0; i < size; ++i ) { 1567 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint); 1568 bld->immediates[bld->num_immediates][i] = 1569 LLVMConstBitCast(tmp, bld_base->base.vec_type); 1570 } 1571 1572 break; 1573 case TGSI_IMM_INT32: 1574 for( i = 0; i < size; ++i ) { 1575 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int); 1576 bld->immediates[bld->num_immediates][i] = 1577 LLVMConstBitCast(tmp, bld_base->base.vec_type); 1578 } 1579 1580 break; 1581 } 1582 for( i = size; i < 4; ++i ) 1583 bld->immediates[bld->num_immediates][i] = bld_base->base.undef; 1584 1585 bld->num_immediates++; 1586} 1587 1588static void 1589ddx_emit( 1590 const struct 
lp_build_tgsi_action * action, 1591 struct lp_build_tgsi_context * bld_base, 1592 struct lp_build_emit_data * emit_data) 1593{ 1594 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1595 1596 emit_fetch_deriv(bld, emit_data->args[0], NULL, 1597 &emit_data->output[emit_data->chan], NULL); 1598} 1599 1600static void 1601ddy_emit( 1602 const struct lp_build_tgsi_action * action, 1603 struct lp_build_tgsi_context * bld_base, 1604 struct lp_build_emit_data * emit_data) 1605{ 1606 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1607 1608 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL, 1609 &emit_data->output[emit_data->chan]); 1610} 1611 1612static void 1613kilp_emit( 1614 const struct lp_build_tgsi_action * action, 1615 struct lp_build_tgsi_context * bld_base, 1616 struct lp_build_emit_data * emit_data) 1617{ 1618 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1619 1620 emit_kilp(bld, bld_base->pc - 1); 1621} 1622 1623static void 1624kil_emit( 1625 const struct lp_build_tgsi_action * action, 1626 struct lp_build_tgsi_context * bld_base, 1627 struct lp_build_emit_data * emit_data) 1628{ 1629 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1630 1631 emit_kil(bld, emit_data->inst, bld_base->pc - 1); 1632} 1633 1634static void 1635tex_emit( 1636 const struct lp_build_tgsi_action * action, 1637 struct lp_build_tgsi_context * bld_base, 1638 struct lp_build_emit_data * emit_data) 1639{ 1640 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1641 1642 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output); 1643} 1644 1645static void 1646txb_emit( 1647 const struct lp_build_tgsi_action * action, 1648 struct lp_build_tgsi_context * bld_base, 1649 struct lp_build_emit_data * emit_data) 1650{ 1651 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1652 1653 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, 1654 emit_data->output); 1655} 
1656 1657static void 1658txd_emit( 1659 const struct lp_build_tgsi_action * action, 1660 struct lp_build_tgsi_context * bld_base, 1661 struct lp_build_emit_data * emit_data) 1662{ 1663 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1664 1665 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, 1666 emit_data->output); 1667} 1668 1669static void 1670txl_emit( 1671 const struct lp_build_tgsi_action * action, 1672 struct lp_build_tgsi_context * bld_base, 1673 struct lp_build_emit_data * emit_data) 1674{ 1675 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1676 1677 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 1678 emit_data->output); 1679} 1680 1681static void 1682txp_emit( 1683 const struct lp_build_tgsi_action * action, 1684 struct lp_build_tgsi_context * bld_base, 1685 struct lp_build_emit_data * emit_data) 1686{ 1687 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1688 1689 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED, 1690 emit_data->output); 1691} 1692 1693static void 1694txq_emit( 1695 const struct lp_build_tgsi_action * action, 1696 struct lp_build_tgsi_context * bld_base, 1697 struct lp_build_emit_data * emit_data) 1698{ 1699 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1700 1701 emit_txq(bld, emit_data->inst, emit_data->output); 1702} 1703 1704static void 1705cal_emit( 1706 const struct lp_build_tgsi_action * action, 1707 struct lp_build_tgsi_context * bld_base, 1708 struct lp_build_emit_data * emit_data) 1709{ 1710 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1711 1712 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label, 1713 &bld_base->pc); 1714} 1715 1716static void 1717ret_emit( 1718 const struct lp_build_tgsi_action * action, 1719 struct lp_build_tgsi_context * bld_base, 1720 struct lp_build_emit_data * emit_data) 1721{ 1722 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 
1723 1724 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc); 1725} 1726 1727static void 1728brk_emit( 1729 const struct lp_build_tgsi_action * action, 1730 struct lp_build_tgsi_context * bld_base, 1731 struct lp_build_emit_data * emit_data) 1732{ 1733 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1734 1735 lp_exec_break(&bld->exec_mask); 1736} 1737 1738static void 1739if_emit( 1740 const struct lp_build_tgsi_action * action, 1741 struct lp_build_tgsi_context * bld_base, 1742 struct lp_build_emit_data * emit_data) 1743{ 1744 LLVMValueRef tmp; 1745 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1746 1747 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL, 1748 emit_data->args[0], bld->bld_base.base.zero); 1749 lp_exec_mask_cond_push(&bld->exec_mask, tmp); 1750} 1751 1752static void 1753bgnloop_emit( 1754 const struct lp_build_tgsi_action * action, 1755 struct lp_build_tgsi_context * bld_base, 1756 struct lp_build_emit_data * emit_data) 1757{ 1758 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1759 1760 lp_exec_bgnloop(&bld->exec_mask); 1761} 1762 1763static void 1764bgnsub_emit( 1765 const struct lp_build_tgsi_action * action, 1766 struct lp_build_tgsi_context * bld_base, 1767 struct lp_build_emit_data * emit_data) 1768{ 1769 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1770 1771 lp_exec_mask_bgnsub(&bld->exec_mask); 1772} 1773 1774static void 1775else_emit( 1776 const struct lp_build_tgsi_action * action, 1777 struct lp_build_tgsi_context * bld_base, 1778 struct lp_build_emit_data * emit_data) 1779{ 1780 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1781 1782 lp_exec_mask_cond_invert(&bld->exec_mask); 1783} 1784 1785static void 1786endif_emit( 1787 const struct lp_build_tgsi_action * action, 1788 struct lp_build_tgsi_context * bld_base, 1789 struct lp_build_emit_data * emit_data) 1790{ 1791 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1792 
1793 lp_exec_mask_cond_pop(&bld->exec_mask); 1794} 1795 1796static void 1797endloop_emit( 1798 const struct lp_build_tgsi_action * action, 1799 struct lp_build_tgsi_context * bld_base, 1800 struct lp_build_emit_data * emit_data) 1801{ 1802 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1803 1804 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask); 1805} 1806 1807static void 1808endsub_emit( 1809 const struct lp_build_tgsi_action * action, 1810 struct lp_build_tgsi_context * bld_base, 1811 struct lp_build_emit_data * emit_data) 1812{ 1813 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1814 1815 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc); 1816} 1817 1818static void 1819cont_emit( 1820 const struct lp_build_tgsi_action * action, 1821 struct lp_build_tgsi_context * bld_base, 1822 struct lp_build_emit_data * emit_data) 1823{ 1824 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1825 1826 lp_exec_continue(&bld->exec_mask); 1827} 1828 1829/* XXX: Refactor and move it to lp_bld_tgsi_action.c 1830 * 1831 * XXX: What do the comments about xmm registers mean? Maybe they are left over 1832 * from old code, but there is no garauntee that LLVM will use those registers 1833 * for this code. 1834 * 1835 * XXX: There should be no calls to lp_build_emit_fetch in this function. This 1836 * should be handled by the emit_data->fetch_args function. */ 1837static void 1838nrm_emit( 1839 const struct lp_build_tgsi_action * action, 1840 struct lp_build_tgsi_context * bld_base, 1841 struct lp_build_emit_data * emit_data) 1842{ 1843 LLVMValueRef tmp0, tmp1; 1844 LLVMValueRef tmp4 = NULL; 1845 LLVMValueRef tmp5 = NULL; 1846 LLVMValueRef tmp6 = NULL; 1847 LLVMValueRef tmp7 = NULL; 1848 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1849 1850 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 
3 : 4; 1851 1852 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) || 1853 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) || 1854 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) || 1855 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) { 1856 1857 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1858 1859 /* xmm4 = src.x */ 1860 /* xmm0 = src.x * src.x */ 1861 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X); 1862 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) { 1863 tmp4 = tmp0; 1864 } 1865 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0); 1866 1867 /* xmm5 = src.y */ 1868 /* xmm0 = xmm0 + src.y * src.y */ 1869 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y); 1870 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) { 1871 tmp5 = tmp1; 1872 } 1873 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1); 1874 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1); 1875 1876 /* xmm6 = src.z */ 1877 /* xmm0 = xmm0 + src.z * src.z */ 1878 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z); 1879 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) { 1880 tmp6 = tmp1; 1881 } 1882 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1); 1883 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1); 1884 1885 if (dims == 4) { 1886 /* xmm7 = src.w */ 1887 /* xmm0 = xmm0 + src.w * src.w */ 1888 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W); 1889 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) { 1890 tmp7 = tmp1; 1891 } 1892 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1); 1893 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1); 1894 } 1895 /* xmm1 = 1 / sqrt(xmm0) */ 1896 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0); 1897 /* dst.x = xmm1 * src.x */ 1898 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) { 1899 
emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1); 1900 } 1901 /* dst.y = xmm1 * src.y */ 1902 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) { 1903 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1); 1904 } 1905 1906 /* dst.z = xmm1 * src.z */ 1907 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) { 1908 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1); 1909 } 1910 /* dst.w = xmm1 * src.w */ 1911 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) { 1912 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1); 1913 } 1914 } 1915 1916 /* dst.w = 1.0 */ 1917 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) { 1918 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one; 1919 } 1920} 1921 1922static void emit_prologue(struct lp_build_tgsi_context * bld_base) 1923{ 1924 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1925 struct gallivm_state * gallivm = bld_base->base.gallivm; 1926 1927 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 1928 LLVMValueRef array_size = 1929 lp_build_const_int32(gallivm, 1930 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); 1931 bld->temps_array = lp_build_array_alloca(gallivm, 1932 bld_base->base.vec_type, array_size, 1933 "temp_array"); 1934 } 1935 1936 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 1937 LLVMValueRef array_size = 1938 lp_build_const_int32(gallivm, 1939 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); 1940 bld->outputs_array = lp_build_array_alloca(gallivm, 1941 bld_base->base.vec_type, array_size, 1942 "output_array"); 1943 } 1944 1945 /* If we have indirect addressing in inputs we need to copy them into 1946 * our alloca array to be able to iterate over them */ 1947 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { 1948 unsigned index, chan; 1949 LLVMTypeRef vec_type = 
bld_base->base.vec_type; 1950 LLVMValueRef array_size = lp_build_const_int32(gallivm, 1951 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4); 1952 bld->inputs_array = lp_build_array_alloca(gallivm, 1953 vec_type, array_size, 1954 "input_array"); 1955 1956 assert(bld_base->info->num_inputs 1957 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1); 1958 1959 for (index = 0; index < bld_base->info->num_inputs; ++index) { 1960 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 1961 LLVMValueRef lindex = 1962 lp_build_const_int32(gallivm, index * 4 + chan); 1963 LLVMValueRef input_ptr = 1964 LLVMBuildGEP(gallivm->builder, bld->inputs_array, 1965 &lindex, 1, ""); 1966 LLVMValueRef value = bld->inputs[index][chan]; 1967 if (value) 1968 LLVMBuildStore(gallivm->builder, value, input_ptr); 1969 } 1970 } 1971 } 1972} 1973 1974static void emit_epilogue(struct lp_build_tgsi_context * bld_base) 1975{ 1976 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1977 1978 if (0) { 1979 /* for debugging */ 1980 emit_dump_temps(bld); 1981 } 1982 1983 /* If we have indirect addressing in outputs we need to copy our alloca array 1984 * to the outputs slots specified by the called */ 1985 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 1986 unsigned index, chan; 1987 assert(bld_base->info->num_outputs <= 1988 bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1); 1989 for (index = 0; index < bld_base->info->num_outputs; ++index) { 1990 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 1991 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); 1992 } 1993 } 1994 } 1995} 1996 1997void 1998lp_build_tgsi_soa(struct gallivm_state *gallivm, 1999 const struct tgsi_token *tokens, 2000 struct lp_type type, 2001 struct lp_build_mask_context *mask, 2002 LLVMValueRef consts_ptr, 2003 const struct lp_bld_tgsi_system_values *system_values, 2004 const LLVMValueRef *pos, 2005 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], 2006 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], 2007 struct 
lp_build_sampler_soa *sampler, 2008 const struct tgsi_shader_info *info) 2009{ 2010 struct lp_build_tgsi_soa_context bld; 2011 2012 struct lp_type res_type; 2013 2014 assert(type.length <= LP_MAX_VECTOR_LENGTH); 2015 memset(&res_type, 0, sizeof res_type); 2016 res_type.width = type.width; 2017 res_type.length = type.length; 2018 res_type.sign = 1; 2019 2020 /* Setup build context */ 2021 memset(&bld, 0, sizeof bld); 2022 lp_build_context_init(&bld.bld_base.base, gallivm, type); 2023 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); 2024 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); 2025 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); 2026 bld.mask = mask; 2027 bld.pos = pos; 2028 bld.inputs = inputs; 2029 bld.outputs = outputs; 2030 bld.consts_ptr = consts_ptr; 2031 bld.sampler = sampler; 2032 bld.bld_base.info = info; 2033 bld.indirect_files = info->indirect_files; 2034 2035 bld.bld_base.soa = TRUE; 2036 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; 2037 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; 2038 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; 2039 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; 2040 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value; 2041 bld.bld_base.emit_store = emit_store; 2042 2043 bld.bld_base.emit_declaration = lp_emit_declaration_soa; 2044 bld.bld_base.emit_immediate = lp_emit_immediate_soa; 2045 2046 bld.bld_base.emit_prologue = emit_prologue; 2047 bld.bld_base.emit_epilogue = emit_epilogue; 2048 2049 /* Set opcode actions */ 2050 lp_set_default_actions_cpu(&bld.bld_base); 2051 2052 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; 2053 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit; 2054 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit; 2055 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = 
cal_emit; 2056 bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit; 2057 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit; 2058 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit; 2059 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit; 2060 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; 2061 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; 2062 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit; 2063 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit; 2064 bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit; 2065 bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit; 2066 bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit; 2067 bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit; 2068 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit; 2069 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit; 2070 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit; 2071 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit; 2072 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit; 2073 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit; 2074 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit; 2075 2076 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base); 2077 2078 bld.system_values = *system_values; 2079 2080 lp_build_tgsi_llvm(&bld.bld_base, tokens); 2081 2082 if (0) { 2083 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); 2084 LLVMValueRef function = LLVMGetBasicBlockParent(block); 2085 debug_printf("11111111111111111111111111111 \n"); 2086 tgsi_dump(tokens, 0); 2087 lp_debug_dump_value(function); 2088 debug_printf("2222222222222222222222222222 \n"); 2089 } 2090 2091 if (0) { 2092 LLVMModuleRef module = LLVMGetGlobalParent( 2093 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 2094 LLVMDumpModule(module); 2095 2096 } 2097} 2098