lp_bld_tgsi_soa.c revision 00eb74b275e21d567a0ab8a6731181e005208634
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_exec.h" 46#include "tgsi/tgsi_info.h" 47#include "tgsi/tgsi_parse.h" 48#include "tgsi/tgsi_util.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_tgsi_action.h" 51#include "lp_bld_type.h" 52#include "lp_bld_const.h" 53#include "lp_bld_arit.h" 54#include "lp_bld_bitarit.h" 55#include "lp_bld_gather.h" 56#include "lp_bld_init.h" 57#include "lp_bld_logic.h" 58#include "lp_bld_swizzle.h" 59#include "lp_bld_flow.h" 60#include "lp_bld_quad.h" 61#include "lp_bld_tgsi.h" 62#include "lp_bld_limits.h" 63#include "lp_bld_debug.h" 64#include "lp_bld_printf.h" 65 66 67static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 68{ 69 LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context); 70 LLVMBuilderRef builder = bld->gallivm->builder; 71 72 mask->bld = bld; 73 mask->has_mask = FALSE; 74 mask->cond_stack_size = 0; 75 mask->loop_stack_size = 0; 76 mask->call_stack_size = 0; 77 78 mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type); 79 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = 80 LLVMConstAllOnes(mask->int_vec_type); 81 82 mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter"); 83 84 LLVMBuildStore( 85 builder, 86 LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false), 87 mask->loop_limiter); 88} 89 90static void lp_exec_mask_update(struct lp_exec_mask *mask) 91{ 92 LLVMBuilderRef builder = mask->bld->gallivm->builder; 93 94 if (mask->loop_stack_size) { 95 /*for loops we need to update the entire mask at runtime */ 96 LLVMValueRef tmp; 97 assert(mask->break_mask); 98 tmp = LLVMBuildAnd(builder, 99 mask->cont_mask, 100 mask->break_mask, 101 "maskcb"); 102 mask->exec_mask = LLVMBuildAnd(builder, 103 mask->cond_mask, 104 tmp, 105 
"maskfull"); 106 } else 107 mask->exec_mask = mask->cond_mask; 108 109 if (mask->call_stack_size) { 110 mask->exec_mask = LLVMBuildAnd(builder, 111 mask->exec_mask, 112 mask->ret_mask, 113 "callmask"); 114 } 115 116 mask->has_mask = (mask->cond_stack_size > 0 || 117 mask->loop_stack_size > 0 || 118 mask->call_stack_size > 0); 119} 120 121static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 122 LLVMValueRef val) 123{ 124 LLVMBuilderRef builder = mask->bld->gallivm->builder; 125 126 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 127 if (mask->cond_stack_size == 0) { 128 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); 129 } 130 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 131 assert(LLVMTypeOf(val) == mask->int_vec_type); 132 mask->cond_mask = LLVMBuildAnd(builder, 133 mask->cond_mask, 134 val, 135 ""); 136 lp_exec_mask_update(mask); 137} 138 139static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 140{ 141 LLVMBuilderRef builder = mask->bld->gallivm->builder; 142 LLVMValueRef prev_mask; 143 LLVMValueRef inv_mask; 144 145 assert(mask->cond_stack_size); 146 prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 147 if (mask->cond_stack_size == 1) { 148 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); 149 } 150 151 inv_mask = LLVMBuildNot(builder, mask->cond_mask, ""); 152 153 mask->cond_mask = LLVMBuildAnd(builder, 154 inv_mask, 155 prev_mask, ""); 156 lp_exec_mask_update(mask); 157} 158 159static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 160{ 161 assert(mask->cond_stack_size); 162 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 163 lp_exec_mask_update(mask); 164} 165 166static void lp_exec_bgnloop(struct lp_exec_mask *mask) 167{ 168 LLVMBuilderRef builder = mask->bld->gallivm->builder; 169 170 if (mask->loop_stack_size == 0) { 171 assert(mask->loop_block == NULL); 172 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); 173 assert(mask->break_mask == 
LLVMConstAllOnes(mask->int_vec_type)); 174 assert(mask->break_var == NULL); 175 } 176 177 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING); 178 179 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block; 180 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask; 181 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask; 182 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; 183 ++mask->loop_stack_size; 184 185 mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, ""); 186 LLVMBuildStore(builder, mask->break_mask, mask->break_var); 187 188 mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop"); 189 190 LLVMBuildBr(builder, mask->loop_block); 191 LLVMPositionBuilderAtEnd(builder, mask->loop_block); 192 193 mask->break_mask = LLVMBuildLoad(builder, mask->break_var, ""); 194 195 lp_exec_mask_update(mask); 196} 197 198static void lp_exec_break(struct lp_exec_mask *mask) 199{ 200 LLVMBuilderRef builder = mask->bld->gallivm->builder; 201 LLVMValueRef exec_mask = LLVMBuildNot(builder, 202 mask->exec_mask, 203 "break"); 204 205 mask->break_mask = LLVMBuildAnd(builder, 206 mask->break_mask, 207 exec_mask, "break_full"); 208 209 lp_exec_mask_update(mask); 210} 211 212static void lp_exec_continue(struct lp_exec_mask *mask) 213{ 214 LLVMBuilderRef builder = mask->bld->gallivm->builder; 215 LLVMValueRef exec_mask = LLVMBuildNot(builder, 216 mask->exec_mask, 217 ""); 218 219 mask->cont_mask = LLVMBuildAnd(builder, 220 mask->cont_mask, 221 exec_mask, ""); 222 223 lp_exec_mask_update(mask); 224} 225 226 227static void lp_exec_endloop(struct gallivm_state *gallivm, 228 struct lp_exec_mask *mask) 229{ 230 LLVMBuilderRef builder = mask->bld->gallivm->builder; 231 LLVMBasicBlockRef endloop; 232 LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context); 233 LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context, 234 mask->bld->type.width * 235 
mask->bld->type.length); 236 LLVMValueRef i1cond, i2cond, icond, limiter; 237 238 assert(mask->break_mask); 239 240 /* 241 * Restore the cont_mask, but don't pop 242 */ 243 assert(mask->loop_stack_size); 244 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask; 245 lp_exec_mask_update(mask); 246 247 /* 248 * Unlike the continue mask, the break_mask must be preserved across loop 249 * iterations 250 */ 251 LLVMBuildStore(builder, mask->break_mask, mask->break_var); 252 253 /* Decrement the loop limiter */ 254 limiter = LLVMBuildLoad(builder, mask->loop_limiter, ""); 255 256 limiter = LLVMBuildSub( 257 builder, 258 limiter, 259 LLVMConstInt(int_type, 1, false), 260 ""); 261 262 LLVMBuildStore(builder, limiter, mask->loop_limiter); 263 264 /* i1cond = (mask != 0) */ 265 i1cond = LLVMBuildICmp( 266 builder, 267 LLVMIntNE, 268 LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""), 269 LLVMConstNull(reg_type), ""); 270 271 /* i2cond = (looplimiter > 0) */ 272 i2cond = LLVMBuildICmp( 273 builder, 274 LLVMIntSGT, 275 limiter, 276 LLVMConstNull(int_type), ""); 277 278 /* if( i1cond && i2cond ) */ 279 icond = LLVMBuildAnd(builder, i1cond, i2cond, ""); 280 281 endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop"); 282 283 LLVMBuildCondBr(builder, 284 icond, mask->loop_block, endloop); 285 286 LLVMPositionBuilderAtEnd(builder, endloop); 287 288 assert(mask->loop_stack_size); 289 --mask->loop_stack_size; 290 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block; 291 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask; 292 mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask; 293 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var; 294 295 lp_exec_mask_update(mask); 296} 297 298/* stores val into an address pointed to by dst. 299 * mask->exec_mask is used to figure out which bits of val 300 * should be stored into the address 301 * (0 means don't store this bit, 1 means do store). 
302 */ 303static void lp_exec_mask_store(struct lp_exec_mask *mask, 304 struct lp_build_context *bld_store, 305 LLVMValueRef pred, 306 LLVMValueRef val, 307 LLVMValueRef dst) 308{ 309 LLVMBuilderRef builder = mask->bld->gallivm->builder; 310 311 /* Mix the predicate and execution mask */ 312 if (mask->has_mask) { 313 if (pred) { 314 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); 315 } else { 316 pred = mask->exec_mask; 317 } 318 } 319 320 if (pred) { 321 LLVMValueRef real_val, dst_val; 322 323 dst_val = LLVMBuildLoad(builder, dst, ""); 324 real_val = lp_build_select(bld_store, 325 pred, 326 val, dst_val); 327 328 LLVMBuildStore(builder, real_val, dst); 329 } else 330 LLVMBuildStore(builder, val, dst); 331} 332 333static void lp_exec_mask_call(struct lp_exec_mask *mask, 334 int func, 335 int *pc) 336{ 337 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING); 338 mask->call_stack[mask->call_stack_size].pc = *pc; 339 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask; 340 mask->call_stack_size++; 341 *pc = func; 342} 343 344static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) 345{ 346 LLVMBuilderRef builder = mask->bld->gallivm->builder; 347 LLVMValueRef exec_mask; 348 349 if (mask->call_stack_size == 0) { 350 /* returning from main() */ 351 *pc = -1; 352 return; 353 } 354 exec_mask = LLVMBuildNot(builder, 355 mask->exec_mask, 356 "ret"); 357 358 mask->ret_mask = LLVMBuildAnd(builder, 359 mask->ret_mask, 360 exec_mask, "ret_full"); 361 362 lp_exec_mask_update(mask); 363} 364 365static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) 366{ 367} 368 369static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) 370{ 371 assert(mask->call_stack_size); 372 mask->call_stack_size--; 373 *pc = mask->call_stack[mask->call_stack_size].pc; 374 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask; 375 lp_exec_mask_update(mask); 376} 377 378 379/** 380 * Return pointer to a temporary register channel (src or dest). 
381 * Note that indirect addressing cannot be handled here. 382 * \param index which temporary register 383 * \param chan which channel of the temp register. 384 */ 385LLVMValueRef 386lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld, 387 unsigned index, 388 unsigned chan) 389{ 390 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 391 assert(chan < 4); 392 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 393 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan); 394 return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, ""); 395 } 396 else { 397 return bld->temps[index][chan]; 398 } 399} 400 401/** 402 * Return pointer to a output register channel (src or dest). 403 * Note that indirect addressing cannot be handled here. 404 * \param index which output register 405 * \param chan which channel of the output register. 406 */ 407LLVMValueRef 408lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld, 409 unsigned index, 410 unsigned chan) 411{ 412 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 413 assert(chan < 4); 414 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 415 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, 416 index * 4 + chan); 417 return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, ""); 418 } 419 else { 420 return bld->outputs[index][chan]; 421 } 422} 423 424/** 425 * Gather vector. 426 * XXX the lp_build_gather() function should be capable of doing this 427 * with a little work. 428 */ 429static LLVMValueRef 430build_gather(struct lp_build_context *bld, 431 LLVMValueRef base_ptr, 432 LLVMValueRef indexes) 433{ 434 LLVMBuilderRef builder = bld->gallivm->builder; 435 LLVMValueRef res = bld->undef; 436 unsigned i; 437 438 /* 439 * Loop over elements of index_vec, load scalar value, insert it into 'res'. 
440 */ 441 for (i = 0; i < bld->type.length; i++) { 442 LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i); 443 LLVMValueRef index = LLVMBuildExtractElement(builder, 444 indexes, ii, ""); 445 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, 446 &index, 1, "gather_ptr"); 447 LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 448 449 res = LLVMBuildInsertElement(builder, res, scalar, ii, ""); 450 } 451 452 return res; 453} 454 455 456/** 457 * Scatter/store vector. 458 */ 459static void 460emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, 461 LLVMValueRef base_ptr, 462 LLVMValueRef indexes, 463 LLVMValueRef values, 464 struct lp_exec_mask *mask, 465 LLVMValueRef pred) 466{ 467 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 468 LLVMBuilderRef builder = gallivm->builder; 469 unsigned i; 470 471 /* Mix the predicate and execution mask */ 472 if (mask->has_mask) { 473 if (pred) { 474 pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); 475 } 476 else { 477 pred = mask->exec_mask; 478 } 479 } 480 481 /* 482 * Loop over elements of index_vec, store scalar value. 483 */ 484 for (i = 0; i < bld->bld_base.base.type.length; i++) { 485 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 486 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); 487 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); 488 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); 489 LLVMValueRef scalar_pred = pred ? 
490 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; 491 492 if (0) 493 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n", 494 ii, val, index, scalar_ptr); 495 496 if (scalar_pred) { 497 LLVMValueRef real_val, dst_val; 498 dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); 499 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); 500 LLVMBuildStore(builder, real_val, scalar_ptr); 501 } 502 else { 503 LLVMBuildStore(builder, val, scalar_ptr); 504 } 505 } 506} 507 508 509/** 510 * Read the current value of the ADDR register, convert the floats to 511 * ints, add the base index and return the vector of offsets. 512 * The offsets will be used to index into the constant buffer or 513 * temporary register file. 514 */ 515static LLVMValueRef 516get_indirect_index(struct lp_build_tgsi_soa_context *bld, 517 unsigned reg_file, unsigned reg_index, 518 const struct tgsi_src_register *indirect_reg) 519{ 520 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 521 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; 522 /* always use X component of address register */ 523 unsigned swizzle = indirect_reg->SwizzleX; 524 LLVMValueRef base; 525 LLVMValueRef rel; 526 LLVMValueRef max_index; 527 LLVMValueRef index; 528 529 assert(bld->indirect_files & (1 << reg_file)); 530 531 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index); 532 533 assert(swizzle < 4); 534 rel = LLVMBuildLoad(builder, 535 bld->addr[indirect_reg->Index][swizzle], 536 "load addr reg"); 537 538 index = lp_build_add(uint_bld, base, rel); 539 540 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm, 541 uint_bld->type, 542 bld->bld_base.info->file_max[reg_file]); 543 544 assert(!uint_bld->type.sign); 545 index = lp_build_min(uint_bld, index, max_index); 546 547 return index; 548} 549 550static struct lp_build_context * 551stype_to_fetch(struct lp_build_tgsi_context * bld_base, 552 enum tgsi_opcode_type stype) 553{ 554 
struct lp_build_context *bld_fetch; 555 556 switch (stype) { 557 case TGSI_TYPE_FLOAT: 558 case TGSI_TYPE_UNTYPED: 559 bld_fetch = &bld_base->base; 560 break; 561 case TGSI_TYPE_UNSIGNED: 562 bld_fetch = &bld_base->uint_bld; 563 break; 564 case TGSI_TYPE_SIGNED: 565 bld_fetch = &bld_base->int_bld; 566 break; 567 case TGSI_TYPE_VOID: 568 case TGSI_TYPE_DOUBLE: 569 default: 570 assert(0); 571 bld_fetch = NULL; 572 break; 573 } 574 return bld_fetch; 575} 576 577static LLVMValueRef 578emit_fetch_constant( 579 struct lp_build_tgsi_context * bld_base, 580 const struct tgsi_full_src_register * reg, 581 enum tgsi_opcode_type stype, 582 unsigned swizzle) 583{ 584 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 585 struct gallivm_state *gallivm = bld_base->base.gallivm; 586 LLVMBuilderRef builder = gallivm->builder; 587 struct lp_build_context *uint_bld = &bld_base->uint_bld; 588 LLVMValueRef indirect_index = NULL; 589 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 590 591 /* XXX: Handle fetching xyzw components as a vector */ 592 assert(swizzle != ~0); 593 594 if (reg->Register.Indirect) { 595 indirect_index = get_indirect_index(bld, 596 reg->Register.File, 597 reg->Register.Index, 598 ®->Indirect); 599 } 600 601 if (reg->Register.Indirect) { 602 LLVMValueRef swizzle_vec = 603 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle); 604 LLVMValueRef index_vec; /* index into the const buffer */ 605 606 /* index_vec = indirect_index * 4 + swizzle */ 607 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 608 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 609 610 /* Gather values from the constant buffer */ 611 return build_gather(bld_fetch, bld->consts_ptr, index_vec); 612 } 613 else { 614 LLVMValueRef index; /* index into the const buffer */ 615 LLVMValueRef scalar, scalar_ptr; 616 617 index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle); 618 619 scalar_ptr = 
LLVMBuildGEP(builder, bld->consts_ptr, 620 &index, 1, ""); 621 622 if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) { 623 LLVMTypeRef ivtype = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); 624 LLVMValueRef temp_ptr; 625 temp_ptr = LLVMBuildBitCast(builder, scalar_ptr, ivtype, ""); 626 scalar = LLVMBuildLoad(builder, temp_ptr, ""); 627 } else 628 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 629 630 return lp_build_broadcast_scalar(bld_fetch, scalar); 631 } 632} 633 634static LLVMValueRef 635emit_fetch_immediate( 636 struct lp_build_tgsi_context * bld_base, 637 const struct tgsi_full_src_register * reg, 638 enum tgsi_opcode_type stype, 639 unsigned swizzle) 640{ 641 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 642 LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle]; 643 assert(res); 644 645 if (stype == TGSI_TYPE_UNSIGNED) { 646 res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type); 647 } else if (stype == TGSI_TYPE_SIGNED) { 648 res = LLVMConstBitCast(res, bld_base->int_bld.vec_type); 649 } 650 return res; 651} 652 653static LLVMValueRef 654emit_fetch_input( 655 struct lp_build_tgsi_context * bld_base, 656 const struct tgsi_full_src_register * reg, 657 enum tgsi_opcode_type stype, 658 unsigned swizzle) 659{ 660 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 661 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 662 LLVMBuilderRef builder = gallivm->builder; 663 struct lp_build_context *uint_bld = &bld_base->uint_bld; 664 LLVMValueRef indirect_index = NULL; 665 LLVMValueRef res; 666 667 if (reg->Register.Indirect) { 668 indirect_index = get_indirect_index(bld, 669 reg->Register.File, 670 reg->Register.Index, 671 ®->Indirect); 672 } 673 674 if (reg->Register.Indirect) { 675 LLVMValueRef swizzle_vec = 676 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); 677 LLVMValueRef length_vec = 678 lp_build_const_int_vec(gallivm, uint_bld->type, 
bld->bld_base.base.type.length); 679 LLVMValueRef index_vec; /* index into the const buffer */ 680 LLVMValueRef inputs_array; 681 LLVMTypeRef float4_ptr_type; 682 683 /* index_vec = (indirect_index * 4 + swizzle) * length */ 684 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 685 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 686 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 687 688 /* cast inputs_array pointer to float* */ 689 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 690 inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, 691 float4_ptr_type, ""); 692 693 /* Gather values from the temporary register array */ 694 res = build_gather(&bld_base->base, inputs_array, index_vec); 695 } else { 696 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { 697 LLVMValueRef lindex = lp_build_const_int32(gallivm, 698 reg->Register.Index * 4 + swizzle); 699 LLVMValueRef input_ptr = LLVMBuildGEP(builder, 700 bld->inputs_array, &lindex, 1, ""); 701 res = LLVMBuildLoad(builder, input_ptr, ""); 702 } 703 else { 704 res = bld->inputs[reg->Register.Index][swizzle]; 705 } 706 } 707 708 assert(res); 709 710 if (stype == TGSI_TYPE_UNSIGNED) { 711 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); 712 } else if (stype == TGSI_TYPE_SIGNED) { 713 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); 714 } 715 716 return res; 717} 718 719static LLVMValueRef 720emit_fetch_temporary( 721 struct lp_build_tgsi_context * bld_base, 722 const struct tgsi_full_src_register * reg, 723 enum tgsi_opcode_type stype, 724 unsigned swizzle) 725{ 726 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 727 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 728 LLVMBuilderRef builder = gallivm->builder; 729 struct lp_build_context *uint_bld = &bld_base->uint_bld; 730 LLVMValueRef indirect_index = NULL; 731 LLVMValueRef res; 732 733 if (reg->Register.Indirect) { 734 
indirect_index = get_indirect_index(bld, 735 reg->Register.File, 736 reg->Register.Index, 737 ®->Indirect); 738 } 739 740 if (reg->Register.Indirect) { 741 LLVMValueRef swizzle_vec = 742 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle); 743 LLVMValueRef length_vec = 744 lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, 745 bld->bld_base.base.type.length); 746 LLVMValueRef index_vec; /* index into the const buffer */ 747 LLVMValueRef temps_array; 748 LLVMTypeRef float4_ptr_type; 749 750 /* index_vec = (indirect_index * 4 + swizzle) * length */ 751 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 752 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 753 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 754 755 /* cast temps_array pointer to float* */ 756 float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0); 757 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 758 float4_ptr_type, ""); 759 760 /* Gather values from the temporary register array */ 761 res = build_gather(&bld_base->base, temps_array, index_vec); 762 } 763 else { 764 LLVMValueRef temp_ptr; 765 if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) { 766 LLVMTypeRef itype = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0); 767 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, 768 swizzle); 769 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, itype, ""); 770 } else 771 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); 772 res = LLVMBuildLoad(builder, temp_ptr, ""); 773 if (!res) 774 return bld->bld_base.base.undef; 775 } 776 777 return res; 778} 779 780static LLVMValueRef 781emit_fetch_system_value( 782 struct lp_build_tgsi_context * bld_base, 783 const struct tgsi_full_src_register * reg, 784 enum tgsi_opcode_type stype, 785 unsigned swizzle) 786{ 787 struct lp_build_tgsi_soa_context * bld = 
lp_soa_context(bld_base); 788 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 789 LLVMBuilderRef builder = gallivm->builder; 790 LLVMValueRef index; /* index into the system value array */ 791 LLVMValueRef scalar, scalar_ptr; 792 793 assert(!reg->Register.Indirect); 794 795 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle); 796 797 scalar_ptr = LLVMBuildGEP(builder, bld->system_values_array, &index, 1, ""); 798 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 799 800 return lp_build_broadcast_scalar(&bld->bld_base.base, scalar); 801} 802 803/** 804 * Register fetch with derivatives. 805 */ 806static void 807emit_fetch_deriv( 808 struct lp_build_tgsi_soa_context *bld, 809 LLVMValueRef src, 810 LLVMValueRef *res, 811 LLVMValueRef *ddx, 812 LLVMValueRef *ddy) 813{ 814 if(res) 815 *res = src; 816 817 /* TODO: use interpolation coeffs for inputs */ 818 819 if(ddx) 820 *ddx = lp_build_ddx(&bld->bld_base.base, src); 821 822 if(ddy) 823 *ddy = lp_build_ddy(&bld->bld_base.base, src); 824} 825 826 827/** 828 * Predicate. 
829 */ 830static void 831emit_fetch_predicate( 832 struct lp_build_tgsi_soa_context *bld, 833 const struct tgsi_full_instruction *inst, 834 LLVMValueRef *pred) 835{ 836 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 837 unsigned index; 838 unsigned char swizzles[4]; 839 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; 840 LLVMValueRef value; 841 unsigned chan; 842 843 if (!inst->Instruction.Predicate) { 844 TGSI_FOR_EACH_CHANNEL( chan ) { 845 pred[chan] = NULL; 846 } 847 return; 848 } 849 850 swizzles[0] = inst->Predicate.SwizzleX; 851 swizzles[1] = inst->Predicate.SwizzleY; 852 swizzles[2] = inst->Predicate.SwizzleZ; 853 swizzles[3] = inst->Predicate.SwizzleW; 854 855 index = inst->Predicate.Index; 856 assert(index < LP_MAX_TGSI_PREDS); 857 858 TGSI_FOR_EACH_CHANNEL( chan ) { 859 unsigned swizzle = swizzles[chan]; 860 861 /* 862 * Only fetch the predicate register channels that are actually listed 863 * in the swizzles 864 */ 865 if (!unswizzled[swizzle]) { 866 value = LLVMBuildLoad(builder, 867 bld->preds[index][swizzle], ""); 868 869 /* 870 * Convert the value to an integer mask. 871 * 872 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions 873 * is needlessly causing two comparisons due to storing the intermediate 874 * result as float vector instead of an integer mask vector. 875 */ 876 value = lp_build_compare(bld->bld_base.base.gallivm, 877 bld->bld_base.base.type, 878 PIPE_FUNC_NOTEQUAL, 879 value, 880 bld->bld_base.base.zero); 881 if (inst->Predicate.Negate) { 882 value = LLVMBuildNot(builder, value, ""); 883 } 884 885 unswizzled[swizzle] = value; 886 } else { 887 value = unswizzled[swizzle]; 888 } 889 890 pred[chan] = value; 891 } 892} 893 894/** 895 * Register store. 
896 */ 897static void 898emit_store_chan( 899 struct lp_build_tgsi_context *bld_base, 900 const struct tgsi_full_instruction *inst, 901 unsigned index, 902 unsigned chan_index, 903 LLVMValueRef pred, 904 LLVMValueRef value) 905{ 906 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 907 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 908 LLVMBuilderRef builder = gallivm->builder; 909 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 910 struct lp_build_context *uint_bld = &bld_base->uint_bld; 911 LLVMValueRef indirect_index = NULL; 912 struct lp_build_context *bld_store; 913 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); 914 915 switch (dtype) { 916 default: 917 case TGSI_TYPE_FLOAT: 918 case TGSI_TYPE_UNTYPED: 919 bld_store = &bld_base->base; 920 break; 921 case TGSI_TYPE_UNSIGNED: 922 bld_store = &bld_base->uint_bld; 923 break; 924 case TGSI_TYPE_SIGNED: 925 bld_store = &bld_base->int_bld; 926 break; 927 case TGSI_TYPE_DOUBLE: 928 case TGSI_TYPE_VOID: 929 assert(0); 930 bld_store = NULL; 931 break; 932 } 933 934 switch( inst->Instruction.Saturate ) { 935 case TGSI_SAT_NONE: 936 break; 937 938 case TGSI_SAT_ZERO_ONE: 939 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero); 940 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 941 break; 942 943 case TGSI_SAT_MINUS_PLUS_ONE: 944 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0)); 945 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 946 break; 947 948 default: 949 assert(0); 950 } 951 952 if (reg->Register.Indirect) { 953 indirect_index = get_indirect_index(bld, 954 reg->Register.File, 955 reg->Register.Index, 956 ®->Indirect); 957 } else { 958 assert(reg->Register.Index <= 959 bld->bld_base.info->file_max[reg->Register.File]); 960 } 961 962 switch( reg->Register.File ) { 963 case 
TGSI_FILE_OUTPUT: 964 if (reg->Register.Indirect) { 965 LLVMValueRef chan_vec = 966 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); 967 LLVMValueRef length_vec = 968 lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length); 969 LLVMValueRef index_vec; /* indexes into the temp registers */ 970 LLVMValueRef outputs_array; 971 LLVMValueRef pixel_offsets; 972 LLVMTypeRef float_ptr_type; 973 int i; 974 975 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 976 pixel_offsets = uint_bld->undef; 977 for (i = 0; i < bld->bld_base.base.type.length; i++) { 978 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 979 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 980 ii, ii, ""); 981 } 982 983 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 984 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 985 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 986 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 987 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 988 989 float_ptr_type = 990 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 991 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, 992 float_ptr_type, ""); 993 994 /* Scatter store values into temp registers */ 995 emit_mask_scatter(bld, outputs_array, index_vec, value, 996 &bld->exec_mask, pred); 997 } 998 else { 999 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index, 1000 chan_index); 1001 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr); 1002 } 1003 break; 1004 1005 case TGSI_FILE_TEMPORARY: 1006 if (reg->Register.Indirect) { 1007 LLVMValueRef chan_vec = 1008 lp_build_const_int_vec(gallivm, uint_bld->type, chan_index); 1009 LLVMValueRef length_vec = 1010 lp_build_const_int_vec(gallivm, uint_bld->type, 1011 bld->bld_base.base.type.length); 1012 LLVMValueRef index_vec; /* indexes into the temp registers */ 1013 LLVMValueRef temps_array; 1014 LLVMValueRef 
pixel_offsets; 1015 LLVMTypeRef float_ptr_type; 1016 int i; 1017 1018 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 1019 pixel_offsets = uint_bld->undef; 1020 for (i = 0; i < bld->bld_base.base.type.length; i++) { 1021 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 1022 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 1023 ii, ii, ""); 1024 } 1025 1026 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 1027 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 1028 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 1029 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 1030 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 1031 1032 float_ptr_type = 1033 LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1034 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 1035 float_ptr_type, ""); 1036 1037 /* Scatter store values into temp registers */ 1038 emit_mask_scatter(bld, temps_array, index_vec, value, 1039 &bld->exec_mask, pred); 1040 } 1041 else { 1042 LLVMValueRef temp_ptr; 1043 1044 switch (dtype) { 1045 case TGSI_TYPE_UNSIGNED: 1046 case TGSI_TYPE_SIGNED: { 1047 LLVMTypeRef itype = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4); 1048 LLVMTypeRef ivtype = LLVMPointerType(itype, 0); 1049 LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, 1050 chan_index); 1051 LLVMValueRef temp_value_ptr; 1052 1053 temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, ""); 1054 temp_value_ptr = LLVMBuildBitCast(builder, value, itype, ""); 1055 value = temp_value_ptr; 1056 break; 1057 } 1058 default: 1059 case TGSI_TYPE_FLOAT: 1060 case TGSI_TYPE_UNTYPED: 1061 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, 1062 chan_index); 1063 break; 1064 } 1065 1066 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr); 1067 } 1068 break; 1069 1070 case TGSI_FILE_ADDRESS: 1071 assert(dtype == TGSI_TYPE_SIGNED); 1072 assert(LLVMTypeOf(value) == 
bld_base->base.int_vec_type); 1073 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, 1074 bld->addr[reg->Register.Index][chan_index]); 1075 break; 1076 1077 case TGSI_FILE_PREDICATE: 1078 lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, 1079 bld->preds[reg->Register.Index][chan_index]); 1080 break; 1081 1082 default: 1083 assert( 0 ); 1084 } 1085} 1086 1087static void 1088emit_store( 1089 struct lp_build_tgsi_context * bld_base, 1090 const struct tgsi_full_instruction * inst, 1091 const struct tgsi_opcode_info * info, 1092 LLVMValueRef dst[4]) 1093 1094{ 1095 unsigned chan_index; 1096 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1097 1098 if(info->num_dst) { 1099 LLVMValueRef pred[TGSI_NUM_CHANNELS]; 1100 1101 emit_fetch_predicate( bld, inst, pred ); 1102 1103 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1104 emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]); 1105 } 1106 } 1107} 1108 1109/** 1110 * High-level instruction translators. 
1111 */ 1112 1113static void 1114emit_tex( struct lp_build_tgsi_soa_context *bld, 1115 const struct tgsi_full_instruction *inst, 1116 enum lp_build_tex_modifier modifier, 1117 LLVMValueRef *texel) 1118{ 1119 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 1120 unsigned unit; 1121 LLVMValueRef lod_bias, explicit_lod; 1122 LLVMValueRef oow = NULL; 1123 LLVMValueRef coords[3]; 1124 LLVMValueRef ddx[3]; 1125 LLVMValueRef ddy[3]; 1126 unsigned num_coords; 1127 unsigned i; 1128 1129 if (!bld->sampler) { 1130 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 1131 for (i = 0; i < 4; i++) { 1132 texel[i] = bld->bld_base.base.undef; 1133 } 1134 return; 1135 } 1136 1137 switch (inst->Texture.Texture) { 1138 case TGSI_TEXTURE_1D: 1139 num_coords = 1; 1140 break; 1141 case TGSI_TEXTURE_1D_ARRAY: 1142 case TGSI_TEXTURE_2D: 1143 case TGSI_TEXTURE_RECT: 1144 num_coords = 2; 1145 break; 1146 case TGSI_TEXTURE_SHADOW1D: 1147 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1148 case TGSI_TEXTURE_SHADOW2D: 1149 case TGSI_TEXTURE_SHADOWRECT: 1150 case TGSI_TEXTURE_2D_ARRAY: 1151 case TGSI_TEXTURE_3D: 1152 case TGSI_TEXTURE_CUBE: 1153 num_coords = 3; 1154 break; 1155 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1156 num_coords = 4; 1157 break; 1158 default: 1159 assert(0); 1160 return; 1161 } 1162 1163 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 1164 lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 ); 1165 explicit_lod = NULL; 1166 } 1167 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 1168 lod_bias = NULL; 1169 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 ); 1170 } 1171 else { 1172 lod_bias = NULL; 1173 explicit_lod = NULL; 1174 } 1175 1176 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 1177 oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 ); 1178 oow = lp_build_rcp(&bld->bld_base.base, oow); 1179 } 1180 1181 for (i = 0; i < num_coords; i++) { 1182 coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i ); 
1183 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 1184 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow); 1185 } 1186 for (i = num_coords; i < 3; i++) { 1187 coords[i] = bld->bld_base.base.undef; 1188 } 1189 1190 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 1191 LLVMValueRef index0 = lp_build_const_int32(bld->bld_base.base.gallivm, 0); 1192 for (i = 0; i < num_coords; i++) { 1193 LLVMValueRef src1 = lp_build_emit_fetch( &bld->bld_base, inst, 1, i ); 1194 LLVMValueRef src2 = lp_build_emit_fetch( &bld->bld_base, inst, 2, i ); 1195 ddx[i] = LLVMBuildExtractElement(builder, src1, index0, ""); 1196 ddy[i] = LLVMBuildExtractElement(builder, src2, index0, ""); 1197 } 1198 unit = inst->Src[3].Register.Index; 1199 } else { 1200 for (i = 0; i < num_coords; i++) { 1201 ddx[i] = lp_build_scalar_ddx( &bld->bld_base.base, coords[i] ); 1202 ddy[i] = lp_build_scalar_ddy( &bld->bld_base.base, coords[i] ); 1203 } 1204 unit = inst->Src[1].Register.Index; 1205 } 1206 for (i = num_coords; i < 3; i++) { 1207 ddx[i] = LLVMGetUndef(bld->bld_base.base.elem_type); 1208 ddy[i] = LLVMGetUndef(bld->bld_base.base.elem_type); 1209 } 1210 1211 bld->sampler->emit_fetch_texel(bld->sampler, 1212 bld->bld_base.base.gallivm, 1213 bld->bld_base.base.type, 1214 unit, num_coords, coords, 1215 ddx, ddy, 1216 lod_bias, explicit_lod, 1217 texel); 1218} 1219 1220static void 1221emit_txq( struct lp_build_tgsi_soa_context *bld, 1222 const struct tgsi_full_instruction *inst, 1223 LLVMValueRef *sizes_out) 1224{ 1225 LLVMValueRef explicit_lod; 1226 unsigned num_coords, has_lod; 1227 unsigned i; 1228 1229 switch (inst->Texture.Texture) { 1230 case TGSI_TEXTURE_1D: 1231 case TGSI_TEXTURE_SHADOW1D: 1232 case TGSI_TEXTURE_SHADOW2D: 1233 case TGSI_TEXTURE_SHADOWCUBE: 1234 num_coords = 1; 1235 has_lod = 1; 1236 break; 1237 case TGSI_TEXTURE_2D: 1238 case TGSI_TEXTURE_CUBE: 1239 case TGSI_TEXTURE_1D_ARRAY: 1240 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1241 num_coords = 2; 1242 has_lod = 1; 1243 break; 
1244 case TGSI_TEXTURE_3D: 1245// case TGSI_TEXTURE_CUBE_ARRAY: 1246// case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 1247 case TGSI_TEXTURE_2D_ARRAY: 1248 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1249 num_coords = 3; 1250 has_lod = 1; 1251 break; 1252 1253 case TGSI_TEXTURE_BUFFER: 1254 num_coords = 1; 1255 has_lod = 0; 1256 break; 1257 1258 case TGSI_TEXTURE_RECT: 1259 case TGSI_TEXTURE_SHADOWRECT: 1260// case TGSI_TEXTURE_2D_MS: 1261 num_coords = 2; 1262 has_lod = 0; 1263 break; 1264 1265// case TGSI_TEXTURE_2D_MS_ARRAY: 1266// num_coords = 3; 1267// has_lod = 0; 1268// break; 1269 1270 default: 1271 assert(0); 1272 return; 1273 } 1274 1275 if (!bld->sampler) { 1276 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n"); 1277 for (i = 0; i < num_coords; i++) 1278 sizes_out[i] = bld->bld_base.base.undef; 1279 return; 1280 } 1281 1282 if (has_lod) 1283 explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 2 ); 1284 else 1285 explicit_lod = NULL; 1286 1287 bld->sampler->emit_size_query(bld->sampler, 1288 bld->bld_base.base.gallivm, 1289 inst->Src[1].Register.Index, 1290 explicit_lod, 1291 sizes_out); 1292} 1293 1294static boolean 1295near_end_of_shader(struct lp_build_tgsi_soa_context *bld, 1296 int pc) 1297{ 1298 int i; 1299 1300 for (i = 0; i < 5; i++) { 1301 unsigned opcode; 1302 1303 if (pc + i >= bld->bld_base.info->num_instructions) 1304 return TRUE; 1305 1306 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode; 1307 1308 if (opcode == TGSI_OPCODE_END) 1309 return TRUE; 1310 1311 if (opcode == TGSI_OPCODE_TEX || 1312 opcode == TGSI_OPCODE_TXP || 1313 opcode == TGSI_OPCODE_TXD || 1314 opcode == TGSI_OPCODE_TXB || 1315 opcode == TGSI_OPCODE_TXL || 1316 opcode == TGSI_OPCODE_TXF || 1317 opcode == TGSI_OPCODE_TXQ || 1318 opcode == TGSI_OPCODE_CAL || 1319 opcode == TGSI_OPCODE_CALLNZ || 1320 opcode == TGSI_OPCODE_IF || 1321 opcode == TGSI_OPCODE_IFC || 1322 opcode == TGSI_OPCODE_BGNLOOP || 1323 opcode == TGSI_OPCODE_SWITCH) 
1324 return FALSE; 1325 } 1326 1327 return TRUE; 1328} 1329 1330 1331 1332/** 1333 * Kill fragment if any of the src register values are negative. 1334 */ 1335static void 1336emit_kil( 1337 struct lp_build_tgsi_soa_context *bld, 1338 const struct tgsi_full_instruction *inst, 1339 int pc) 1340{ 1341 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 1342 const struct tgsi_full_src_register *reg = &inst->Src[0]; 1343 LLVMValueRef terms[TGSI_NUM_CHANNELS]; 1344 LLVMValueRef mask; 1345 unsigned chan_index; 1346 1347 memset(&terms, 0, sizeof terms); 1348 1349 TGSI_FOR_EACH_CHANNEL( chan_index ) { 1350 unsigned swizzle; 1351 1352 /* Unswizzle channel */ 1353 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1354 1355 /* Check if the component has not been already tested. */ 1356 assert(swizzle < TGSI_NUM_CHANNELS); 1357 if( !terms[swizzle] ) 1358 /* TODO: change the comparison operator instead of setting the sign */ 1359 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index ); 1360 } 1361 1362 mask = NULL; 1363 TGSI_FOR_EACH_CHANNEL( chan_index ) { 1364 if(terms[chan_index]) { 1365 LLVMValueRef chan_mask; 1366 1367 /* 1368 * If term < 0 then mask = 0 else mask = ~0. 1369 */ 1370 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero); 1371 1372 if(mask) 1373 mask = LLVMBuildAnd(builder, mask, chan_mask, ""); 1374 else 1375 mask = chan_mask; 1376 } 1377 } 1378 1379 if(mask) { 1380 lp_build_mask_update(bld->mask, mask); 1381 1382 if (!near_end_of_shader(bld, pc)) 1383 lp_build_mask_check(bld->mask); 1384 } 1385} 1386 1387 1388/** 1389 * Predicated fragment kill. 1390 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 1391 * The only predication is the execution mask which will apply if 1392 * we're inside a loop or conditional. 
1393 */ 1394static void 1395emit_kilp(struct lp_build_tgsi_soa_context *bld, 1396 int pc) 1397{ 1398 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 1399 LLVMValueRef mask; 1400 1401 /* For those channels which are "alive", disable fragment shader 1402 * execution. 1403 */ 1404 if (bld->exec_mask.has_mask) { 1405 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); 1406 } 1407 else { 1408 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type); 1409 mask = zero; 1410 } 1411 1412 lp_build_mask_update(bld->mask, mask); 1413 1414 if (!near_end_of_shader(bld, pc)) 1415 lp_build_mask_check(bld->mask); 1416} 1417 1418 1419/** 1420 * Emit code which will dump the value of all the temporary registers 1421 * to stdout. 1422 */ 1423static void 1424emit_dump_temps(struct lp_build_tgsi_soa_context *bld) 1425{ 1426 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1427 LLVMBuilderRef builder = gallivm->builder; 1428 LLVMValueRef temp_ptr; 1429 LLVMValueRef i0 = lp_build_const_int32(gallivm, 0); 1430 LLVMValueRef i1 = lp_build_const_int32(gallivm, 1); 1431 LLVMValueRef i2 = lp_build_const_int32(gallivm, 2); 1432 LLVMValueRef i3 = lp_build_const_int32(gallivm, 3); 1433 int index; 1434 int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY]; 1435 1436 for (index = 0; index < n; index++) { 1437 LLVMValueRef idx = lp_build_const_int32(gallivm, index); 1438 LLVMValueRef v[4][4], res; 1439 int chan; 1440 1441 lp_build_printf(gallivm, "TEMP[%d]:\n", idx); 1442 1443 for (chan = 0; chan < 4; chan++) { 1444 temp_ptr = lp_get_temp_ptr_soa(bld, index, chan); 1445 res = LLVMBuildLoad(builder, temp_ptr, ""); 1446 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); 1447 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); 1448 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, ""); 1449 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, ""); 1450 } 1451 1452 lp_build_printf(gallivm, " X: %f %f %f %f\n", 1453 v[0][0], 
v[0][1], v[0][2], v[0][3]); 1454 lp_build_printf(gallivm, " Y: %f %f %f %f\n", 1455 v[1][0], v[1][1], v[1][2], v[1][3]); 1456 lp_build_printf(gallivm, " Z: %f %f %f %f\n", 1457 v[2][0], v[2][1], v[2][2], v[2][3]); 1458 lp_build_printf(gallivm, " W: %f %f %f %f\n", 1459 v[3][0], v[3][1], v[3][2], v[3][3]); 1460 } 1461} 1462 1463 1464 1465void 1466lp_emit_declaration_soa( 1467 struct lp_build_tgsi_context *bld_base, 1468 const struct tgsi_full_declaration *decl) 1469{ 1470 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 1471 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1472 LLVMTypeRef vec_type = bld->bld_base.base.vec_type; 1473 const unsigned first = decl->Range.First; 1474 const unsigned last = decl->Range.Last; 1475 unsigned idx, i; 1476 1477 for (idx = first; idx <= last; ++idx) { 1478 assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]); 1479 switch (decl->Declaration.File) { 1480 case TGSI_FILE_TEMPORARY: 1481 assert(idx < LP_MAX_TGSI_TEMPS); 1482 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { 1483 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 1484 bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp"); 1485 } 1486 break; 1487 1488 case TGSI_FILE_OUTPUT: 1489 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 1490 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 1491 bld->outputs[idx][i] = lp_build_alloca(gallivm, 1492 vec_type, "output"); 1493 } 1494 break; 1495 1496 case TGSI_FILE_ADDRESS: 1497 /* ADDR registers are the only allocated with an integer LLVM IR type, 1498 * as they are guaranteed to always have integers. 1499 * XXX: Not sure if this exception is worthwhile (or the whole idea of 1500 * an ADDR register for that matter). 
1501 */ 1502 assert(idx < LP_MAX_TGSI_ADDRS); 1503 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 1504 bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr"); 1505 break; 1506 1507 case TGSI_FILE_PREDICATE: 1508 assert(idx < LP_MAX_TGSI_PREDS); 1509 for (i = 0; i < TGSI_NUM_CHANNELS; i++) 1510 bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type, 1511 "predicate"); 1512 break; 1513 1514 default: 1515 /* don't need to declare other vars */ 1516 break; 1517 } 1518 } 1519} 1520 1521 1522void lp_emit_immediate_soa( 1523 struct lp_build_tgsi_context *bld_base, 1524 const struct tgsi_full_immediate *imm) 1525{ 1526 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); 1527 struct gallivm_state * gallivm = bld_base->base.gallivm; 1528 1529 /* simply copy the immediate values into the next immediates[] slot */ 1530 unsigned i; 1531 const uint size = imm->Immediate.NrTokens - 1; 1532 assert(size <= 4); 1533 assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES); 1534 switch (imm->Immediate.DataType) { 1535 case TGSI_IMM_FLOAT32: 1536 for( i = 0; i < size; ++i ) 1537 bld->immediates[bld->num_immediates][i] = 1538 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); 1539 1540 break; 1541 case TGSI_IMM_UINT32: 1542 for( i = 0; i < size; ++i ) { 1543 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint); 1544 bld->immediates[bld->num_immediates][i] = 1545 LLVMConstBitCast(tmp, bld_base->base.vec_type); 1546 } 1547 1548 break; 1549 case TGSI_IMM_INT32: 1550 for( i = 0; i < size; ++i ) { 1551 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int); 1552 bld->immediates[bld->num_immediates][i] = 1553 LLVMConstBitCast(tmp, bld_base->base.vec_type); 1554 } 1555 1556 break; 1557 } 1558 for( i = size; i < 4; ++i ) 1559 bld->immediates[bld->num_immediates][i] = bld_base->base.undef; 1560 1561 bld->num_immediates++; 1562} 1563 1564static void 1565ddx_emit( 1566 const struct 
lp_build_tgsi_action * action, 1567 struct lp_build_tgsi_context * bld_base, 1568 struct lp_build_emit_data * emit_data) 1569{ 1570 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1571 1572 emit_fetch_deriv(bld, emit_data->args[0], NULL, 1573 &emit_data->output[emit_data->chan], NULL); 1574} 1575 1576static void 1577ddy_emit( 1578 const struct lp_build_tgsi_action * action, 1579 struct lp_build_tgsi_context * bld_base, 1580 struct lp_build_emit_data * emit_data) 1581{ 1582 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1583 1584 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL, 1585 &emit_data->output[emit_data->chan]); 1586} 1587 1588static void 1589kilp_emit( 1590 const struct lp_build_tgsi_action * action, 1591 struct lp_build_tgsi_context * bld_base, 1592 struct lp_build_emit_data * emit_data) 1593{ 1594 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1595 1596 emit_kilp(bld, bld_base->pc - 1); 1597} 1598 1599static void 1600kil_emit( 1601 const struct lp_build_tgsi_action * action, 1602 struct lp_build_tgsi_context * bld_base, 1603 struct lp_build_emit_data * emit_data) 1604{ 1605 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1606 1607 emit_kil(bld, emit_data->inst, bld_base->pc - 1); 1608} 1609 1610static void 1611tex_emit( 1612 const struct lp_build_tgsi_action * action, 1613 struct lp_build_tgsi_context * bld_base, 1614 struct lp_build_emit_data * emit_data) 1615{ 1616 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1617 1618 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output); 1619} 1620 1621static void 1622txb_emit( 1623 const struct lp_build_tgsi_action * action, 1624 struct lp_build_tgsi_context * bld_base, 1625 struct lp_build_emit_data * emit_data) 1626{ 1627 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1628 1629 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, 1630 emit_data->output); 1631} 
1632 1633static void 1634txd_emit( 1635 const struct lp_build_tgsi_action * action, 1636 struct lp_build_tgsi_context * bld_base, 1637 struct lp_build_emit_data * emit_data) 1638{ 1639 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1640 1641 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, 1642 emit_data->output); 1643} 1644 1645static void 1646txl_emit( 1647 const struct lp_build_tgsi_action * action, 1648 struct lp_build_tgsi_context * bld_base, 1649 struct lp_build_emit_data * emit_data) 1650{ 1651 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1652 1653 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 1654 emit_data->output); 1655} 1656 1657static void 1658txp_emit( 1659 const struct lp_build_tgsi_action * action, 1660 struct lp_build_tgsi_context * bld_base, 1661 struct lp_build_emit_data * emit_data) 1662{ 1663 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1664 1665 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED, 1666 emit_data->output); 1667} 1668 1669static void 1670txq_emit( 1671 const struct lp_build_tgsi_action * action, 1672 struct lp_build_tgsi_context * bld_base, 1673 struct lp_build_emit_data * emit_data) 1674{ 1675 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1676 1677 emit_txq(bld, emit_data->inst, emit_data->output); 1678} 1679 1680static void 1681cal_emit( 1682 const struct lp_build_tgsi_action * action, 1683 struct lp_build_tgsi_context * bld_base, 1684 struct lp_build_emit_data * emit_data) 1685{ 1686 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1687 1688 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label, 1689 &bld_base->pc); 1690} 1691 1692static void 1693ret_emit( 1694 const struct lp_build_tgsi_action * action, 1695 struct lp_build_tgsi_context * bld_base, 1696 struct lp_build_emit_data * emit_data) 1697{ 1698 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 
1699 1700 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc); 1701} 1702 1703static void 1704brk_emit( 1705 const struct lp_build_tgsi_action * action, 1706 struct lp_build_tgsi_context * bld_base, 1707 struct lp_build_emit_data * emit_data) 1708{ 1709 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1710 1711 lp_exec_break(&bld->exec_mask); 1712} 1713 1714static void 1715if_emit( 1716 const struct lp_build_tgsi_action * action, 1717 struct lp_build_tgsi_context * bld_base, 1718 struct lp_build_emit_data * emit_data) 1719{ 1720 LLVMValueRef tmp; 1721 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1722 1723 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL, 1724 emit_data->args[0], bld->bld_base.base.zero); 1725 lp_exec_mask_cond_push(&bld->exec_mask, tmp); 1726} 1727 1728static void 1729bgnloop_emit( 1730 const struct lp_build_tgsi_action * action, 1731 struct lp_build_tgsi_context * bld_base, 1732 struct lp_build_emit_data * emit_data) 1733{ 1734 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1735 1736 lp_exec_bgnloop(&bld->exec_mask); 1737} 1738 1739static void 1740bgnsub_emit( 1741 const struct lp_build_tgsi_action * action, 1742 struct lp_build_tgsi_context * bld_base, 1743 struct lp_build_emit_data * emit_data) 1744{ 1745 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1746 1747 lp_exec_mask_bgnsub(&bld->exec_mask); 1748} 1749 1750static void 1751else_emit( 1752 const struct lp_build_tgsi_action * action, 1753 struct lp_build_tgsi_context * bld_base, 1754 struct lp_build_emit_data * emit_data) 1755{ 1756 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1757 1758 lp_exec_mask_cond_invert(&bld->exec_mask); 1759} 1760 1761static void 1762endif_emit( 1763 const struct lp_build_tgsi_action * action, 1764 struct lp_build_tgsi_context * bld_base, 1765 struct lp_build_emit_data * emit_data) 1766{ 1767 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1768 
1769 lp_exec_mask_cond_pop(&bld->exec_mask); 1770} 1771 1772static void 1773endloop_emit( 1774 const struct lp_build_tgsi_action * action, 1775 struct lp_build_tgsi_context * bld_base, 1776 struct lp_build_emit_data * emit_data) 1777{ 1778 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1779 1780 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask); 1781} 1782 1783static void 1784endsub_emit( 1785 const struct lp_build_tgsi_action * action, 1786 struct lp_build_tgsi_context * bld_base, 1787 struct lp_build_emit_data * emit_data) 1788{ 1789 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1790 1791 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc); 1792} 1793 1794static void 1795cont_emit( 1796 const struct lp_build_tgsi_action * action, 1797 struct lp_build_tgsi_context * bld_base, 1798 struct lp_build_emit_data * emit_data) 1799{ 1800 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1801 1802 lp_exec_continue(&bld->exec_mask); 1803} 1804 1805/* XXX: Refactor and move it to lp_bld_tgsi_action.c 1806 * 1807 * XXX: What do the comments about xmm registers mean? Maybe they are left over 1808 * from old code, but there is no garauntee that LLVM will use those registers 1809 * for this code. 1810 * 1811 * XXX: There should be no calls to lp_build_emit_fetch in this function. This 1812 * should be handled by the emit_data->fetch_args function. */ 1813static void 1814nrm_emit( 1815 const struct lp_build_tgsi_action * action, 1816 struct lp_build_tgsi_context * bld_base, 1817 struct lp_build_emit_data * emit_data) 1818{ 1819 LLVMValueRef tmp0, tmp1; 1820 LLVMValueRef tmp4 = NULL; 1821 LLVMValueRef tmp5 = NULL; 1822 LLVMValueRef tmp6 = NULL; 1823 LLVMValueRef tmp7 = NULL; 1824 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1825 1826 uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 
3 : 4; 1827 1828 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) || 1829 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) || 1830 TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) || 1831 (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) { 1832 1833 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1834 1835 /* xmm4 = src.x */ 1836 /* xmm0 = src.x * src.x */ 1837 tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X); 1838 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) { 1839 tmp4 = tmp0; 1840 } 1841 tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0); 1842 1843 /* xmm5 = src.y */ 1844 /* xmm0 = xmm0 + src.y * src.y */ 1845 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y); 1846 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) { 1847 tmp5 = tmp1; 1848 } 1849 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1); 1850 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1); 1851 1852 /* xmm6 = src.z */ 1853 /* xmm0 = xmm0 + src.z * src.z */ 1854 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z); 1855 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) { 1856 tmp6 = tmp1; 1857 } 1858 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1); 1859 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1); 1860 1861 if (dims == 4) { 1862 /* xmm7 = src.w */ 1863 /* xmm0 = xmm0 + src.w * src.w */ 1864 tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W); 1865 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) { 1866 tmp7 = tmp1; 1867 } 1868 tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1); 1869 tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1); 1870 } 1871 /* xmm1 = 1 / sqrt(xmm0) */ 1872 tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0); 1873 /* dst.x = xmm1 * src.x */ 1874 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) { 1875 
emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1); 1876 } 1877 /* dst.y = xmm1 * src.y */ 1878 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) { 1879 emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1); 1880 } 1881 1882 /* dst.z = xmm1 * src.z */ 1883 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) { 1884 emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1); 1885 } 1886 /* dst.w = xmm1 * src.w */ 1887 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) { 1888 emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1); 1889 } 1890 } 1891 1892 /* dst.w = 1.0 */ 1893 if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) { 1894 emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one; 1895 } 1896} 1897 1898static void emit_prologue(struct lp_build_tgsi_context * bld_base) 1899{ 1900 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1901 struct gallivm_state * gallivm = bld_base->base.gallivm; 1902 1903 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 1904 LLVMValueRef array_size = 1905 lp_build_const_int32(gallivm, 1906 bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4); 1907 bld->temps_array = lp_build_array_alloca(gallivm, 1908 bld_base->base.vec_type, array_size, 1909 "temp_array"); 1910 } 1911 1912 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 1913 LLVMValueRef array_size = 1914 lp_build_const_int32(gallivm, 1915 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4); 1916 bld->outputs_array = lp_build_array_alloca(gallivm, 1917 bld_base->base.vec_type, array_size, 1918 "output_array"); 1919 } 1920 1921 /* If we have indirect addressing in inputs we need to copy them into 1922 * our alloca array to be able to iterate over them */ 1923 if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { 1924 unsigned index, chan; 1925 LLVMTypeRef vec_type = 
bld_base->base.vec_type; 1926 LLVMValueRef array_size = lp_build_const_int32(gallivm, 1927 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4); 1928 bld->inputs_array = lp_build_array_alloca(gallivm, 1929 vec_type, array_size, 1930 "input_array"); 1931 1932 assert(bld_base->info->num_inputs 1933 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1); 1934 1935 for (index = 0; index < bld_base->info->num_inputs; ++index) { 1936 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 1937 LLVMValueRef lindex = 1938 lp_build_const_int32(gallivm, index * 4 + chan); 1939 LLVMValueRef input_ptr = 1940 LLVMBuildGEP(gallivm->builder, bld->inputs_array, 1941 &lindex, 1, ""); 1942 LLVMValueRef value = bld->inputs[index][chan]; 1943 if (value) 1944 LLVMBuildStore(gallivm->builder, value, input_ptr); 1945 } 1946 } 1947 } 1948} 1949 1950static void emit_epilogue(struct lp_build_tgsi_context * bld_base) 1951{ 1952 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1953 1954 if (0) { 1955 /* for debugging */ 1956 emit_dump_temps(bld); 1957 } 1958 1959 /* If we have indirect addressing in outputs we need to copy our alloca array 1960 * to the outputs slots specified by the called */ 1961 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 1962 unsigned index, chan; 1963 assert(bld_base->info->num_outputs <= 1964 bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1); 1965 for (index = 0; index < bld_base->info->num_outputs; ++index) { 1966 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 1967 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); 1968 } 1969 } 1970 } 1971} 1972 1973void 1974lp_build_tgsi_soa(struct gallivm_state *gallivm, 1975 const struct tgsi_token *tokens, 1976 struct lp_type type, 1977 struct lp_build_mask_context *mask, 1978 LLVMValueRef consts_ptr, 1979 LLVMValueRef system_values_array, 1980 const LLVMValueRef *pos, 1981 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], 1982 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], 1983 struct lp_build_sampler_soa 
*sampler, 1984 const struct tgsi_shader_info *info) 1985{ 1986 struct lp_build_tgsi_soa_context bld; 1987 1988 struct lp_type res_type; 1989 1990 assert(type.length <= LP_MAX_VECTOR_LENGTH); 1991 memset(&res_type, 0, sizeof res_type); 1992 res_type.width = type.width; 1993 res_type.length = type.length; 1994 res_type.sign = 1; 1995 1996 /* Setup build context */ 1997 memset(&bld, 0, sizeof bld); 1998 lp_build_context_init(&bld.bld_base.base, gallivm, type); 1999 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); 2000 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); 2001 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); 2002 bld.mask = mask; 2003 bld.pos = pos; 2004 bld.inputs = inputs; 2005 bld.outputs = outputs; 2006 bld.consts_ptr = consts_ptr; 2007 bld.sampler = sampler; 2008 bld.bld_base.info = info; 2009 bld.indirect_files = info->indirect_files; 2010 2011 bld.bld_base.soa = TRUE; 2012 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; 2013 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; 2014 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; 2015 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; 2016 bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value; 2017 bld.bld_base.emit_store = emit_store; 2018 2019 bld.bld_base.emit_declaration = lp_emit_declaration_soa; 2020 bld.bld_base.emit_immediate = lp_emit_immediate_soa; 2021 2022 bld.bld_base.emit_prologue = emit_prologue; 2023 bld.bld_base.emit_epilogue = emit_epilogue; 2024 2025 /* Set opcode actions */ 2026 lp_set_default_actions_cpu(&bld.bld_base); 2027 2028 bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; 2029 bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit; 2030 bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit; 2031 bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit; 2032 
bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit; 2033 bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit; 2034 bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit; 2035 bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit; 2036 bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; 2037 bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; 2038 bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit; 2039 bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit; 2040 bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit; 2041 bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit; 2042 bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit; 2043 bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit; 2044 bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit; 2045 bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit; 2046 bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit; 2047 bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit; 2048 bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit; 2049 bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit; 2050 bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit; 2051 2052 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base); 2053 2054 2055 bld.system_values_array = system_values_array; 2056 2057 lp_build_tgsi_llvm(&bld.bld_base, tokens); 2058 2059 if (0) { 2060 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); 2061 LLVMValueRef function = LLVMGetBasicBlockParent(block); 2062 debug_printf("11111111111111111111111111111 \n"); 2063 tgsi_dump(tokens, 0); 2064 lp_debug_dump_value(function); 2065 debug_printf("2222222222222222222222222222 \n"); 2066 } 2067 2068 if (0) { 2069 LLVMModuleRef module = LLVMGetGlobalParent( 2070 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 2071 LLVMDumpModule(module); 2072 2073 } 2074} 2075 2076 2077/** 2078 * Build up the system values array out of individual values such as 
2079 * the instance ID, front-face, primitive ID, etc. The shader info is 2080 * used to determine which system values are needed and where to put 2081 * them in the system values array. 2082 * 2083 * XXX only instance ID is implemented at this time. 2084 * 2085 * The system values register file is similar to the constants buffer. 2086 * Example declaration: 2087 * DCL SV[0], INSTANCEID 2088 * Example instruction: 2089 * MOVE foo, SV[0].xxxx; 2090 * 2091 * \return LLVM float array (interpreted as float [][4]) 2092 */ 2093LLVMValueRef 2094lp_build_system_values_array(struct gallivm_state *gallivm, 2095 const struct tgsi_shader_info *info, 2096 LLVMValueRef instance_id, 2097 LLVMValueRef facing) 2098{ 2099 LLVMValueRef size = lp_build_const_int32(gallivm, 4 * info->num_system_values); 2100 LLVMTypeRef float_t = LLVMFloatTypeInContext(gallivm->context); 2101 LLVMValueRef array = lp_build_array_alloca(gallivm, float_t, 2102 size, "sysvals_array"); 2103 unsigned i; 2104 2105 for (i = 0; i < info->num_system_values; i++) { 2106 LLVMValueRef index = lp_build_const_int32(gallivm, i * 4); 2107 LLVMValueRef ptr, value = 0; 2108 2109 switch (info->system_value_semantic_name[i]) { 2110 case TGSI_SEMANTIC_INSTANCEID: 2111 /* convert instance ID from int to float */ 2112 value = LLVMBuildSIToFP(gallivm->builder, instance_id, float_t, 2113 "sysval_instanceid"); 2114 break; 2115 case TGSI_SEMANTIC_FACE: 2116 /* fall-through */ 2117 default: 2118 assert(0 && "unexpected semantic in build_system_values_array()"); 2119 } 2120 2121 ptr = LLVMBuildGEP(gallivm->builder, array, &index, 1, ""); 2122 LLVMBuildStore(gallivm->builder, value, ptr); 2123 } 2124 2125 return array; 2126} 2127