lp_bld_tgsi_soa.c revision 1d6f3543a063ab9e740fd0c149dcce26c282d773
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"


/* Iterate CHAN over the four vector components (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Test whether channel CHAN is enabled in the write mask of dst register 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Iterate CHAN over only the channels enabled in dst register 0's write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Initial size of the parsed-instruction array; grown on demand. */
#define LP_MAX_INSTRUCTIONS 256


/**
 * Runtime execution-mask state used to emulate structured control flow
 * (IF/ELSE, loops, subroutine CALL/RET) per SIMD lane.
 *
 * The effective mask is exec_mask = cond_mask & cont_mask & break_mask
 * (& ret_mask when inside a subroutine); see lp_exec_mask_update().
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any cond/loop/call nesting is active, i.e. stores must be masked */
   boolean has_mask;

   /* LLVM type of the integer mask vectors below */
   LLVMTypeRef int_vec_type;

   /* IF/ELSE/ENDIF nesting: saved cond masks plus the current one */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* Current loop state; outer loops' state is saved in loop_stack */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   LLVMValueRef break_var;   /* alloca holding break_mask across iterations */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Subroutine CALL/RET nesting: return pc and saved ret_mask */
   LLVMValueRef ret_mask;
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* Combined execution mask, kept current by lp_exec_mask_update() */
   LLVMValueRef exec_mask;
};

struct
lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for vector integer masks and indices */
   struct lp_build_context uint_bld;

   /* Builder for scalar elements of shader's data type (float) */
   struct lp_build_context elem_bld;

   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   const struct lp_build_sampler_soa *sampler;

   /* Per-register, per-channel values/allocas for the various TGSI files */
   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is
    * set in the indirect_files field.
    * The outputs[] array above is unused then.
    */
   LLVMValueRef outputs_array;

   /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is
    * set in the indirect_files field.
    * The inputs[] array above is unused then.
    */
   LLVMValueRef inputs_array;

   LLVMValueRef system_values_array;

   const struct tgsi_shader_info *info;
   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   /* Parsed instructions buffered for pc-based (re)execution of subroutines */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};


/**
 * Initialize execution-mask state: empty cond/loop/call stacks and all
 * masks set to all-ones (every lane enabled).
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->cond_stack_size = 0;
   mask->loop_stack_size = 0;
   mask->call_stack_size = 0;

   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
      LLVMConstAllOnes(mask->int_vec_type);
}

/**
 * Recompute exec_mask from the component masks and refresh has_mask.
 * exec_mask = cond_mask [& cont_mask & break_mask] [& ret_mask].
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(mask->bld->builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (mask->call_stack_size) {
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}

/**
 * IF: push the current cond_mask and AND the condition 'val' into it.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask
 = LLVMBuildAnd(mask->bld->builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

/**
 * ELSE: invert the current condition, still constrained by the enclosing
 * (previously pushed) condition mask.
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

/**
 * ENDIF: restore the condition mask saved by the matching push.
 */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   assert(mask->cond_stack_size);
   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
   lp_exec_mask_update(mask);
}

/**
 * BGNLOOP: save the enclosing loop's state, allocate a break_var so the
 * break mask survives across iterations, and open the loop's basic block.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   mask->break_mask =
 LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}

/**
 * BRK: permanently disable (until ENDLOOP) the lanes currently executing.
 */
static void lp_exec_break(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "break");

   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
                                   mask->break_mask,
                                   exec_mask, "break_full");

   lp_exec_mask_update(mask);
}

/**
 * CONT: disable the active lanes for the remainder of this iteration only
 * (cont_mask is restored at ENDLOOP).
 */
static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


/**
 * ENDLOOP: restore cont_mask, persist break_mask into break_var, and emit
 * the back-edge branch taken while any lane is still active.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. loop again while any lane is active.
    * (The whole mask vector is bitcast to one wide integer and compared
    * against zero.)
    */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask =
 mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}

/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      /* Masked store: load-old / select / store-back */
      LLVMValueRef real_val, dst_val;

      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      LLVMBuildStore(mask->bld->builder, val, dst);
}

/**
 * CAL: save the return pc and the current ret_mask, then jump to 'func'
 * by rewriting *pc.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
   mask->call_stack[mask->call_stack_size].pc = *pc;
   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
   mask->call_stack_size++;
   *pc = func;
}

/**
 * RET: from main, stop execution (*pc = -1); inside a subroutine, disable
 * the returning lanes via ret_mask until ENDSUB.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMValueRef exec_mask;

   if (mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }
   exec_mask = LLVMBuildNot(mask->bld->builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

/* BGNSUB: no mask bookkeeping is needed on subroutine entry. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static void
lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   /* ENDSUB: pop the call stack, restoring the caller's pc and ret_mask. */
   assert(mask->call_stack_size);
   mask->call_stack_size--;
   *pc = mask->call_stack[mask->call_stack_size].pc;
   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
   lp_exec_mask_update(mask);
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned index,
             unsigned chan)
{
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      /* temps live in one flat array, 4 channels per register */
      LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
   }
   else {
      return bld->temps[index][chan];
   }
}

/**
 * Return pointer to a output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
static LLVMValueRef
get_output_ptr(struct lp_build_tgsi_soa_context *bld,
               unsigned index,
               unsigned chan)
{
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      /* outputs live in one flat array, 4 channels per register */
      LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
      return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, "");
   }
   else {
      return bld->outputs[index][chan];
   }
}

/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_soa_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMValueRef res = bld->base.undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
                                             &index, 1, "gather_ptr");
      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
   }

   return res;
}


/**
 * Scatter/store vector.
 * Per-element stores of 'values' to base_ptr[indexes[i]], skipping elements
 * disabled by the combined predicate/execution mask.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   LLVMBuilderRef builder = bld->base.builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         /* debug aid, compiled out */
         lp_build_printf(builder, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* predicated element store: load-old / select / store-back */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_src_register *indirect_reg)
{
   struct lp_build_context *uint_bld = &bld->uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->SwizzleX;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(uint_bld->type, reg_index);

   assert(swizzle < 4);
   rel = LLVMBuildLoad(bld->base.builder,
                       bld->addr[indirect_reg->Index][swizzle],
                       "load addr reg");

   /* for indexing we want integers */
   rel = LLVMBuildFPToSI(bld->base.builder,
                         rel,
                         uint_bld->vec_type, "");

   index = lp_build_add(uint_bld, base, rel);

   max_index = lp_build_const_int_vec(uint_bld->type,
                                      bld->info->file_max[reg_file]);

   /* clamp to the register file's declared maximum (unsigned min) */
   assert(!uint_bld->type.sign);
   index = lp_build_min(uint_bld, index, max_index);

   return index;
}


/**
 * Register fetch.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   struct lp_build_context *uint_bld = &bld->uint_bld;
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   /* apply the source operand's swizzle to the requested channel */
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef indirect_index = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(uint_bld->type, swizzle);
         LLVMValueRef index_vec;  /* index into the const buffer */

         /* index_vec = indirect_index * 4 + swizzle */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

         /* constants are uniform across lanes: load once, broadcast */
         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
 lp_build_const_int_vec(uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the const buffer */
         LLVMValueRef inputs_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast inputs_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         inputs_array = LLVMBuildBitCast(uint_bld->builder, bld->inputs_array,
                                         float4_ptr_type, "");

         /* Gather values from the input register array */
         res = build_gather(bld, inputs_array, index_vec);
      } else {
         if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
            /* inputs are in the flat array even for direct access */
            LLVMValueRef lindex = lp_build_const_int32(reg->Register.Index * 4 + swizzle);
            LLVMValueRef input_ptr = LLVMBuildGEP(bld->base.builder,
                                                  bld->inputs_array, &lindex, 1, "");
            res = LLVMBuildLoad(bld->base.builder, input_ptr, "");
         }
         else {
            res = bld->inputs[reg->Register.Index][swizzle];
         }
      }
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef swizzle_vec =
            lp_build_const_int_vec(uint_bld->type, swizzle);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the const buffer */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         /* index_vec = (indirect_index * 4 + swizzle) * length */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

         /* cast temps_array pointer to float* */
         float4_ptr_type =
 LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   case TGSI_FILE_SYSTEM_VALUE:
      assert(!reg->Register.Indirect);
      {
         LLVMValueRef index;  /* index into the system value array */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index * 4 + swizzle);

         /* system values are uniform: load once, broadcast */
         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->system_values_array,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* Apply the source modifier (abs / negate / both) */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}


/**
 * Register fetch with derivatives.
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   const unsigned chan_index,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   LLVMValueRef src;

   src = emit_fetch(bld, inst, index, chan_index);

   if(res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if(ddx)
      *ddx = lp_build_ddx(&bld->base, src);

   if(ddy)
      *ddy = lp_build_ddy(&bld->base, src);
}


/**
 * Predicate.
 * Build per-channel predicate masks for the instruction, or NULLs when the
 * instruction is not predicated.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}


/**
 * Register store.
 * Applies the instruction's saturate mode to 'value', then writes it to the
 * destination register channel under the current execution mask / predicate.
 */
static void
emit_store(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *uint_bld = &bld->uint_bld;
   LLVMValueRef indirect_index = NULL;

   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      value = lp_build_max(&bld->base, value, bld->base.zero);
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0));
      value = lp_build_min(&bld->base, value, bld->base.one);
      break;

   default:
      assert(0);
   }

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      if (reg->Register.Indirect) {
         LLVMBuilderRef builder = bld->base.builder;
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(uint_bld->type, bld->base.type.length);
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef outputs_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
int i; 931 932 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 933 pixel_offsets = uint_bld->undef; 934 for (i = 0; i < bld->base.type.length; i++) { 935 LLVMValueRef ii = lp_build_const_int32(i); 936 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 937 ii, ii, ""); 938 } 939 940 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 941 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 942 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 943 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 944 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 945 946 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0); 947 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, 948 float_ptr_type, ""); 949 950 /* Scatter store values into temp registers */ 951 emit_mask_scatter(bld, outputs_array, index_vec, value, 952 &bld->exec_mask, pred); 953 } 954 else { 955 LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index, 956 chan_index); 957 lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr); 958 } 959 break; 960 961 case TGSI_FILE_TEMPORARY: 962 if (reg->Register.Indirect) { 963 LLVMBuilderRef builder = bld->base.builder; 964 LLVMValueRef chan_vec = 965 lp_build_const_int_vec(uint_bld->type, chan_index); 966 LLVMValueRef length_vec = 967 lp_build_const_int_vec(uint_bld->type, bld->base.type.length); 968 LLVMValueRef index_vec; /* indexes into the temp registers */ 969 LLVMValueRef temps_array; 970 LLVMValueRef pixel_offsets; 971 LLVMTypeRef float_ptr_type; 972 int i; 973 974 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 975 pixel_offsets = uint_bld->undef; 976 for (i = 0; i < bld->base.type.length; i++) { 977 LLVMValueRef ii = lp_build_const_int32(i); 978 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 979 ii, ii, ""); 980 } 981 982 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 983 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 984 
index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 985 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 986 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 987 988 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0); 989 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 990 float_ptr_type, ""); 991 992 /* Scatter store values into temp registers */ 993 emit_mask_scatter(bld, temps_array, index_vec, value, 994 &bld->exec_mask, pred); 995 } 996 else { 997 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 998 chan_index); 999 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); 1000 } 1001 break; 1002 1003 case TGSI_FILE_ADDRESS: 1004 lp_exec_mask_store(&bld->exec_mask, pred, value, 1005 bld->addr[reg->Indirect.Index][chan_index]); 1006 break; 1007 1008 case TGSI_FILE_PREDICATE: 1009 lp_exec_mask_store(&bld->exec_mask, pred, value, 1010 bld->preds[reg->Register.Index][chan_index]); 1011 break; 1012 1013 default: 1014 assert( 0 ); 1015 } 1016} 1017 1018 1019/** 1020 * High-level instruction translators. 
 */

/**
 * Emit code to sample a texture for a TEX/TXP/TXB/TXL/TXD instruction.
 *
 * \param modifier  how the extra operand (bias / explicit LOD / projector /
 *                  explicit derivatives) of the instruction is interpreted
 * \param texel     receives the four resulting texel channels
 *
 * If no sampler code generator was supplied, all four texel channels are
 * filled with undefs and a warning is printed.
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef ddx[3];
   LLVMValueRef ddy[3];
   unsigned num_coords;
   unsigned i;

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->base.undef;
      }
      return;
   }

   /* Number of coordinates actually consumed by the texture target. */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_3D:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* The W channel of src 0 carries the LOD bias or explicit LOD. */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      lod_bias = emit_fetch( bld, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = emit_fetch( bld, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* Projected sampling: divide the coordinates by W (multiply by 1/W). */
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = emit_fetch( bld, inst, 0, 3 );
      oow = lp_build_rcp(&bld->base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = emit_fetch( bld, inst, 0, i );
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->base.undef;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: derivatives come from src 1 (ddx) and src 2 (ddy); only the
       * first element of each vector is used.  The sampler is src 3. */
      LLVMTypeRef i32t = LLVMInt32Type();
      LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
      for (i = 0; i < num_coords; i++) {
         LLVMValueRef src1 = emit_fetch( bld, inst, 1, i );
         LLVMValueRef src2 = emit_fetch( bld, inst, 2, i );
         ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, "");
         ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, "");
      }
      unit = inst->Src[3].Register.Index;
   } else {
      /* Implicit derivatives computed from the quad's coordinates;
       * the sampler is src 1. */
      for (i = 0; i < num_coords; i++) {
         ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] );
         ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] );
      }
      unit = inst->Src[1].Register.Index;
   }
   for (i = num_coords; i < 3; i++) {
      ddx[i] = LLVMGetUndef(bld->base.elem_type);
      ddy[i] = LLVMGetUndef(bld->base.elem_type);
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->base.builder,
                                  bld->base.type,
                                  unit, num_coords, coords,
                                  ddx, ddy,
                                  lod_bias, explicit_lod,
                                  texel);
}

/**
 * Return TRUE if a KIL at instruction \p pc may skip re-checking the
 * execution mask: i.e. none of the next few instructions (up to 5, or
 * until END) is one whose result or control flow could depend on the
 * mask (texture fetches, calls, conditionals, loops, switches).
 */
static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
                   int pc)
{
   int i;

   for (i = 0; i < 5; i++) {
      unsigned opcode;

      if (pc + i >= bld->info->num_instructions)
         return TRUE;

      opcode = bld->instructions[pc + i].Instruction.Opcode;

      if (opcode == TGSI_OPCODE_END)
         return TRUE;

      if (opcode == TGSI_OPCODE_TEX ||
          opcode == TGSI_OPCODE_TXP ||
          opcode == TGSI_OPCODE_TXD ||
          opcode == TGSI_OPCODE_TXB ||
          opcode == TGSI_OPCODE_TXL ||
          opcode == TGSI_OPCODE_TXF ||
          opcode == TGSI_OPCODE_TXQ ||
          opcode == TGSI_OPCODE_CAL ||
          opcode == TGSI_OPCODE_CALLNZ ||
          opcode == TGSI_OPCODE_IF ||
          opcode == TGSI_OPCODE_IFC ||
          opcode == TGSI_OPCODE_BGNLOOP ||
          opcode == TGSI_OPCODE_SWITCH)
         return FALSE;
   }

   return TRUE;
}



/**
 * Kill fragment if any of the src register values are negative.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = emit_fetch(bld, inst, 0, chan_index );
   }

   mask = NULL;
   FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero);

         /* AND all tested channels together: kill if ANY is negative. */
         if(mask)
            mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if(mask) {
      lp_build_mask_update(bld->mask, mask);

      /* Skip the early-out mask check when the shader is about to end
       * anyway -- not worth the extra branch. */
      if (!near_end_of_shader(bld, pc))
         lp_build_mask_check(bld->mask);
   }
}


/**
 * Predicated fragment kill.
 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
 * The only predication is the execution mask which will apply if
 * we're inside a loop or conditional.
 */
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          int pc)
{
   LLVMValueRef mask;

   /* For those channels which are "alive", disable fragment shader
    * execution.
    */
   if (bld->exec_mask.has_mask) {
      /* Kill exactly the currently-active channels. */
      mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp");
   }
   else {
      /* No exec mask: everything is alive, so kill everything. */
      LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type);
      mask = zero;
   }

   lp_build_mask_update(bld->mask, mask);

   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}


/**
 * Emit code which will dump the value of all the temporary registers
 * to stdout.
 */
static void
emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
{
   LLVMBuilderRef builder = bld->base.builder;
   LLVMValueRef temp_ptr;
   LLVMValueRef i0 = lp_build_const_int32(0);
   LLVMValueRef i1 = lp_build_const_int32(1);
   LLVMValueRef i2 = lp_build_const_int32(2);
   LLVMValueRef i3 = lp_build_const_int32(3);
   int index;
   /* NOTE(review): file_max appears to be the highest used register
    * index (emit_declaration asserts "last <= file_max"), so "index < n"
    * below would skip the last temp register -- confirm intended. */
   int n = bld->info->file_max[TGSI_FILE_TEMPORARY];

   for (index = 0; index < n; index++) {
      LLVMValueRef idx = lp_build_const_int32(index);
      LLVMValueRef v[4][4], res;
      int chan;

      lp_build_printf(builder, "TEMP[%d]:\n", idx);

      /* Extract the first four lanes of each channel for printing. */
      for (chan = 0; chan < 4; chan++) {
         temp_ptr = get_temp_ptr(bld, index, chan);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
      }

      lp_build_printf(builder, "  X: %f %f %f %f\n",
                      v[0][0], v[0][1], v[0][2], v[0][3]);
      lp_build_printf(builder, "  Y: %f %f %f %f\n",
                      v[1][0], v[1][1], v[1][2], v[1][3]);
      lp_build_printf(builder, "  Z: %f %f %f %f\n",
                      v[2][0], v[2][1], v[2][2], v[2][3]);
      lp_build_printf(builder, "  W: %f %f %f %f\n",
                      v[3][0], v[3][1], v[3][2], v[3][3]);
   }
}



/**
 * Allocate storage (allocas) for the registers named by a TGSI
 * declaration.  TEMPORARY/OUTPUT registers that are accessed
 * indirectly live in flat arrays instead and are not allocated here.
 */
static void
emit_declaration(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_declaration *decl)
{
   LLVMTypeRef vec_type = bld->base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   for (idx = first; idx <= last; ++idx) {
      assert(last <= bld->info->file_max[decl->Declaration.File]);
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_TGSI_TEMPS);
         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
            for (i = 0; i < NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(bld->base.builder,
                                                    vec_type, "temp");
         }
         break;

      case TGSI_FILE_OUTPUT:
         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
            for (i = 0; i < NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(bld->base.builder,
                                                      vec_type, "output");
         }
         break;

      case TGSI_FILE_ADDRESS:
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(bld->base.builder,
                                                vec_type, "addr");
         break;

      case TGSI_FILE_PREDICATE:
         assert(idx < LP_MAX_TGSI_PREDS);
         for (i = 0; i < NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(bld->base.builder,
                                                 vec_type, "predicate");
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}


/**
 * Emit LLVM for one TGSI instruction.
1347 * \param return TRUE for success, FALSE otherwise 1348 */ 1349static boolean 1350emit_instruction( 1351 struct lp_build_tgsi_soa_context *bld, 1352 const struct tgsi_full_instruction *inst, 1353 const struct tgsi_opcode_info *info, 1354 int *pc) 1355{ 1356 unsigned chan_index; 1357 LLVMValueRef src0, src1, src2; 1358 LLVMValueRef tmp0, tmp1, tmp2; 1359 LLVMValueRef tmp3 = NULL; 1360 LLVMValueRef tmp4 = NULL; 1361 LLVMValueRef tmp5 = NULL; 1362 LLVMValueRef tmp6 = NULL; 1363 LLVMValueRef tmp7 = NULL; 1364 LLVMValueRef res; 1365 LLVMValueRef dst0[NUM_CHANNELS]; 1366 1367 /* 1368 * Stores and write masks are handled in a general fashion after the long 1369 * instruction opcode switch statement. 1370 * 1371 * Although not stricitly necessary, we avoid generating instructions for 1372 * channels which won't be stored, in cases where's that easy. For some 1373 * complex instructions, like texture sampling, it is more convenient to 1374 * assume a full writemask and then let LLVM optimization passes eliminate 1375 * redundant code. 
1376 */ 1377 1378 (*pc)++; 1379 1380 assert(info->num_dst <= 1); 1381 if (info->num_dst) { 1382 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1383 dst0[chan_index] = bld->base.undef; 1384 } 1385 } 1386 1387 switch (inst->Instruction.Opcode) { 1388 case TGSI_OPCODE_ARL: 1389 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1390 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1391 tmp0 = lp_build_floor(&bld->base, tmp0); 1392 dst0[chan_index] = tmp0; 1393 } 1394 break; 1395 1396 case TGSI_OPCODE_MOV: 1397 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1398 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1399 } 1400 break; 1401 1402 case TGSI_OPCODE_LIT: 1403 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 1404 dst0[CHAN_X] = bld->base.one; 1405 } 1406 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1407 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1408 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1409 } 1410 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1411 /* XMM[1] = SrcReg[0].yyyy */ 1412 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1413 /* XMM[1] = max(XMM[1], 0) */ 1414 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1415 /* XMM[2] = SrcReg[0].wwww */ 1416 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1417 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1418 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1419 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1420 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1421 } 1422 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1423 dst0[CHAN_W] = bld->base.one; 1424 } 1425 break; 1426 1427 case TGSI_OPCODE_RCP: 1428 /* TGSI_OPCODE_RECIP */ 1429 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1430 res = lp_build_rcp(&bld->base, src0); 1431 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1432 dst0[chan_index] = res; 1433 } 1434 break; 1435 1436 case TGSI_OPCODE_RSQ: 1437 /* TGSI_OPCODE_RECIPSQRT */ 1438 src0 = emit_fetch( bld, inst, 0, 
CHAN_X ); 1439 src0 = lp_build_abs(&bld->base, src0); 1440 res = lp_build_rsqrt(&bld->base, src0); 1441 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1442 dst0[chan_index] = res; 1443 } 1444 break; 1445 1446 case TGSI_OPCODE_EXP: 1447 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1448 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1449 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1450 LLVMValueRef *p_exp2_int_part = NULL; 1451 LLVMValueRef *p_frac_part = NULL; 1452 LLVMValueRef *p_exp2 = NULL; 1453 1454 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1455 1456 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1457 p_exp2_int_part = &tmp0; 1458 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1459 p_frac_part = &tmp1; 1460 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1461 p_exp2 = &tmp2; 1462 1463 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1464 1465 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1466 dst0[CHAN_X] = tmp0; 1467 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1468 dst0[CHAN_Y] = tmp1; 1469 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1470 dst0[CHAN_Z] = tmp2; 1471 } 1472 /* dst.w = 1.0 */ 1473 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1474 dst0[CHAN_W] = bld->base.one; 1475 } 1476 break; 1477 1478 case TGSI_OPCODE_LOG: 1479 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1480 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1481 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1482 LLVMValueRef *p_floor_log2 = NULL; 1483 LLVMValueRef *p_exp = NULL; 1484 LLVMValueRef *p_log2 = NULL; 1485 1486 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1487 src0 = lp_build_abs( &bld->base, src0 ); 1488 1489 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1490 p_floor_log2 = &tmp0; 1491 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1492 p_exp = &tmp1; 1493 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1494 p_log2 = &tmp2; 1495 1496 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1497 1498 /* dst.x = floor(lg2(abs(src.x))) */ 1499 if 
(IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1500 dst0[CHAN_X] = tmp0; 1501 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1502 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1503 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1504 } 1505 /* dst.z = lg2(abs(src.x)) */ 1506 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1507 dst0[CHAN_Z] = tmp2; 1508 } 1509 /* dst.w = 1.0 */ 1510 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1511 dst0[CHAN_W] = bld->base.one; 1512 } 1513 break; 1514 1515 case TGSI_OPCODE_MUL: 1516 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1517 src0 = emit_fetch( bld, inst, 0, chan_index ); 1518 src1 = emit_fetch( bld, inst, 1, chan_index ); 1519 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1520 } 1521 break; 1522 1523 case TGSI_OPCODE_ADD: 1524 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1525 src0 = emit_fetch( bld, inst, 0, chan_index ); 1526 src1 = emit_fetch( bld, inst, 1, chan_index ); 1527 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1528 } 1529 break; 1530 1531 case TGSI_OPCODE_DP3: 1532 /* TGSI_OPCODE_DOT3 */ 1533 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1534 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1535 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1536 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1537 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1538 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1539 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1540 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1541 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1542 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1543 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1544 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1545 dst0[chan_index] = tmp0; 1546 } 1547 break; 1548 1549 case TGSI_OPCODE_DP4: 1550 /* TGSI_OPCODE_DOT4 */ 1551 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1552 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1553 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1554 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y 
); 1555 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1556 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1557 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1558 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1559 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1560 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1561 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1562 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1563 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1564 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1565 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1566 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1567 dst0[chan_index] = tmp0; 1568 } 1569 break; 1570 1571 case TGSI_OPCODE_DST: 1572 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1573 dst0[CHAN_X] = bld->base.one; 1574 } 1575 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1576 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1577 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1578 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1579 } 1580 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1581 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1582 } 1583 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1584 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1585 } 1586 break; 1587 1588 case TGSI_OPCODE_MIN: 1589 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1590 src0 = emit_fetch( bld, inst, 0, chan_index ); 1591 src1 = emit_fetch( bld, inst, 1, chan_index ); 1592 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1593 } 1594 break; 1595 1596 case TGSI_OPCODE_MAX: 1597 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1598 src0 = emit_fetch( bld, inst, 0, chan_index ); 1599 src1 = emit_fetch( bld, inst, 1, chan_index ); 1600 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1601 } 1602 break; 1603 1604 case TGSI_OPCODE_SLT: 1605 /* TGSI_OPCODE_SETLT */ 1606 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1607 src0 = emit_fetch( bld, inst, 0, chan_index ); 1608 src1 = emit_fetch( bld, inst, 1, chan_index 
); 1609 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1610 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1611 } 1612 break; 1613 1614 case TGSI_OPCODE_SGE: 1615 /* TGSI_OPCODE_SETGE */ 1616 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1617 src0 = emit_fetch( bld, inst, 0, chan_index ); 1618 src1 = emit_fetch( bld, inst, 1, chan_index ); 1619 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1620 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1621 } 1622 break; 1623 1624 case TGSI_OPCODE_MAD: 1625 /* TGSI_OPCODE_MADD */ 1626 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1627 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1628 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1629 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1630 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1631 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1632 dst0[chan_index] = tmp0; 1633 } 1634 break; 1635 1636 case TGSI_OPCODE_SUB: 1637 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1638 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1639 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1640 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1641 } 1642 break; 1643 1644 case TGSI_OPCODE_LRP: 1645 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1646 src0 = emit_fetch( bld, inst, 0, chan_index ); 1647 src1 = emit_fetch( bld, inst, 1, chan_index ); 1648 src2 = emit_fetch( bld, inst, 2, chan_index ); 1649 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1650 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1651 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1652 } 1653 break; 1654 1655 case TGSI_OPCODE_CND: 1656 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1657 src0 = emit_fetch( bld, inst, 0, chan_index ); 1658 src1 = emit_fetch( bld, inst, 1, chan_index ); 1659 src2 = emit_fetch( bld, inst, 2, chan_index ); 1660 tmp1 = 
lp_build_const_vec(bld->base.type, 0.5); 1661 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1662 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1663 } 1664 break; 1665 1666 case TGSI_OPCODE_DP2A: 1667 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1668 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1669 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1670 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1671 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1672 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1673 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1674 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1675 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1676 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1677 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1678 } 1679 break; 1680 1681 case TGSI_OPCODE_FRC: 1682 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1683 src0 = emit_fetch( bld, inst, 0, chan_index ); 1684 tmp0 = lp_build_floor(&bld->base, src0); 1685 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1686 dst0[chan_index] = tmp0; 1687 } 1688 break; 1689 1690 case TGSI_OPCODE_CLAMP: 1691 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1692 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1693 src1 = emit_fetch( bld, inst, 1, chan_index ); 1694 src2 = emit_fetch( bld, inst, 2, chan_index ); 1695 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1696 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1697 dst0[chan_index] = tmp0; 1698 } 1699 break; 1700 1701 case TGSI_OPCODE_FLR: 1702 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1703 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1704 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1705 } 1706 break; 1707 1708 case TGSI_OPCODE_ROUND: 1709 FOR_EACH_DST0_ENABLED_CHANNEL( inst, 
chan_index ) { 1710 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1711 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1712 } 1713 break; 1714 1715 case TGSI_OPCODE_EX2: { 1716 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1717 tmp0 = lp_build_exp2( &bld->base, tmp0); 1718 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1719 dst0[chan_index] = tmp0; 1720 } 1721 break; 1722 } 1723 1724 case TGSI_OPCODE_LG2: 1725 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1726 tmp0 = lp_build_log2( &bld->base, tmp0); 1727 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1728 dst0[chan_index] = tmp0; 1729 } 1730 break; 1731 1732 case TGSI_OPCODE_POW: 1733 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1734 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1735 res = lp_build_pow( &bld->base, src0, src1 ); 1736 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1737 dst0[chan_index] = res; 1738 } 1739 break; 1740 1741 case TGSI_OPCODE_XPD: 1742 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1743 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1744 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1745 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1746 } 1747 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1748 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1749 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1750 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1751 } 1752 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1753 tmp2 = tmp0; 1754 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1755 tmp5 = tmp3; 1756 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1757 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1758 dst0[CHAN_X] = tmp2; 1759 } 1760 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1761 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1762 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1763 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 1764 } 1765 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1766 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1767 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1768 tmp3 = lp_build_sub( 
&bld->base, tmp3, tmp1); 1769 dst0[CHAN_Y] = tmp3; 1770 } 1771 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1772 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1773 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1774 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1775 dst0[CHAN_Z] = tmp5; 1776 } 1777 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1778 dst0[CHAN_W] = bld->base.one; 1779 } 1780 break; 1781 1782 case TGSI_OPCODE_ABS: 1783 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1784 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1785 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1786 } 1787 break; 1788 1789 case TGSI_OPCODE_RCC: 1790 /* deprecated? */ 1791 assert(0); 1792 return FALSE; 1793 1794 case TGSI_OPCODE_DPH: 1795 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1796 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1797 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1798 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1799 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1800 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1801 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1802 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1803 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1804 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1805 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1806 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1807 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1808 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1809 dst0[chan_index] = tmp0; 1810 } 1811 break; 1812 1813 case TGSI_OPCODE_COS: 1814 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1815 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1816 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1817 dst0[chan_index] = tmp0; 1818 } 1819 break; 1820 1821 case TGSI_OPCODE_DDX: 1822 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1823 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1824 } 1825 break; 1826 1827 case TGSI_OPCODE_DDY: 1828 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 
1829 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1830 } 1831 break; 1832 1833 case TGSI_OPCODE_KILP: 1834 /* predicated kill */ 1835 emit_kilp( bld, inst, (*pc)-1 ); 1836 break; 1837 1838 case TGSI_OPCODE_KIL: 1839 /* conditional kill */ 1840 emit_kil( bld, inst, (*pc)-1 ); 1841 break; 1842 1843 case TGSI_OPCODE_PK2H: 1844 return FALSE; 1845 break; 1846 1847 case TGSI_OPCODE_PK2US: 1848 return FALSE; 1849 break; 1850 1851 case TGSI_OPCODE_PK4B: 1852 return FALSE; 1853 break; 1854 1855 case TGSI_OPCODE_PK4UB: 1856 return FALSE; 1857 break; 1858 1859 case TGSI_OPCODE_RFL: 1860 return FALSE; 1861 break; 1862 1863 case TGSI_OPCODE_SEQ: 1864 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1865 src0 = emit_fetch( bld, inst, 0, chan_index ); 1866 src1 = emit_fetch( bld, inst, 1, chan_index ); 1867 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1868 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1869 } 1870 break; 1871 1872 case TGSI_OPCODE_SFL: 1873 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1874 dst0[chan_index] = bld->base.zero; 1875 } 1876 break; 1877 1878 case TGSI_OPCODE_SGT: 1879 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1880 src0 = emit_fetch( bld, inst, 0, chan_index ); 1881 src1 = emit_fetch( bld, inst, 1, chan_index ); 1882 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1883 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1884 } 1885 break; 1886 1887 case TGSI_OPCODE_SIN: 1888 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1889 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1890 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1891 dst0[chan_index] = tmp0; 1892 } 1893 break; 1894 1895 case TGSI_OPCODE_SLE: 1896 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1897 src0 = emit_fetch( bld, inst, 0, chan_index ); 1898 src1 = emit_fetch( bld, inst, 1, chan_index ); 1899 tmp0 = lp_build_cmp( &bld->base, 
PIPE_FUNC_LEQUAL, src0, src1 ); 1900 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1901 } 1902 break; 1903 1904 case TGSI_OPCODE_SNE: 1905 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1906 src0 = emit_fetch( bld, inst, 0, chan_index ); 1907 src1 = emit_fetch( bld, inst, 1, chan_index ); 1908 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1909 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1910 } 1911 break; 1912 1913 case TGSI_OPCODE_STR: 1914 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1915 dst0[chan_index] = bld->base.one; 1916 } 1917 break; 1918 1919 case TGSI_OPCODE_TEX: 1920 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); 1921 break; 1922 1923 case TGSI_OPCODE_TXD: 1924 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1925 break; 1926 1927 case TGSI_OPCODE_UP2H: 1928 /* deprecated */ 1929 assert (0); 1930 return FALSE; 1931 break; 1932 1933 case TGSI_OPCODE_UP2US: 1934 /* deprecated */ 1935 assert(0); 1936 return FALSE; 1937 break; 1938 1939 case TGSI_OPCODE_UP4B: 1940 /* deprecated */ 1941 assert(0); 1942 return FALSE; 1943 break; 1944 1945 case TGSI_OPCODE_UP4UB: 1946 /* deprecated */ 1947 assert(0); 1948 return FALSE; 1949 break; 1950 1951 case TGSI_OPCODE_X2D: 1952 /* deprecated? 
*/ 1953 assert(0); 1954 return FALSE; 1955 break; 1956 1957 case TGSI_OPCODE_ARA: 1958 /* deprecated */ 1959 assert(0); 1960 return FALSE; 1961 break; 1962 1963 case TGSI_OPCODE_ARR: 1964 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1965 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1966 tmp0 = lp_build_round(&bld->base, tmp0); 1967 dst0[chan_index] = tmp0; 1968 } 1969 break; 1970 1971 case TGSI_OPCODE_BRA: 1972 /* deprecated */ 1973 assert(0); 1974 return FALSE; 1975 break; 1976 1977 case TGSI_OPCODE_CAL: 1978 lp_exec_mask_call(&bld->exec_mask, 1979 inst->Label.Label, 1980 pc); 1981 1982 break; 1983 1984 case TGSI_OPCODE_RET: 1985 lp_exec_mask_ret(&bld->exec_mask, pc); 1986 break; 1987 1988 case TGSI_OPCODE_END: 1989 if (0) { 1990 /* for debugging */ 1991 emit_dump_temps(bld); 1992 } 1993 *pc = -1; 1994 break; 1995 1996 case TGSI_OPCODE_SSG: 1997 /* TGSI_OPCODE_SGN */ 1998 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1999 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2000 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 2001 } 2002 break; 2003 2004 case TGSI_OPCODE_CMP: 2005 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2006 src0 = emit_fetch( bld, inst, 0, chan_index ); 2007 src1 = emit_fetch( bld, inst, 1, chan_index ); 2008 src2 = emit_fetch( bld, inst, 2, chan_index ); 2009 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 2010 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 2011 } 2012 break; 2013 2014 case TGSI_OPCODE_SCS: 2015 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 2016 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 2017 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 2018 } 2019 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 2020 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 2021 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 2022 } 2023 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 2024 dst0[CHAN_Z] = bld->base.zero; 2025 } 2026 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 2027 
dst0[CHAN_W] = bld->base.one; 2028 } 2029 break; 2030 2031 case TGSI_OPCODE_TXB: 2032 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); 2033 break; 2034 2035 case TGSI_OPCODE_NRM: 2036 /* fall-through */ 2037 case TGSI_OPCODE_NRM4: 2038 /* 3 or 4-component normalization */ 2039 { 2040 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 2041 2042 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 2043 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 2044 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 2045 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 2046 2047 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 2048 2049 /* xmm4 = src.x */ 2050 /* xmm0 = src.x * src.x */ 2051 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2052 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2053 tmp4 = tmp0; 2054 } 2055 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 2056 2057 /* xmm5 = src.y */ 2058 /* xmm0 = xmm0 + src.y * src.y */ 2059 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 2060 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2061 tmp5 = tmp1; 2062 } 2063 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2064 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2065 2066 /* xmm6 = src.z */ 2067 /* xmm0 = xmm0 + src.z * src.z */ 2068 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 2069 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2070 tmp6 = tmp1; 2071 } 2072 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2073 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2074 2075 if (dims == 4) { 2076 /* xmm7 = src.w */ 2077 /* xmm0 = xmm0 + src.w * src.w */ 2078 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 2079 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 2080 tmp7 = tmp1; 2081 } 2082 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2083 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2084 } 2085 2086 /* xmm1 = 1 / sqrt(xmm0) */ 2087 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 2088 2089 /* dst.x = xmm1 * src.x */ 2090 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2091 dst0[CHAN_X] = lp_build_mul( &bld->base, 
tmp4, tmp1); 2092 } 2093 2094 /* dst.y = xmm1 * src.y */ 2095 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2096 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 2097 } 2098 2099 /* dst.z = xmm1 * src.z */ 2100 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2101 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 2102 } 2103 2104 /* dst.w = xmm1 * src.w */ 2105 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 2106 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 2107 } 2108 } 2109 2110 /* dst.w = 1.0 */ 2111 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 2112 dst0[CHAN_W] = bld->base.one; 2113 } 2114 } 2115 break; 2116 2117 case TGSI_OPCODE_DIV: 2118 /* deprecated */ 2119 assert( 0 ); 2120 return FALSE; 2121 break; 2122 2123 case TGSI_OPCODE_DP2: 2124 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 2125 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 2126 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 2127 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 2128 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 2129 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 2130 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 2131 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2132 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 2133 } 2134 break; 2135 2136 case TGSI_OPCODE_TXL: 2137 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 2138 break; 2139 2140 case TGSI_OPCODE_TXP: 2141 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); 2142 break; 2143 2144 case TGSI_OPCODE_BRK: 2145 lp_exec_break(&bld->exec_mask); 2146 break; 2147 2148 case TGSI_OPCODE_IF: 2149 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2150 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 2151 tmp0, bld->base.zero); 2152 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 2153 break; 2154 2155 case TGSI_OPCODE_BGNLOOP: 2156 
lp_exec_bgnloop(&bld->exec_mask); 2157 break; 2158 2159 case TGSI_OPCODE_BGNSUB: 2160 lp_exec_mask_bgnsub(&bld->exec_mask); 2161 break; 2162 2163 case TGSI_OPCODE_ELSE: 2164 lp_exec_mask_cond_invert(&bld->exec_mask); 2165 break; 2166 2167 case TGSI_OPCODE_ENDIF: 2168 lp_exec_mask_cond_pop(&bld->exec_mask); 2169 break; 2170 2171 case TGSI_OPCODE_ENDLOOP: 2172 lp_exec_endloop(&bld->exec_mask); 2173 break; 2174 2175 case TGSI_OPCODE_ENDSUB: 2176 lp_exec_mask_endsub(&bld->exec_mask, pc); 2177 break; 2178 2179 case TGSI_OPCODE_PUSHA: 2180 /* deprecated? */ 2181 assert(0); 2182 return FALSE; 2183 break; 2184 2185 case TGSI_OPCODE_POPA: 2186 /* deprecated? */ 2187 assert(0); 2188 return FALSE; 2189 break; 2190 2191 case TGSI_OPCODE_CEIL: 2192 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2193 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2194 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 2195 } 2196 break; 2197 2198 case TGSI_OPCODE_I2F: 2199 /* deprecated? */ 2200 assert(0); 2201 return FALSE; 2202 break; 2203 2204 case TGSI_OPCODE_NOT: 2205 /* deprecated? */ 2206 assert(0); 2207 return FALSE; 2208 break; 2209 2210 case TGSI_OPCODE_TRUNC: 2211 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2212 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2213 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 2214 } 2215 break; 2216 2217 case TGSI_OPCODE_SHL: 2218 /* deprecated? */ 2219 assert(0); 2220 return FALSE; 2221 break; 2222 2223 case TGSI_OPCODE_ISHR: 2224 /* deprecated? */ 2225 assert(0); 2226 return FALSE; 2227 break; 2228 2229 case TGSI_OPCODE_AND: 2230 /* deprecated? */ 2231 assert(0); 2232 return FALSE; 2233 break; 2234 2235 case TGSI_OPCODE_OR: 2236 /* deprecated? */ 2237 assert(0); 2238 return FALSE; 2239 break; 2240 2241 case TGSI_OPCODE_MOD: 2242 /* deprecated? */ 2243 assert(0); 2244 return FALSE; 2245 break; 2246 2247 case TGSI_OPCODE_XOR: 2248 /* deprecated? 
*/
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_SAD:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXF:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXQ:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CONT:
      lp_exec_continue(&bld->exec_mask);
      break;

   case TGSI_OPCODE_EMIT:
      return FALSE;
      break;

   case TGSI_OPCODE_ENDPRIM:
      return FALSE;
      break;

   case TGSI_OPCODE_NOP:
      break;

   default:
      return FALSE;
   }

   if(info->num_dst) {
      LLVMValueRef pred[NUM_CHANNELS];

      emit_fetch_predicate( bld, inst, pred );

      /* Store each enabled dest channel, applying the per-channel
       * predicate mask fetched above.
       */
      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
      }
   }

   return TRUE;
}


/**
 * Translate a TGSI shader, given as a token stream, into LLVM IR emitted
 * through \p builder.  Operates in SoA form: each TGSI register channel
 * is a vector of \p type.length elements.
 *
 * The translation runs in two passes: first all tokens are parsed,
 * declarations are emitted immediately and instructions/immediates are
 * buffered; then the buffered instructions are executed by emit_instruction()
 * starting at pc = 0 until an instruction (TGSI_OPCODE_END, see above)
 * sets pc to -1.  The two-pass scheme lets CAL/RET jump to labels that
 * appear later in the token stream.
 *
 * \param builder              LLVM builder all IR is emitted with
 * \param tokens               TGSI token stream to translate
 * \param type                 vector type used for register channels
 * \param mask                 fragment/exec mask context (stored in bld.mask)
 * \param consts_ptr           pointer to the constant buffer
 * \param system_values_array  array built by lp_build_system_values_array()
 * \param pos                  raster position values -- presumably one value
 *                             per channel; not read directly here (TODO confirm
 *                             usage in the emit helpers)
 * \param inputs               per-attribute, per-channel input values
 * \param outputs              per-attribute, per-channel output slots; for
 *                             indirect output addressing these are overwritten
 *                             at the end with pointers into the output alloca
 * \param sampler              texture sampling code generator
 * \param info                 shader info from tgsi_scan
 */
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef system_values_array,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_soa_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   uint num_instructions = 0;
   unsigned i;
   int pc = 0;        /* index into bld.instructions; -1 terminates */

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* NOTE(review): res_type is initialized below but never read in this
    * function -- looks like dead code; confirm before removing. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type));
   lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type));
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.info = info;
   bld.indirect_files = info->indirect_files;
   /* Instruction buffer; grown by REALLOC in the parse loop if the shader
    * has more than LP_MAX_INSTRUCTIONS instructions. */
   bld.instructions = (struct tgsi_full_instruction *)
      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
   bld.max_instructions = LP_MAX_INSTRUCTIONS;

   if (!bld.instructions) {
      return;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.base);

   /* Registers accessed with indirect (relative) addressing cannot live in
    * LLVM SSA values, so allocate an array (4 channels per register) for
    * each indirectly-addressed file. */
   if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                             info->file_max[TGSI_FILE_TEMPORARY]*4 + 4, 0);
      bld.temps_array = lp_build_array_alloca(bld.base.builder,
                                              bld.base.vec_type, array_size,
                                              "temp_array");
   }

   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                             info->file_max[TGSI_FILE_OUTPUT]*4 + 4, 0);
      bld.outputs_array = lp_build_array_alloca(bld.base.builder,
                                                bld.base.vec_type, array_size,
                                                "output_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld.indirect_files & (1 << TGSI_FILE_INPUT)) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld.base.vec_type;
      LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(),
                                             info->file_max[TGSI_FILE_INPUT]*4 + 4, 0);
      bld.inputs_array = lp_build_array_alloca(bld.base.builder,
                                               vec_type, array_size,
                                               "input_array");

      assert(info->num_inputs <= info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < info->num_inputs; ++index) {
         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(bld.base.builder, bld.inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld.inputs[index][chan];
            /* Some channels may be absent (NULL) -- only store real values. */
            if (value)
               LLVMBuildStore(bld.base.builder, value, input_ptr);
         }
      }
   }

   bld.system_values_array = system_values_array;

   tgsi_parse_init( &parse, tokens );

   /* Pass 1: walk the token stream.  Declarations are emitted immediately;
    * instructions and immediates are buffered for pass 2. */
   while( !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            /* save expanded instruction */
            if (num_instructions == bld.max_instructions) {
               struct tgsi_full_instruction *instructions;
               instructions = REALLOC(bld.instructions,
                                      bld.max_instructions
                                      * sizeof(struct tgsi_full_instruction),
                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
                                      * sizeof(struct tgsi_full_instruction));
               if (!instructions) {
                  /* NOTE(review): on REALLOC failure this instruction is
                   * silently dropped, producing a truncated shader. */
                  break;
               }
               bld.instructions = instructions;
               bld.max_instructions += LP_MAX_INSTRUCTIONS;
            }

            memcpy(bld.instructions + num_instructions,
                   &parse.FullToken.FullInstruction,
                   sizeof(bld.instructions[0]));

            num_instructions++;
         }

         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            /* NOTE(review): this bound is only checked in debug builds;
             * with NDEBUG an overlong immediate list would overflow
             * bld.immediates[]. */
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
            for( i = 0; i < size; ++i )
               bld.immediates[num_immediates][i] =
                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
            /* Unspecified channels are left undefined on purpose. */
            for( i = size; i < 4; ++i )
               bld.immediates[num_immediates][i] = bld.base.undef;
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert( 0 );
      }
   }

   /* Pass 2: translate the buffered instructions.  emit_instruction()
    * advances (or redirects, for CAL/RET) pc, and sets it to -1 on END. */
   while (pc != -1) {
      struct tgsi_full_instruction *instr = bld.instructions + pc;
      const struct tgsi_opcode_info *opcode_info =
         tgsi_get_opcode_info(instr->Instruction.Opcode);
      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                       opcode_info->mnemonic);
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the called */
   if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      unsigned index, chan;
      assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < info->num_outputs; ++index) {
         for (chan = 0; chan < NUM_CHANNELS; ++chan) {
            bld.outputs[index][chan] = get_output_ptr(&bld, index, chan);
         }
      }
   }

   if (0) {
      /* for debugging: dump the TGSI shader and the generated function */
      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }
   tgsi_parse_free( &parse );

   if (0) {
      /* for debugging: dump the whole module */
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
      LLVMDumpModule(module);

   }

   FREE( bld.instructions );
}


/**
 * Build up the system values array out of individual values such as
 * the instance ID, front-face, primitive ID, etc.  The shader info is
 * used to determine which system values are needed and where to put
 * them in the system values array.
 *
 * XXX only instance ID is implemented at this time.
 *
 * The system values register file is similar to the constants buffer.
2510 * Example declaration: 2511 * DCL SV[0], INSTANCEID 2512 * Example instruction: 2513 * MOVE foo, SV[0].xxxx; 2514 * 2515 * \return LLVM float array (interpreted as float [][4]) 2516 */ 2517LLVMValueRef 2518lp_build_system_values_array(LLVMBuilderRef builder, 2519 const struct tgsi_shader_info *info, 2520 LLVMValueRef instance_id, 2521 LLVMValueRef facing) 2522{ 2523 LLVMValueRef size = lp_build_const_int32(4 * info->num_system_values); 2524 LLVMValueRef array = lp_build_array_alloca(builder, LLVMFloatType(), 2525 size, "sysvals_array"); 2526 unsigned i; 2527 2528 for (i = 0; i < info->num_system_values; i++) { 2529 LLVMValueRef index = lp_build_const_int32(i * 4); 2530 LLVMValueRef ptr, value; 2531 2532 switch (info->system_value_semantic_name[i]) { 2533 case TGSI_SEMANTIC_INSTANCEID: 2534 /* convert instance ID from int to float */ 2535 value = LLVMBuildSIToFP(builder, instance_id, LLVMFloatType(), 2536 "sysval_instanceid"); 2537 break; 2538 case TGSI_SEMANTIC_FACE: 2539 /* fall-through */ 2540 default: 2541 assert(0 && "unexpected semantic in build_system_values_array()"); 2542 } 2543 2544 ptr = LLVMBuildGEP(builder, array, &index, 1, ""); 2545 LLVMBuildStore(builder, value, ptr); 2546 } 2547 2548 return array; 2549} 2550