/* lp_bld_tgsi_soa.c — revision 10740acf46e08960dde790005d65a98440f313bc */
/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
37 */ 38 39#include "pipe/p_config.h" 40#include "pipe/p_shader_tokens.h" 41#include "util/u_debug.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "tgsi/tgsi_dump.h" 45#include "tgsi/tgsi_info.h" 46#include "tgsi/tgsi_parse.h" 47#include "tgsi/tgsi_util.h" 48#include "tgsi/tgsi_scan.h" 49#include "lp_bld_type.h" 50#include "lp_bld_const.h" 51#include "lp_bld_arit.h" 52#include "lp_bld_bitarit.h" 53#include "lp_bld_gather.h" 54#include "lp_bld_logic.h" 55#include "lp_bld_swizzle.h" 56#include "lp_bld_flow.h" 57#include "lp_bld_quad.h" 58#include "lp_bld_tgsi.h" 59#include "lp_bld_limits.h" 60#include "lp_bld_debug.h" 61#include "lp_bld_printf.h" 62 63 64#define FOR_EACH_CHANNEL( CHAN )\ 65 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) 66 67#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 68 ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN))) 69 70#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ 71 if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) 72 73#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ 74 FOR_EACH_CHANNEL( CHAN )\ 75 IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) 76 77#define CHAN_X 0 78#define CHAN_Y 1 79#define CHAN_Z 2 80#define CHAN_W 3 81#define NUM_CHANNELS 4 82 83#define LP_MAX_INSTRUCTIONS 256 84 85 86struct lp_exec_mask { 87 struct lp_build_context *bld; 88 89 boolean has_mask; 90 91 LLVMTypeRef int_vec_type; 92 93 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 94 int cond_stack_size; 95 LLVMValueRef cond_mask; 96 97 LLVMBasicBlockRef loop_block; 98 LLVMValueRef cont_mask; 99 LLVMValueRef break_mask; 100 LLVMValueRef break_var; 101 struct { 102 LLVMBasicBlockRef loop_block; 103 LLVMValueRef cont_mask; 104 LLVMValueRef break_mask; 105 LLVMValueRef break_var; 106 } loop_stack[LP_MAX_TGSI_NESTING]; 107 int loop_stack_size; 108 109 LLVMValueRef ret_mask; 110 struct { 111 int pc; 112 LLVMValueRef ret_mask; 113 } call_stack[LP_MAX_TGSI_NESTING]; 114 int call_stack_size; 115 116 LLVMValueRef exec_mask; 117}; 118 119struct 
lp_build_tgsi_soa_context 120{ 121 struct lp_build_context base; 122 123 /* Builder for vector integer masks and indices */ 124 struct lp_build_context uint_bld; 125 126 /* Builder for scalar elements of shader's data type (float) */ 127 struct lp_build_context elem_bld; 128 129 LLVMValueRef consts_ptr; 130 const LLVMValueRef *pos; 131 const LLVMValueRef (*inputs)[NUM_CHANNELS]; 132 LLVMValueRef (*outputs)[NUM_CHANNELS]; 133 134 const struct lp_build_sampler_soa *sampler; 135 136 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS]; 137 LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS]; 138 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS]; 139 LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS]; 140 141 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 142 * set in the indirect_files field. 143 * The temps[] array above is unused then. 144 */ 145 LLVMValueRef temps_array; 146 147 /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is 148 * set in the indirect_files field. 149 * The outputs[] array above is unused then. 
150 */ 151 LLVMValueRef outputs_array; 152 153 const struct tgsi_shader_info *info; 154 /** bitmask indicating which register files are accessed indirectly */ 155 unsigned indirect_files; 156 157 struct lp_build_mask_context *mask; 158 struct lp_exec_mask exec_mask; 159 160 struct tgsi_full_instruction *instructions; 161 uint max_instructions; 162}; 163 164static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) 165{ 166 mask->bld = bld; 167 mask->has_mask = FALSE; 168 mask->cond_stack_size = 0; 169 mask->loop_stack_size = 0; 170 mask->call_stack_size = 0; 171 172 mask->int_vec_type = lp_build_int_vec_type(mask->bld->type); 173 mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask = 174 LLVMConstAllOnes(mask->int_vec_type); 175} 176 177static void lp_exec_mask_update(struct lp_exec_mask *mask) 178{ 179 if (mask->loop_stack_size) { 180 /*for loops we need to update the entire mask at runtime */ 181 LLVMValueRef tmp; 182 assert(mask->break_mask); 183 tmp = LLVMBuildAnd(mask->bld->builder, 184 mask->cont_mask, 185 mask->break_mask, 186 "maskcb"); 187 mask->exec_mask = LLVMBuildAnd(mask->bld->builder, 188 mask->cond_mask, 189 tmp, 190 "maskfull"); 191 } else 192 mask->exec_mask = mask->cond_mask; 193 194 if (mask->call_stack_size) { 195 mask->exec_mask = LLVMBuildAnd(mask->bld->builder, 196 mask->exec_mask, 197 mask->ret_mask, 198 "callmask"); 199 } 200 201 mask->has_mask = (mask->cond_stack_size > 0 || 202 mask->loop_stack_size > 0 || 203 mask->call_stack_size > 0); 204} 205 206static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, 207 LLVMValueRef val) 208{ 209 assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING); 210 if (mask->cond_stack_size == 0) { 211 assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); 212 } 213 mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask; 214 assert(LLVMTypeOf(val) == mask->int_vec_type); 215 mask->cond_mask = LLVMBuildAnd(mask->bld->builder, 216 
mask->cond_mask, 217 val, 218 ""); 219 lp_exec_mask_update(mask); 220} 221 222static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) 223{ 224 LLVMValueRef prev_mask; 225 LLVMValueRef inv_mask; 226 227 assert(mask->cond_stack_size); 228 prev_mask = mask->cond_stack[mask->cond_stack_size - 1]; 229 if (mask->cond_stack_size == 1) { 230 assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); 231 } 232 233 inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, ""); 234 235 mask->cond_mask = LLVMBuildAnd(mask->bld->builder, 236 inv_mask, 237 prev_mask, ""); 238 lp_exec_mask_update(mask); 239} 240 241static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) 242{ 243 assert(mask->cond_stack_size); 244 mask->cond_mask = mask->cond_stack[--mask->cond_stack_size]; 245 lp_exec_mask_update(mask); 246} 247 248static void lp_exec_bgnloop(struct lp_exec_mask *mask) 249{ 250 if (mask->loop_stack_size == 0) { 251 assert(mask->loop_block == NULL); 252 assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type)); 253 assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type)); 254 assert(mask->break_var == NULL); 255 } 256 257 assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING); 258 259 mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block; 260 mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask; 261 mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask; 262 mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var; 263 ++mask->loop_stack_size; 264 265 mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, ""); 266 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); 267 268 mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop"); 269 LLVMBuildBr(mask->bld->builder, mask->loop_block); 270 LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block); 271 272 mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, ""); 273 274 
lp_exec_mask_update(mask); 275} 276 277static void lp_exec_break(struct lp_exec_mask *mask) 278{ 279 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 280 mask->exec_mask, 281 "break"); 282 283 mask->break_mask = LLVMBuildAnd(mask->bld->builder, 284 mask->break_mask, 285 exec_mask, "break_full"); 286 287 lp_exec_mask_update(mask); 288} 289 290static void lp_exec_continue(struct lp_exec_mask *mask) 291{ 292 LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder, 293 mask->exec_mask, 294 ""); 295 296 mask->cont_mask = LLVMBuildAnd(mask->bld->builder, 297 mask->cont_mask, 298 exec_mask, ""); 299 300 lp_exec_mask_update(mask); 301} 302 303 304static void lp_exec_endloop(struct lp_exec_mask *mask) 305{ 306 LLVMBasicBlockRef endloop; 307 LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width* 308 mask->bld->type.length); 309 LLVMValueRef i1cond; 310 311 assert(mask->break_mask); 312 313 /* 314 * Restore the cont_mask, but don't pop 315 */ 316 assert(mask->loop_stack_size); 317 mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask; 318 lp_exec_mask_update(mask); 319 320 /* 321 * Unlike the continue mask, the break_mask must be preserved across loop 322 * iterations 323 */ 324 LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var); 325 326 /* i1cond = (mask == 0) */ 327 i1cond = LLVMBuildICmp( 328 mask->bld->builder, 329 LLVMIntNE, 330 LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""), 331 LLVMConstNull(reg_type), ""); 332 333 endloop = lp_build_insert_new_block(mask->bld->builder, "endloop"); 334 335 LLVMBuildCondBr(mask->bld->builder, 336 i1cond, mask->loop_block, endloop); 337 338 LLVMPositionBuilderAtEnd(mask->bld->builder, endloop); 339 340 assert(mask->loop_stack_size); 341 --mask->loop_stack_size; 342 mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block; 343 mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask; 344 mask->break_mask = 
mask->loop_stack[mask->loop_stack_size].break_mask; 345 mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var; 346 347 lp_exec_mask_update(mask); 348} 349 350/* stores val into an address pointed to by dst. 351 * mask->exec_mask is used to figure out which bits of val 352 * should be stored into the address 353 * (0 means don't store this bit, 1 means do store). 354 */ 355static void lp_exec_mask_store(struct lp_exec_mask *mask, 356 LLVMValueRef pred, 357 LLVMValueRef val, 358 LLVMValueRef dst) 359{ 360 /* Mix the predicate and execution mask */ 361 if (mask->has_mask) { 362 if (pred) { 363 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, ""); 364 } else { 365 pred = mask->exec_mask; 366 } 367 } 368 369 if (pred) { 370 LLVMValueRef real_val, dst_val; 371 372 dst_val = LLVMBuildLoad(mask->bld->builder, dst, ""); 373 real_val = lp_build_select(mask->bld, 374 pred, 375 val, dst_val); 376 377 LLVMBuildStore(mask->bld->builder, real_val, dst); 378 } else 379 LLVMBuildStore(mask->bld->builder, val, dst); 380} 381 382static void lp_exec_mask_call(struct lp_exec_mask *mask, 383 int func, 384 int *pc) 385{ 386 assert(mask->call_stack_size < LP_MAX_TGSI_NESTING); 387 mask->call_stack[mask->call_stack_size].pc = *pc; 388 mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask; 389 mask->call_stack_size++; 390 *pc = func; 391} 392 393static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc) 394{ 395 LLVMValueRef exec_mask; 396 397 if (mask->call_stack_size == 0) { 398 /* returning from main() */ 399 *pc = -1; 400 return; 401 } 402 exec_mask = LLVMBuildNot(mask->bld->builder, 403 mask->exec_mask, 404 "ret"); 405 406 mask->ret_mask = LLVMBuildAnd(mask->bld->builder, 407 mask->ret_mask, 408 exec_mask, "ret_full"); 409 410 lp_exec_mask_update(mask); 411} 412 413static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask) 414{ 415} 416 417static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) 418{ 419 
assert(mask->call_stack_size); 420 mask->call_stack_size--; 421 *pc = mask->call_stack[mask->call_stack_size].pc; 422 mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask; 423 lp_exec_mask_update(mask); 424} 425 426 427/** 428 * Return pointer to a temporary register channel (src or dest). 429 * Note that indirect addressing cannot be handled here. 430 * \param index which temporary register 431 * \param chan which channel of the temp register. 432 */ 433static LLVMValueRef 434get_temp_ptr(struct lp_build_tgsi_soa_context *bld, 435 unsigned index, 436 unsigned chan) 437{ 438 assert(chan < 4); 439 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 440 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); 441 return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, ""); 442 } 443 else { 444 return bld->temps[index][chan]; 445 } 446} 447 448/** 449 * Return pointer to a output register channel (src or dest). 450 * Note that indirect addressing cannot be handled here. 451 * \param index which output register 452 * \param chan which channel of the output register. 453 */ 454static LLVMValueRef 455get_output_ptr(struct lp_build_tgsi_soa_context *bld, 456 unsigned index, 457 unsigned chan) 458{ 459 assert(chan < 4); 460 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { 461 LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan); 462 return LLVMBuildGEP(bld->base.builder, bld->outputs_array, &lindex, 1, ""); 463 } 464 else { 465 return bld->outputs[index][chan]; 466 } 467} 468 469 470 471/** 472 * Gather vector. 473 * XXX the lp_build_gather() function should be capable of doing this 474 * with a little work. 475 */ 476static LLVMValueRef 477build_gather(struct lp_build_tgsi_soa_context *bld, 478 LLVMValueRef base_ptr, 479 LLVMValueRef indexes) 480{ 481 LLVMValueRef res = bld->base.undef; 482 unsigned i; 483 484 /* 485 * Loop over elements of index_vec, load scalar value, insert it into 'res'. 
486 */ 487 for (i = 0; i < bld->base.type.length; i++) { 488 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0); 489 LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder, 490 indexes, ii, ""); 491 LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr, 492 &index, 1, "gather_ptr"); 493 LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 494 495 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, ""); 496 } 497 498 return res; 499} 500 501 502/** 503 * Scatter/store vector. 504 */ 505static void 506emit_mask_scatter(struct lp_build_tgsi_soa_context *bld, 507 LLVMValueRef base_ptr, 508 LLVMValueRef indexes, 509 LLVMValueRef values, 510 struct lp_exec_mask *mask, 511 LLVMValueRef pred) 512{ 513 LLVMBuilderRef builder = bld->base.builder; 514 unsigned i; 515 516 /* Mix the predicate and execution mask */ 517 if (mask->has_mask) { 518 if (pred) { 519 pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, ""); 520 } 521 else { 522 pred = mask->exec_mask; 523 } 524 } 525 526 /* 527 * Loop over elements of index_vec, store scalar value. 528 */ 529 for (i = 0; i < bld->base.type.length; i++) { 530 LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0); 531 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, ""); 532 LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr"); 533 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val"); 534 LLVMValueRef scalar_pred = pred ? 
535 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; 536 537 if (0) 538 lp_build_printf(builder, "scatter %d: val %f at %d %p\n", 539 ii, val, index, scalar_ptr); 540 541 if (scalar_pred) { 542 LLVMValueRef real_val, dst_val; 543 dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); 544 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); 545 LLVMBuildStore(builder, real_val, scalar_ptr); 546 } 547 else { 548 LLVMBuildStore(builder, val, scalar_ptr); 549 } 550 } 551} 552 553 554/** 555 * Read the current value of the ADDR register, convert the floats to 556 * ints, add the base index and return the vector of offsets. 557 * The offsets will be used to index into the constant buffer or 558 * temporary register file. 559 */ 560static LLVMValueRef 561get_indirect_index(struct lp_build_tgsi_soa_context *bld, 562 unsigned reg_file, unsigned reg_index, 563 const struct tgsi_src_register *indirect_reg) 564{ 565 struct lp_build_context *uint_bld = &bld->uint_bld; 566 /* always use X component of address register */ 567 unsigned swizzle = indirect_reg->SwizzleX; 568 LLVMValueRef base; 569 LLVMValueRef rel; 570 LLVMValueRef max_index; 571 LLVMValueRef index; 572 573 assert(bld->indirect_files & (1 << reg_file)); 574 575 base = lp_build_const_int_vec(uint_bld->type, reg_index); 576 577 assert(swizzle < 4); 578 rel = LLVMBuildLoad(bld->base.builder, 579 bld->addr[indirect_reg->Index][swizzle], 580 "load addr reg"); 581 582 /* for indexing we want integers */ 583 rel = LLVMBuildFPToSI(bld->base.builder, 584 rel, 585 uint_bld->vec_type, ""); 586 587 index = lp_build_add(uint_bld, base, rel); 588 589 max_index = lp_build_const_int_vec(uint_bld->type, 590 bld->info->file_max[reg_file]); 591 592 assert(!uint_bld->type.sign); 593 index = lp_build_min(uint_bld, index, max_index); 594 595 return index; 596} 597 598 599/** 600 * Register fetch. 
601 */ 602static LLVMValueRef 603emit_fetch( 604 struct lp_build_tgsi_soa_context *bld, 605 const struct tgsi_full_instruction *inst, 606 unsigned src_op, 607 const unsigned chan_index ) 608{ 609 struct lp_build_context *uint_bld = &bld->uint_bld; 610 const struct tgsi_full_src_register *reg = &inst->Src[src_op]; 611 const unsigned swizzle = 612 tgsi_util_get_full_src_register_swizzle(reg, chan_index); 613 LLVMValueRef res; 614 LLVMValueRef indirect_index = NULL; 615 616 if (swizzle > 3) { 617 assert(0 && "invalid swizzle in emit_fetch()"); 618 return bld->base.undef; 619 } 620 621 if (reg->Register.Indirect) { 622 indirect_index = get_indirect_index(bld, 623 reg->Register.File, 624 reg->Register.Index, 625 ®->Indirect); 626 } else { 627 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); 628 } 629 630 switch (reg->Register.File) { 631 case TGSI_FILE_CONSTANT: 632 if (reg->Register.Indirect) { 633 LLVMValueRef swizzle_vec = 634 lp_build_const_int_vec(uint_bld->type, swizzle); 635 LLVMValueRef index_vec; /* index into the const buffer */ 636 637 /* index_vec = indirect_index * 4 + swizzle */ 638 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 639 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 640 641 /* Gather values from the constant buffer */ 642 res = build_gather(bld, bld->consts_ptr, index_vec); 643 } 644 else { 645 LLVMValueRef index; /* index into the const buffer */ 646 LLVMValueRef scalar, scalar_ptr; 647 648 index = lp_build_const_int32(reg->Register.Index*4 + swizzle); 649 650 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, 651 &index, 1, ""); 652 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 653 654 res = lp_build_broadcast_scalar(&bld->base, scalar); 655 } 656 break; 657 658 case TGSI_FILE_IMMEDIATE: 659 res = bld->immediates[reg->Register.Index][swizzle]; 660 assert(res); 661 break; 662 663 case TGSI_FILE_INPUT: 664 res = bld->inputs[reg->Register.Index][swizzle]; 665 assert(res); 
666 break; 667 668 case TGSI_FILE_TEMPORARY: 669 if (reg->Register.Indirect) { 670 LLVMValueRef swizzle_vec = 671 lp_build_const_int_vec(uint_bld->type, swizzle); 672 LLVMValueRef length_vec = 673 lp_build_const_int_vec(uint_bld->type, bld->base.type.length); 674 LLVMValueRef index_vec; /* index into the const buffer */ 675 LLVMValueRef temps_array; 676 LLVMTypeRef float4_ptr_type; 677 678 /* index_vec = (indirect_index * 4 + swizzle) * length */ 679 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 680 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 681 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 682 683 /* cast temps_array pointer to float* */ 684 float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0); 685 temps_array = LLVMBuildBitCast(uint_bld->builder, bld->temps_array, 686 float4_ptr_type, ""); 687 688 /* Gather values from the temporary register array */ 689 res = build_gather(bld, temps_array, index_vec); 690 } 691 else { 692 LLVMValueRef temp_ptr; 693 temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle); 694 res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); 695 if (!res) 696 return bld->base.undef; 697 } 698 break; 699 700 default: 701 assert(0 && "invalid src register in emit_fetch()"); 702 return bld->base.undef; 703 } 704 705 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { 706 case TGSI_UTIL_SIGN_CLEAR: 707 res = lp_build_abs( &bld->base, res ); 708 break; 709 710 case TGSI_UTIL_SIGN_SET: 711 res = lp_build_abs( &bld->base, res ); 712 /* fall through */ 713 case TGSI_UTIL_SIGN_TOGGLE: 714 res = lp_build_negate( &bld->base, res ); 715 break; 716 717 case TGSI_UTIL_SIGN_KEEP: 718 break; 719 } 720 721 return res; 722} 723 724 725/** 726 * Register fetch with derivatives. 
727 */ 728static void 729emit_fetch_deriv( 730 struct lp_build_tgsi_soa_context *bld, 731 const struct tgsi_full_instruction *inst, 732 unsigned index, 733 const unsigned chan_index, 734 LLVMValueRef *res, 735 LLVMValueRef *ddx, 736 LLVMValueRef *ddy) 737{ 738 LLVMValueRef src; 739 740 src = emit_fetch(bld, inst, index, chan_index); 741 742 if(res) 743 *res = src; 744 745 /* TODO: use interpolation coeffs for inputs */ 746 747 if(ddx) 748 *ddx = lp_build_ddx(&bld->base, src); 749 750 if(ddy) 751 *ddy = lp_build_ddy(&bld->base, src); 752} 753 754 755/** 756 * Predicate. 757 */ 758static void 759emit_fetch_predicate( 760 struct lp_build_tgsi_soa_context *bld, 761 const struct tgsi_full_instruction *inst, 762 LLVMValueRef *pred) 763{ 764 unsigned index; 765 unsigned char swizzles[4]; 766 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; 767 LLVMValueRef value; 768 unsigned chan; 769 770 if (!inst->Instruction.Predicate) { 771 FOR_EACH_CHANNEL( chan ) { 772 pred[chan] = NULL; 773 } 774 return; 775 } 776 777 swizzles[0] = inst->Predicate.SwizzleX; 778 swizzles[1] = inst->Predicate.SwizzleY; 779 swizzles[2] = inst->Predicate.SwizzleZ; 780 swizzles[3] = inst->Predicate.SwizzleW; 781 782 index = inst->Predicate.Index; 783 assert(index < LP_MAX_TGSI_PREDS); 784 785 FOR_EACH_CHANNEL( chan ) { 786 unsigned swizzle = swizzles[chan]; 787 788 /* 789 * Only fetch the predicate register channels that are actually listed 790 * in the swizzles 791 */ 792 if (!unswizzled[swizzle]) { 793 value = LLVMBuildLoad(bld->base.builder, 794 bld->preds[index][swizzle], ""); 795 796 /* 797 * Convert the value to an integer mask. 798 * 799 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions 800 * is needlessly causing two comparisons due to storing the intermediate 801 * result as float vector instead of an integer mask vector. 
802 */ 803 value = lp_build_compare(bld->base.builder, 804 bld->base.type, 805 PIPE_FUNC_NOTEQUAL, 806 value, 807 bld->base.zero); 808 if (inst->Predicate.Negate) { 809 value = LLVMBuildNot(bld->base.builder, value, ""); 810 } 811 812 unswizzled[swizzle] = value; 813 } else { 814 value = unswizzled[swizzle]; 815 } 816 817 pred[chan] = value; 818 } 819} 820 821 822/** 823 * Register store. 824 */ 825static void 826emit_store( 827 struct lp_build_tgsi_soa_context *bld, 828 const struct tgsi_full_instruction *inst, 829 unsigned index, 830 unsigned chan_index, 831 LLVMValueRef pred, 832 LLVMValueRef value) 833{ 834 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 835 struct lp_build_context *uint_bld = &bld->uint_bld; 836 LLVMValueRef indirect_index = NULL; 837 838 switch( inst->Instruction.Saturate ) { 839 case TGSI_SAT_NONE: 840 break; 841 842 case TGSI_SAT_ZERO_ONE: 843 value = lp_build_max(&bld->base, value, bld->base.zero); 844 value = lp_build_min(&bld->base, value, bld->base.one); 845 break; 846 847 case TGSI_SAT_MINUS_PLUS_ONE: 848 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); 849 value = lp_build_min(&bld->base, value, bld->base.one); 850 break; 851 852 default: 853 assert(0); 854 } 855 856 if (reg->Register.Indirect) { 857 indirect_index = get_indirect_index(bld, 858 reg->Register.File, 859 reg->Register.Index, 860 ®->Indirect); 861 } else { 862 assert(reg->Register.Index <= bld->info->file_max[reg->Register.File]); 863 } 864 865 switch( reg->Register.File ) { 866 case TGSI_FILE_OUTPUT: 867 if (reg->Register.Indirect) { 868 LLVMBuilderRef builder = bld->base.builder; 869 LLVMValueRef chan_vec = 870 lp_build_const_int_vec(uint_bld->type, chan_index); 871 LLVMValueRef length_vec = 872 lp_build_const_int_vec(uint_bld->type, bld->base.type.length); 873 LLVMValueRef index_vec; /* indexes into the temp registers */ 874 LLVMValueRef outputs_array; 875 LLVMValueRef pixel_offsets; 876 LLVMTypeRef float_ptr_type; 877 
int i; 878 879 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 880 pixel_offsets = uint_bld->undef; 881 for (i = 0; i < bld->base.type.length; i++) { 882 LLVMValueRef ii = lp_build_const_int32(i); 883 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 884 ii, ii, ""); 885 } 886 887 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 888 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 889 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 890 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 891 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 892 893 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0); 894 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, 895 float_ptr_type, ""); 896 897 /* Scatter store values into temp registers */ 898 emit_mask_scatter(bld, outputs_array, index_vec, value, 899 &bld->exec_mask, pred); 900 } 901 else { 902 LLVMValueRef out_ptr = get_output_ptr(bld, reg->Register.Index, 903 chan_index); 904 lp_exec_mask_store(&bld->exec_mask, pred, value, out_ptr); 905 } 906 break; 907 908 case TGSI_FILE_TEMPORARY: 909 if (reg->Register.Indirect) { 910 LLVMBuilderRef builder = bld->base.builder; 911 LLVMValueRef chan_vec = 912 lp_build_const_int_vec(uint_bld->type, chan_index); 913 LLVMValueRef length_vec = 914 lp_build_const_int_vec(uint_bld->type, bld->base.type.length); 915 LLVMValueRef index_vec; /* indexes into the temp registers */ 916 LLVMValueRef temps_array; 917 LLVMValueRef pixel_offsets; 918 LLVMTypeRef float_ptr_type; 919 int i; 920 921 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 922 pixel_offsets = uint_bld->undef; 923 for (i = 0; i < bld->base.type.length; i++) { 924 LLVMValueRef ii = lp_build_const_int32(i); 925 pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets, 926 ii, ii, ""); 927 } 928 929 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 930 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 931 
index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 932 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 933 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 934 935 float_ptr_type = LLVMPointerType(LLVMFloatType(), 0); 936 temps_array = LLVMBuildBitCast(builder, bld->temps_array, 937 float_ptr_type, ""); 938 939 /* Scatter store values into temp registers */ 940 emit_mask_scatter(bld, temps_array, index_vec, value, 941 &bld->exec_mask, pred); 942 } 943 else { 944 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 945 chan_index); 946 lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); 947 } 948 break; 949 950 case TGSI_FILE_ADDRESS: 951 lp_exec_mask_store(&bld->exec_mask, pred, value, 952 bld->addr[reg->Indirect.Index][chan_index]); 953 break; 954 955 case TGSI_FILE_PREDICATE: 956 lp_exec_mask_store(&bld->exec_mask, pred, value, 957 bld->preds[reg->Register.Index][chan_index]); 958 break; 959 960 default: 961 assert( 0 ); 962 } 963} 964 965 966/** 967 * High-level instruction translators. 
968 */ 969 970static void 971emit_tex( struct lp_build_tgsi_soa_context *bld, 972 const struct tgsi_full_instruction *inst, 973 enum lp_build_tex_modifier modifier, 974 LLVMValueRef *texel) 975{ 976 unsigned unit; 977 LLVMValueRef lod_bias, explicit_lod; 978 LLVMValueRef oow = NULL; 979 LLVMValueRef coords[3]; 980 LLVMValueRef ddx[3]; 981 LLVMValueRef ddy[3]; 982 unsigned num_coords; 983 unsigned i; 984 985 if (!bld->sampler) { 986 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 987 for (i = 0; i < 4; i++) { 988 texel[i] = bld->base.undef; 989 } 990 return; 991 } 992 993 switch (inst->Texture.Texture) { 994 case TGSI_TEXTURE_1D: 995 num_coords = 1; 996 break; 997 case TGSI_TEXTURE_2D: 998 case TGSI_TEXTURE_RECT: 999 num_coords = 2; 1000 break; 1001 case TGSI_TEXTURE_SHADOW1D: 1002 case TGSI_TEXTURE_SHADOW2D: 1003 case TGSI_TEXTURE_SHADOWRECT: 1004 case TGSI_TEXTURE_3D: 1005 case TGSI_TEXTURE_CUBE: 1006 num_coords = 3; 1007 break; 1008 default: 1009 assert(0); 1010 return; 1011 } 1012 1013 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 1014 lod_bias = emit_fetch( bld, inst, 0, 3 ); 1015 explicit_lod = NULL; 1016 } 1017 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 1018 lod_bias = NULL; 1019 explicit_lod = emit_fetch( bld, inst, 0, 3 ); 1020 } 1021 else { 1022 lod_bias = NULL; 1023 explicit_lod = NULL; 1024 } 1025 1026 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 1027 oow = emit_fetch( bld, inst, 0, 3 ); 1028 oow = lp_build_rcp(&bld->base, oow); 1029 } 1030 1031 for (i = 0; i < num_coords; i++) { 1032 coords[i] = emit_fetch( bld, inst, 0, i ); 1033 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 1034 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 1035 } 1036 for (i = num_coords; i < 3; i++) { 1037 coords[i] = bld->base.undef; 1038 } 1039 1040 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 1041 LLVMTypeRef i32t = LLVMInt32Type(); 1042 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); 1043 for (i = 0; 
i < num_coords; i++) { 1044 LLVMValueRef src1 = emit_fetch( bld, inst, 1, i ); 1045 LLVMValueRef src2 = emit_fetch( bld, inst, 2, i ); 1046 ddx[i] = LLVMBuildExtractElement(bld->base.builder, src1, index0, ""); 1047 ddy[i] = LLVMBuildExtractElement(bld->base.builder, src2, index0, ""); 1048 } 1049 unit = inst->Src[3].Register.Index; 1050 } else { 1051 for (i = 0; i < num_coords; i++) { 1052 ddx[i] = lp_build_scalar_ddx( &bld->base, coords[i] ); 1053 ddy[i] = lp_build_scalar_ddy( &bld->base, coords[i] ); 1054 } 1055 unit = inst->Src[1].Register.Index; 1056 } 1057 for (i = num_coords; i < 3; i++) { 1058 ddx[i] = LLVMGetUndef(bld->base.elem_type); 1059 ddy[i] = LLVMGetUndef(bld->base.elem_type); 1060 } 1061 1062 bld->sampler->emit_fetch_texel(bld->sampler, 1063 bld->base.builder, 1064 bld->base.type, 1065 unit, num_coords, coords, 1066 ddx, ddy, 1067 lod_bias, explicit_lod, 1068 texel); 1069} 1070 1071static boolean 1072near_end_of_shader(struct lp_build_tgsi_soa_context *bld, 1073 int pc) 1074{ 1075 int i; 1076 1077 for (i = 0; i < 5; i++) { 1078 unsigned opcode; 1079 1080 if (pc + i >= bld->info->num_instructions) 1081 return TRUE; 1082 1083 opcode = bld->instructions[pc + i].Instruction.Opcode; 1084 1085 if (opcode == TGSI_OPCODE_END) 1086 return TRUE; 1087 1088 if (opcode == TGSI_OPCODE_TEX || 1089 opcode == TGSI_OPCODE_TXP || 1090 opcode == TGSI_OPCODE_TXD || 1091 opcode == TGSI_OPCODE_TXB || 1092 opcode == TGSI_OPCODE_TXL || 1093 opcode == TGSI_OPCODE_TXF || 1094 opcode == TGSI_OPCODE_TXQ || 1095 opcode == TGSI_OPCODE_CAL || 1096 opcode == TGSI_OPCODE_CALLNZ || 1097 opcode == TGSI_OPCODE_IF || 1098 opcode == TGSI_OPCODE_IFC || 1099 opcode == TGSI_OPCODE_BGNLOOP || 1100 opcode == TGSI_OPCODE_SWITCH) 1101 return FALSE; 1102 } 1103 1104 return TRUE; 1105} 1106 1107 1108 1109/** 1110 * Kill fragment if any of the src register values are negative. 
1111 */ 1112static void 1113emit_kil( 1114 struct lp_build_tgsi_soa_context *bld, 1115 const struct tgsi_full_instruction *inst, 1116 int pc) 1117{ 1118 const struct tgsi_full_src_register *reg = &inst->Src[0]; 1119 LLVMValueRef terms[NUM_CHANNELS]; 1120 LLVMValueRef mask; 1121 unsigned chan_index; 1122 1123 memset(&terms, 0, sizeof terms); 1124 1125 FOR_EACH_CHANNEL( chan_index ) { 1126 unsigned swizzle; 1127 1128 /* Unswizzle channel */ 1129 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1130 1131 /* Check if the component has not been already tested. */ 1132 assert(swizzle < NUM_CHANNELS); 1133 if( !terms[swizzle] ) 1134 /* TODO: change the comparison operator instead of setting the sign */ 1135 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 1136 } 1137 1138 mask = NULL; 1139 FOR_EACH_CHANNEL( chan_index ) { 1140 if(terms[chan_index]) { 1141 LLVMValueRef chan_mask; 1142 1143 /* 1144 * If term < 0 then mask = 0 else mask = ~0. 1145 */ 1146 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 1147 1148 if(mask) 1149 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); 1150 else 1151 mask = chan_mask; 1152 } 1153 } 1154 1155 if(mask) { 1156 lp_build_mask_update(bld->mask, mask); 1157 1158 if (!near_end_of_shader(bld, pc)) 1159 lp_build_mask_check(bld->mask); 1160 } 1161} 1162 1163 1164/** 1165 * Predicated fragment kill. 1166 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 1167 * The only predication is the execution mask which will apply if 1168 * we're inside a loop or conditional. 1169 */ 1170static void 1171emit_kilp(struct lp_build_tgsi_soa_context *bld, 1172 const struct tgsi_full_instruction *inst, 1173 int pc) 1174{ 1175 LLVMValueRef mask; 1176 1177 /* For those channels which are "alive", disable fragment shader 1178 * execution. 
1179 */ 1180 if (bld->exec_mask.has_mask) { 1181 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); 1182 } 1183 else { 1184 LLVMValueRef zero = LLVMConstNull(bld->base.int_vec_type); 1185 mask = zero; 1186 } 1187 1188 lp_build_mask_update(bld->mask, mask); 1189 1190 if (!near_end_of_shader(bld, pc)) 1191 lp_build_mask_check(bld->mask); 1192} 1193 1194 1195/** 1196 * Emit code which will dump the value of all the temporary registers 1197 * to stdout. 1198 */ 1199static void 1200emit_dump_temps(struct lp_build_tgsi_soa_context *bld) 1201{ 1202 LLVMBuilderRef builder = bld->base.builder; 1203 LLVMValueRef temp_ptr; 1204 LLVMValueRef i0 = lp_build_const_int32(0); 1205 LLVMValueRef i1 = lp_build_const_int32(1); 1206 LLVMValueRef i2 = lp_build_const_int32(2); 1207 LLVMValueRef i3 = lp_build_const_int32(3); 1208 int index; 1209 int n = bld->info->file_max[TGSI_FILE_TEMPORARY]; 1210 1211 for (index = 0; index < n; index++) { 1212 LLVMValueRef idx = lp_build_const_int32(index); 1213 LLVMValueRef v[4][4], res; 1214 int chan; 1215 1216 lp_build_printf(builder, "TEMP[%d]:\n", idx); 1217 1218 for (chan = 0; chan < 4; chan++) { 1219 temp_ptr = get_temp_ptr(bld, index, chan); 1220 res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); 1221 v[chan][0] = LLVMBuildExtractElement(builder, res, i0, ""); 1222 v[chan][1] = LLVMBuildExtractElement(builder, res, i1, ""); 1223 v[chan][2] = LLVMBuildExtractElement(builder, res, i2, ""); 1224 v[chan][3] = LLVMBuildExtractElement(builder, res, i3, ""); 1225 } 1226 1227 lp_build_printf(builder, " X: %f %f %f %f\n", 1228 v[0][0], v[0][1], v[0][2], v[0][3]); 1229 lp_build_printf(builder, " Y: %f %f %f %f\n", 1230 v[1][0], v[1][1], v[1][2], v[1][3]); 1231 lp_build_printf(builder, " Z: %f %f %f %f\n", 1232 v[2][0], v[2][1], v[2][2], v[2][3]); 1233 lp_build_printf(builder, " W: %f %f %f %f\n", 1234 v[3][0], v[3][1], v[3][2], v[3][3]); 1235 } 1236} 1237 1238 1239 1240static void 1241emit_declaration( 1242 struct 
lp_build_tgsi_soa_context *bld, 1243 const struct tgsi_full_declaration *decl) 1244{ 1245 LLVMTypeRef vec_type = bld->base.vec_type; 1246 const unsigned first = decl->Range.First; 1247 const unsigned last = decl->Range.Last; 1248 unsigned idx, i; 1249 1250 for (idx = first; idx <= last; ++idx) { 1251 assert(last <= bld->info->file_max[decl->Declaration.File]); 1252 switch (decl->Declaration.File) { 1253 case TGSI_FILE_TEMPORARY: 1254 assert(idx < LP_MAX_TGSI_TEMPS); 1255 if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) { 1256 for (i = 0; i < NUM_CHANNELS; i++) 1257 bld->temps[idx][i] = lp_build_alloca(bld->base.builder, 1258 vec_type, "temp"); 1259 } 1260 break; 1261 1262 case TGSI_FILE_OUTPUT: 1263 if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 1264 for (i = 0; i < NUM_CHANNELS; i++) 1265 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, 1266 vec_type, "output"); 1267 } 1268 break; 1269 1270 case TGSI_FILE_ADDRESS: 1271 assert(idx < LP_MAX_TGSI_ADDRS); 1272 for (i = 0; i < NUM_CHANNELS; i++) 1273 bld->addr[idx][i] = lp_build_alloca(bld->base.builder, 1274 vec_type, "addr"); 1275 break; 1276 1277 case TGSI_FILE_PREDICATE: 1278 assert(idx < LP_MAX_TGSI_PREDS); 1279 for (i = 0; i < NUM_CHANNELS; i++) 1280 bld->preds[idx][i] = lp_build_alloca(bld->base.builder, 1281 vec_type, "predicate"); 1282 break; 1283 1284 default: 1285 /* don't need to declare other vars */ 1286 break; 1287 } 1288 } 1289} 1290 1291 1292/** 1293 * Emit LLVM for one TGSI instruction. 
1294 * \param return TRUE for success, FALSE otherwise 1295 */ 1296static boolean 1297emit_instruction( 1298 struct lp_build_tgsi_soa_context *bld, 1299 const struct tgsi_full_instruction *inst, 1300 const struct tgsi_opcode_info *info, 1301 int *pc) 1302{ 1303 unsigned chan_index; 1304 LLVMValueRef src0, src1, src2; 1305 LLVMValueRef tmp0, tmp1, tmp2; 1306 LLVMValueRef tmp3 = NULL; 1307 LLVMValueRef tmp4 = NULL; 1308 LLVMValueRef tmp5 = NULL; 1309 LLVMValueRef tmp6 = NULL; 1310 LLVMValueRef tmp7 = NULL; 1311 LLVMValueRef res; 1312 LLVMValueRef dst0[NUM_CHANNELS]; 1313 1314 /* 1315 * Stores and write masks are handled in a general fashion after the long 1316 * instruction opcode switch statement. 1317 * 1318 * Although not stricitly necessary, we avoid generating instructions for 1319 * channels which won't be stored, in cases where's that easy. For some 1320 * complex instructions, like texture sampling, it is more convenient to 1321 * assume a full writemask and then let LLVM optimization passes eliminate 1322 * redundant code. 
1323 */ 1324 1325 (*pc)++; 1326 1327 assert(info->num_dst <= 1); 1328 if (info->num_dst) { 1329 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1330 dst0[chan_index] = bld->base.undef; 1331 } 1332 } 1333 1334 switch (inst->Instruction.Opcode) { 1335 case TGSI_OPCODE_ARL: 1336 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1337 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1338 tmp0 = lp_build_floor(&bld->base, tmp0); 1339 dst0[chan_index] = tmp0; 1340 } 1341 break; 1342 1343 case TGSI_OPCODE_MOV: 1344 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1345 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1346 } 1347 break; 1348 1349 case TGSI_OPCODE_LIT: 1350 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 1351 dst0[CHAN_X] = bld->base.one; 1352 } 1353 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1354 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1355 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1356 } 1357 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1358 /* XMM[1] = SrcReg[0].yyyy */ 1359 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1360 /* XMM[1] = max(XMM[1], 0) */ 1361 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1362 /* XMM[2] = SrcReg[0].wwww */ 1363 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1364 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1365 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1366 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1367 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1368 } 1369 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1370 dst0[CHAN_W] = bld->base.one; 1371 } 1372 break; 1373 1374 case TGSI_OPCODE_RCP: 1375 /* TGSI_OPCODE_RECIP */ 1376 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1377 res = lp_build_rcp(&bld->base, src0); 1378 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1379 dst0[chan_index] = res; 1380 } 1381 break; 1382 1383 case TGSI_OPCODE_RSQ: 1384 /* TGSI_OPCODE_RECIPSQRT */ 1385 src0 = emit_fetch( bld, inst, 0, 
CHAN_X ); 1386 src0 = lp_build_abs(&bld->base, src0); 1387 res = lp_build_rsqrt(&bld->base, src0); 1388 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1389 dst0[chan_index] = res; 1390 } 1391 break; 1392 1393 case TGSI_OPCODE_EXP: 1394 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1395 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1396 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1397 LLVMValueRef *p_exp2_int_part = NULL; 1398 LLVMValueRef *p_frac_part = NULL; 1399 LLVMValueRef *p_exp2 = NULL; 1400 1401 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1402 1403 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1404 p_exp2_int_part = &tmp0; 1405 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1406 p_frac_part = &tmp1; 1407 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1408 p_exp2 = &tmp2; 1409 1410 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1411 1412 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1413 dst0[CHAN_X] = tmp0; 1414 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1415 dst0[CHAN_Y] = tmp1; 1416 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1417 dst0[CHAN_Z] = tmp2; 1418 } 1419 /* dst.w = 1.0 */ 1420 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1421 dst0[CHAN_W] = bld->base.one; 1422 } 1423 break; 1424 1425 case TGSI_OPCODE_LOG: 1426 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1427 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1428 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1429 LLVMValueRef *p_floor_log2 = NULL; 1430 LLVMValueRef *p_exp = NULL; 1431 LLVMValueRef *p_log2 = NULL; 1432 1433 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1434 src0 = lp_build_abs( &bld->base, src0 ); 1435 1436 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1437 p_floor_log2 = &tmp0; 1438 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1439 p_exp = &tmp1; 1440 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1441 p_log2 = &tmp2; 1442 1443 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1444 1445 /* dst.x = floor(lg2(abs(src.x))) */ 1446 if 
(IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1447 dst0[CHAN_X] = tmp0; 1448 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1449 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1450 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1451 } 1452 /* dst.z = lg2(abs(src.x)) */ 1453 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1454 dst0[CHAN_Z] = tmp2; 1455 } 1456 /* dst.w = 1.0 */ 1457 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1458 dst0[CHAN_W] = bld->base.one; 1459 } 1460 break; 1461 1462 case TGSI_OPCODE_MUL: 1463 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1464 src0 = emit_fetch( bld, inst, 0, chan_index ); 1465 src1 = emit_fetch( bld, inst, 1, chan_index ); 1466 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1467 } 1468 break; 1469 1470 case TGSI_OPCODE_ADD: 1471 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1472 src0 = emit_fetch( bld, inst, 0, chan_index ); 1473 src1 = emit_fetch( bld, inst, 1, chan_index ); 1474 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1475 } 1476 break; 1477 1478 case TGSI_OPCODE_DP3: 1479 /* TGSI_OPCODE_DOT3 */ 1480 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1481 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1482 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1483 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1484 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1485 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1486 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1487 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1488 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1489 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1490 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1491 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1492 dst0[chan_index] = tmp0; 1493 } 1494 break; 1495 1496 case TGSI_OPCODE_DP4: 1497 /* TGSI_OPCODE_DOT4 */ 1498 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1499 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1500 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1501 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y 
); 1502 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1503 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1504 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1505 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1506 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1507 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1508 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1509 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1510 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1511 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1512 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1513 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1514 dst0[chan_index] = tmp0; 1515 } 1516 break; 1517 1518 case TGSI_OPCODE_DST: 1519 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1520 dst0[CHAN_X] = bld->base.one; 1521 } 1522 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1523 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1524 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1525 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1526 } 1527 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1528 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1529 } 1530 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1531 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1532 } 1533 break; 1534 1535 case TGSI_OPCODE_MIN: 1536 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1537 src0 = emit_fetch( bld, inst, 0, chan_index ); 1538 src1 = emit_fetch( bld, inst, 1, chan_index ); 1539 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1540 } 1541 break; 1542 1543 case TGSI_OPCODE_MAX: 1544 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1545 src0 = emit_fetch( bld, inst, 0, chan_index ); 1546 src1 = emit_fetch( bld, inst, 1, chan_index ); 1547 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1548 } 1549 break; 1550 1551 case TGSI_OPCODE_SLT: 1552 /* TGSI_OPCODE_SETLT */ 1553 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1554 src0 = emit_fetch( bld, inst, 0, chan_index ); 1555 src1 = emit_fetch( bld, inst, 1, chan_index 
); 1556 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1557 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1558 } 1559 break; 1560 1561 case TGSI_OPCODE_SGE: 1562 /* TGSI_OPCODE_SETGE */ 1563 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1564 src0 = emit_fetch( bld, inst, 0, chan_index ); 1565 src1 = emit_fetch( bld, inst, 1, chan_index ); 1566 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1567 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1568 } 1569 break; 1570 1571 case TGSI_OPCODE_MAD: 1572 /* TGSI_OPCODE_MADD */ 1573 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1574 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1575 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1576 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1577 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1578 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1579 dst0[chan_index] = tmp0; 1580 } 1581 break; 1582 1583 case TGSI_OPCODE_SUB: 1584 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1585 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1586 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1587 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1588 } 1589 break; 1590 1591 case TGSI_OPCODE_LRP: 1592 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1593 src0 = emit_fetch( bld, inst, 0, chan_index ); 1594 src1 = emit_fetch( bld, inst, 1, chan_index ); 1595 src2 = emit_fetch( bld, inst, 2, chan_index ); 1596 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1597 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1598 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1599 } 1600 break; 1601 1602 case TGSI_OPCODE_CND: 1603 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1604 src0 = emit_fetch( bld, inst, 0, chan_index ); 1605 src1 = emit_fetch( bld, inst, 1, chan_index ); 1606 src2 = emit_fetch( bld, inst, 2, chan_index ); 1607 tmp1 = 
lp_build_const_vec(bld->base.type, 0.5); 1608 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1609 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1610 } 1611 break; 1612 1613 case TGSI_OPCODE_DP2A: 1614 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1615 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1616 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1617 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1618 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1619 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1620 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1621 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1622 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1623 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1624 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1625 } 1626 break; 1627 1628 case TGSI_OPCODE_FRC: 1629 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1630 src0 = emit_fetch( bld, inst, 0, chan_index ); 1631 tmp0 = lp_build_floor(&bld->base, src0); 1632 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1633 dst0[chan_index] = tmp0; 1634 } 1635 break; 1636 1637 case TGSI_OPCODE_CLAMP: 1638 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1639 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1640 src1 = emit_fetch( bld, inst, 1, chan_index ); 1641 src2 = emit_fetch( bld, inst, 2, chan_index ); 1642 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1643 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1644 dst0[chan_index] = tmp0; 1645 } 1646 break; 1647 1648 case TGSI_OPCODE_FLR: 1649 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1650 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1651 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1652 } 1653 break; 1654 1655 case TGSI_OPCODE_ROUND: 1656 FOR_EACH_DST0_ENABLED_CHANNEL( inst, 
chan_index ) { 1657 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1658 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1659 } 1660 break; 1661 1662 case TGSI_OPCODE_EX2: { 1663 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1664 tmp0 = lp_build_exp2( &bld->base, tmp0); 1665 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1666 dst0[chan_index] = tmp0; 1667 } 1668 break; 1669 } 1670 1671 case TGSI_OPCODE_LG2: 1672 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1673 tmp0 = lp_build_log2( &bld->base, tmp0); 1674 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1675 dst0[chan_index] = tmp0; 1676 } 1677 break; 1678 1679 case TGSI_OPCODE_POW: 1680 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1681 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1682 res = lp_build_pow( &bld->base, src0, src1 ); 1683 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1684 dst0[chan_index] = res; 1685 } 1686 break; 1687 1688 case TGSI_OPCODE_XPD: 1689 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1690 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1691 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1692 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1693 } 1694 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1695 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1696 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1697 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1698 } 1699 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1700 tmp2 = tmp0; 1701 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1702 tmp5 = tmp3; 1703 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1704 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1705 dst0[CHAN_X] = tmp2; 1706 } 1707 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1708 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1709 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1710 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 1711 } 1712 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1713 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1714 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1715 tmp3 = lp_build_sub( 
&bld->base, tmp3, tmp1); 1716 dst0[CHAN_Y] = tmp3; 1717 } 1718 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1719 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1720 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1721 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1722 dst0[CHAN_Z] = tmp5; 1723 } 1724 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1725 dst0[CHAN_W] = bld->base.one; 1726 } 1727 break; 1728 1729 case TGSI_OPCODE_ABS: 1730 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1731 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1732 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1733 } 1734 break; 1735 1736 case TGSI_OPCODE_RCC: 1737 /* deprecated? */ 1738 assert(0); 1739 return FALSE; 1740 1741 case TGSI_OPCODE_DPH: 1742 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1743 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1744 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1745 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1746 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1747 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1748 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1749 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1750 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1751 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1752 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1753 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1754 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1755 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1756 dst0[chan_index] = tmp0; 1757 } 1758 break; 1759 1760 case TGSI_OPCODE_COS: 1761 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1762 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1763 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1764 dst0[chan_index] = tmp0; 1765 } 1766 break; 1767 1768 case TGSI_OPCODE_DDX: 1769 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1770 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1771 } 1772 break; 1773 1774 case TGSI_OPCODE_DDY: 1775 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 
1776 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1777 } 1778 break; 1779 1780 case TGSI_OPCODE_KILP: 1781 /* predicated kill */ 1782 emit_kilp( bld, inst, (*pc)-1 ); 1783 break; 1784 1785 case TGSI_OPCODE_KIL: 1786 /* conditional kill */ 1787 emit_kil( bld, inst, (*pc)-1 ); 1788 break; 1789 1790 case TGSI_OPCODE_PK2H: 1791 return FALSE; 1792 break; 1793 1794 case TGSI_OPCODE_PK2US: 1795 return FALSE; 1796 break; 1797 1798 case TGSI_OPCODE_PK4B: 1799 return FALSE; 1800 break; 1801 1802 case TGSI_OPCODE_PK4UB: 1803 return FALSE; 1804 break; 1805 1806 case TGSI_OPCODE_RFL: 1807 return FALSE; 1808 break; 1809 1810 case TGSI_OPCODE_SEQ: 1811 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1812 src0 = emit_fetch( bld, inst, 0, chan_index ); 1813 src1 = emit_fetch( bld, inst, 1, chan_index ); 1814 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1815 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1816 } 1817 break; 1818 1819 case TGSI_OPCODE_SFL: 1820 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1821 dst0[chan_index] = bld->base.zero; 1822 } 1823 break; 1824 1825 case TGSI_OPCODE_SGT: 1826 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1827 src0 = emit_fetch( bld, inst, 0, chan_index ); 1828 src1 = emit_fetch( bld, inst, 1, chan_index ); 1829 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1830 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1831 } 1832 break; 1833 1834 case TGSI_OPCODE_SIN: 1835 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1836 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1837 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1838 dst0[chan_index] = tmp0; 1839 } 1840 break; 1841 1842 case TGSI_OPCODE_SLE: 1843 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1844 src0 = emit_fetch( bld, inst, 0, chan_index ); 1845 src1 = emit_fetch( bld, inst, 1, chan_index ); 1846 tmp0 = lp_build_cmp( &bld->base, 
PIPE_FUNC_LEQUAL, src0, src1 ); 1847 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1848 } 1849 break; 1850 1851 case TGSI_OPCODE_SNE: 1852 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1853 src0 = emit_fetch( bld, inst, 0, chan_index ); 1854 src1 = emit_fetch( bld, inst, 1, chan_index ); 1855 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1856 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1857 } 1858 break; 1859 1860 case TGSI_OPCODE_STR: 1861 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1862 dst0[chan_index] = bld->base.one; 1863 } 1864 break; 1865 1866 case TGSI_OPCODE_TEX: 1867 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); 1868 break; 1869 1870 case TGSI_OPCODE_TXD: 1871 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1872 break; 1873 1874 case TGSI_OPCODE_UP2H: 1875 /* deprecated */ 1876 assert (0); 1877 return FALSE; 1878 break; 1879 1880 case TGSI_OPCODE_UP2US: 1881 /* deprecated */ 1882 assert(0); 1883 return FALSE; 1884 break; 1885 1886 case TGSI_OPCODE_UP4B: 1887 /* deprecated */ 1888 assert(0); 1889 return FALSE; 1890 break; 1891 1892 case TGSI_OPCODE_UP4UB: 1893 /* deprecated */ 1894 assert(0); 1895 return FALSE; 1896 break; 1897 1898 case TGSI_OPCODE_X2D: 1899 /* deprecated? 
*/ 1900 assert(0); 1901 return FALSE; 1902 break; 1903 1904 case TGSI_OPCODE_ARA: 1905 /* deprecated */ 1906 assert(0); 1907 return FALSE; 1908 break; 1909 1910 case TGSI_OPCODE_ARR: 1911 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1912 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1913 tmp0 = lp_build_round(&bld->base, tmp0); 1914 dst0[chan_index] = tmp0; 1915 } 1916 break; 1917 1918 case TGSI_OPCODE_BRA: 1919 /* deprecated */ 1920 assert(0); 1921 return FALSE; 1922 break; 1923 1924 case TGSI_OPCODE_CAL: 1925 lp_exec_mask_call(&bld->exec_mask, 1926 inst->Label.Label, 1927 pc); 1928 1929 break; 1930 1931 case TGSI_OPCODE_RET: 1932 lp_exec_mask_ret(&bld->exec_mask, pc); 1933 break; 1934 1935 case TGSI_OPCODE_END: 1936 if (0) { 1937 /* for debugging */ 1938 emit_dump_temps(bld); 1939 } 1940 *pc = -1; 1941 break; 1942 1943 case TGSI_OPCODE_SSG: 1944 /* TGSI_OPCODE_SGN */ 1945 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1946 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1947 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1948 } 1949 break; 1950 1951 case TGSI_OPCODE_CMP: 1952 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1953 src0 = emit_fetch( bld, inst, 0, chan_index ); 1954 src1 = emit_fetch( bld, inst, 1, chan_index ); 1955 src2 = emit_fetch( bld, inst, 2, chan_index ); 1956 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 1957 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 1958 } 1959 break; 1960 1961 case TGSI_OPCODE_SCS: 1962 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1963 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1964 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 1965 } 1966 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1967 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1968 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 1969 } 1970 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1971 dst0[CHAN_Z] = bld->base.zero; 1972 } 1973 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1974 
dst0[CHAN_W] = bld->base.one; 1975 } 1976 break; 1977 1978 case TGSI_OPCODE_TXB: 1979 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); 1980 break; 1981 1982 case TGSI_OPCODE_NRM: 1983 /* fall-through */ 1984 case TGSI_OPCODE_NRM4: 1985 /* 3 or 4-component normalization */ 1986 { 1987 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 1988 1989 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 1990 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 1991 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 1992 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 1993 1994 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1995 1996 /* xmm4 = src.x */ 1997 /* xmm0 = src.x * src.x */ 1998 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1999 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2000 tmp4 = tmp0; 2001 } 2002 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 2003 2004 /* xmm5 = src.y */ 2005 /* xmm0 = xmm0 + src.y * src.y */ 2006 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 2007 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2008 tmp5 = tmp1; 2009 } 2010 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2011 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2012 2013 /* xmm6 = src.z */ 2014 /* xmm0 = xmm0 + src.z * src.z */ 2015 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 2016 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2017 tmp6 = tmp1; 2018 } 2019 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2020 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2021 2022 if (dims == 4) { 2023 /* xmm7 = src.w */ 2024 /* xmm0 = xmm0 + src.w * src.w */ 2025 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 2026 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 2027 tmp7 = tmp1; 2028 } 2029 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 2030 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 2031 } 2032 2033 /* xmm1 = 1 / sqrt(xmm0) */ 2034 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 2035 2036 /* dst.x = xmm1 * src.x */ 2037 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 2038 dst0[CHAN_X] = lp_build_mul( &bld->base, 
tmp4, tmp1); 2039 } 2040 2041 /* dst.y = xmm1 * src.y */ 2042 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 2043 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 2044 } 2045 2046 /* dst.z = xmm1 * src.z */ 2047 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 2048 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 2049 } 2050 2051 /* dst.w = xmm1 * src.w */ 2052 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 2053 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 2054 } 2055 } 2056 2057 /* dst.w = 1.0 */ 2058 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 2059 dst0[CHAN_W] = bld->base.one; 2060 } 2061 } 2062 break; 2063 2064 case TGSI_OPCODE_DIV: 2065 /* deprecated */ 2066 assert( 0 ); 2067 return FALSE; 2068 break; 2069 2070 case TGSI_OPCODE_DP2: 2071 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 2072 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 2073 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 2074 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 2075 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 2076 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 2077 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 2078 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2079 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 2080 } 2081 break; 2082 2083 case TGSI_OPCODE_TXL: 2084 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 2085 break; 2086 2087 case TGSI_OPCODE_TXP: 2088 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); 2089 break; 2090 2091 case TGSI_OPCODE_BRK: 2092 lp_exec_break(&bld->exec_mask); 2093 break; 2094 2095 case TGSI_OPCODE_IF: 2096 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 2097 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 2098 tmp0, bld->base.zero); 2099 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 2100 break; 2101 2102 case TGSI_OPCODE_BGNLOOP: 2103 
lp_exec_bgnloop(&bld->exec_mask); 2104 break; 2105 2106 case TGSI_OPCODE_BGNSUB: 2107 lp_exec_mask_bgnsub(&bld->exec_mask); 2108 break; 2109 2110 case TGSI_OPCODE_ELSE: 2111 lp_exec_mask_cond_invert(&bld->exec_mask); 2112 break; 2113 2114 case TGSI_OPCODE_ENDIF: 2115 lp_exec_mask_cond_pop(&bld->exec_mask); 2116 break; 2117 2118 case TGSI_OPCODE_ENDLOOP: 2119 lp_exec_endloop(&bld->exec_mask); 2120 break; 2121 2122 case TGSI_OPCODE_ENDSUB: 2123 lp_exec_mask_endsub(&bld->exec_mask, pc); 2124 break; 2125 2126 case TGSI_OPCODE_PUSHA: 2127 /* deprecated? */ 2128 assert(0); 2129 return FALSE; 2130 break; 2131 2132 case TGSI_OPCODE_POPA: 2133 /* deprecated? */ 2134 assert(0); 2135 return FALSE; 2136 break; 2137 2138 case TGSI_OPCODE_CEIL: 2139 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2140 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2141 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 2142 } 2143 break; 2144 2145 case TGSI_OPCODE_I2F: 2146 /* deprecated? */ 2147 assert(0); 2148 return FALSE; 2149 break; 2150 2151 case TGSI_OPCODE_NOT: 2152 /* deprecated? */ 2153 assert(0); 2154 return FALSE; 2155 break; 2156 2157 case TGSI_OPCODE_TRUNC: 2158 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2159 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 2160 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 2161 } 2162 break; 2163 2164 case TGSI_OPCODE_SHL: 2165 /* deprecated? */ 2166 assert(0); 2167 return FALSE; 2168 break; 2169 2170 case TGSI_OPCODE_ISHR: 2171 /* deprecated? */ 2172 assert(0); 2173 return FALSE; 2174 break; 2175 2176 case TGSI_OPCODE_AND: 2177 /* deprecated? */ 2178 assert(0); 2179 return FALSE; 2180 break; 2181 2182 case TGSI_OPCODE_OR: 2183 /* deprecated? */ 2184 assert(0); 2185 return FALSE; 2186 break; 2187 2188 case TGSI_OPCODE_MOD: 2189 /* deprecated? */ 2190 assert(0); 2191 return FALSE; 2192 break; 2193 2194 case TGSI_OPCODE_XOR: 2195 /* deprecated? 
*/ 2196 assert(0); 2197 return FALSE; 2198 break; 2199 2200 case TGSI_OPCODE_SAD: 2201 /* deprecated? */ 2202 assert(0); 2203 return FALSE; 2204 break; 2205 2206 case TGSI_OPCODE_TXF: 2207 /* deprecated? */ 2208 assert(0); 2209 return FALSE; 2210 break; 2211 2212 case TGSI_OPCODE_TXQ: 2213 /* deprecated? */ 2214 assert(0); 2215 return FALSE; 2216 break; 2217 2218 case TGSI_OPCODE_CONT: 2219 lp_exec_continue(&bld->exec_mask); 2220 break; 2221 2222 case TGSI_OPCODE_EMIT: 2223 return FALSE; 2224 break; 2225 2226 case TGSI_OPCODE_ENDPRIM: 2227 return FALSE; 2228 break; 2229 2230 case TGSI_OPCODE_NOP: 2231 break; 2232 2233 default: 2234 return FALSE; 2235 } 2236 2237 if(info->num_dst) { 2238 LLVMValueRef pred[NUM_CHANNELS]; 2239 2240 emit_fetch_predicate( bld, inst, pred ); 2241 2242 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 2243 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]); 2244 } 2245 } 2246 2247 return TRUE; 2248} 2249 2250 2251void 2252lp_build_tgsi_soa(LLVMBuilderRef builder, 2253 const struct tgsi_token *tokens, 2254 struct lp_type type, 2255 struct lp_build_mask_context *mask, 2256 LLVMValueRef consts_ptr, 2257 const LLVMValueRef *pos, 2258 const LLVMValueRef (*inputs)[NUM_CHANNELS], 2259 LLVMValueRef (*outputs)[NUM_CHANNELS], 2260 struct lp_build_sampler_soa *sampler, 2261 const struct tgsi_shader_info *info) 2262{ 2263 struct lp_build_tgsi_soa_context bld; 2264 struct tgsi_parse_context parse; 2265 uint num_immediates = 0; 2266 uint num_instructions = 0; 2267 unsigned i; 2268 int pc = 0; 2269 2270 struct lp_type res_type; 2271 2272 assert(type.length <= LP_MAX_VECTOR_LENGTH); 2273 memset(&res_type, 0, sizeof res_type); 2274 res_type.width = type.width; 2275 res_type.length = type.length; 2276 res_type.sign = 1; 2277 2278 /* Setup build context */ 2279 memset(&bld, 0, sizeof bld); 2280 lp_build_context_init(&bld.base, builder, type); 2281 lp_build_context_init(&bld.uint_bld, builder, lp_uint_type(type)); 2282 
lp_build_context_init(&bld.elem_bld, builder, lp_elem_type(type)); 2283 bld.mask = mask; 2284 bld.pos = pos; 2285 bld.inputs = inputs; 2286 bld.outputs = outputs; 2287 bld.consts_ptr = consts_ptr; 2288 bld.sampler = sampler; 2289 bld.info = info; 2290 bld.indirect_files = info->indirect_files; 2291 bld.instructions = (struct tgsi_full_instruction *) 2292 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); 2293 bld.max_instructions = LP_MAX_INSTRUCTIONS; 2294 2295 if (!bld.instructions) { 2296 return; 2297 } 2298 2299 lp_exec_mask_init(&bld.exec_mask, &bld.base); 2300 2301 if (bld.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 2302 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), 2303 info->file_max[TGSI_FILE_TEMPORARY]*4 + 4, 0); 2304 bld.temps_array = lp_build_array_alloca(bld.base.builder, 2305 bld.base.vec_type, array_size, 2306 "temp_array"); 2307 } 2308 2309 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2310 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), 2311 info->file_max[TGSI_FILE_OUTPUT]*4 + 4, 0); 2312 bld.outputs_array = lp_build_array_alloca(bld.base.builder, 2313 bld.base.vec_type, array_size, 2314 "output_array"); 2315 } 2316 2317 tgsi_parse_init( &parse, tokens ); 2318 2319 while( !tgsi_parse_end_of_tokens( &parse ) ) { 2320 tgsi_parse_token( &parse ); 2321 2322 switch( parse.FullToken.Token.Type ) { 2323 case TGSI_TOKEN_TYPE_DECLARATION: 2324 /* Inputs already interpolated */ 2325 emit_declaration( &bld, &parse.FullToken.FullDeclaration ); 2326 break; 2327 2328 case TGSI_TOKEN_TYPE_INSTRUCTION: 2329 { 2330 /* save expanded instruction */ 2331 if (num_instructions == bld.max_instructions) { 2332 struct tgsi_full_instruction *instructions; 2333 instructions = REALLOC(bld.instructions, 2334 bld.max_instructions 2335 * sizeof(struct tgsi_full_instruction), 2336 (bld.max_instructions + LP_MAX_INSTRUCTIONS) 2337 * sizeof(struct tgsi_full_instruction)); 2338 if (!instructions) { 2339 break; 2340 } 2341 
bld.instructions = instructions; 2342 bld.max_instructions += LP_MAX_INSTRUCTIONS; 2343 } 2344 2345 memcpy(bld.instructions + num_instructions, 2346 &parse.FullToken.FullInstruction, 2347 sizeof(bld.instructions[0])); 2348 2349 num_instructions++; 2350 } 2351 2352 break; 2353 2354 case TGSI_TOKEN_TYPE_IMMEDIATE: 2355 /* simply copy the immediate values into the next immediates[] slot */ 2356 { 2357 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 2358 assert(size <= 4); 2359 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 2360 for( i = 0; i < size; ++i ) 2361 bld.immediates[num_immediates][i] = 2362 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); 2363 for( i = size; i < 4; ++i ) 2364 bld.immediates[num_immediates][i] = bld.base.undef; 2365 num_immediates++; 2366 } 2367 break; 2368 2369 case TGSI_TOKEN_TYPE_PROPERTY: 2370 break; 2371 2372 default: 2373 assert( 0 ); 2374 } 2375 } 2376 2377 while (pc != -1) { 2378 struct tgsi_full_instruction *instr = bld.instructions + pc; 2379 const struct tgsi_opcode_info *opcode_info = 2380 tgsi_get_opcode_info(instr->Instruction.Opcode); 2381 if (!emit_instruction( &bld, instr, opcode_info, &pc )) 2382 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 2383 opcode_info->mnemonic); 2384 } 2385 2386 /* If we have indirect addressing in outputs we need to copy our alloca array 2387 * to the outputs slots specified by the called */ 2388 if (bld.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2389 unsigned index, chan; 2390 assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1); 2391 for (index = 0; index < info->num_outputs; ++index) { 2392 for (chan = 0; chan < NUM_CHANNELS; ++chan) { 2393 bld.outputs[index][chan] = get_output_ptr(&bld, index, chan); 2394 } 2395 } 2396 } 2397 2398 if (0) { 2399 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); 2400 LLVMValueRef function = LLVMGetBasicBlockParent(block); 2401 debug_printf("11111111111111111111111111111 \n"); 
2402 tgsi_dump(tokens, 0); 2403 lp_debug_dump_value(function); 2404 debug_printf("2222222222222222222222222222 \n"); 2405 } 2406 tgsi_parse_free( &parse ); 2407 2408 if (0) { 2409 LLVMModuleRef module = LLVMGetGlobalParent( 2410 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder))); 2411 LLVMDumpModule(module); 2412 2413 } 2414 2415 FREE( bld.instructions ); 2416} 2417 2418