lp_bld_tgsi_soa.c revision 3d5b9c1f2d3340259dd0d8765090a5a963074f29
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"


/** Iterate CHAN over the four vector components. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/** Non-zero if channel CHAN is set in the writemask of Dst[0]. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/** Iterate CHAN over only the channels enabled in Dst[0]'s writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/** Initial size of the instruction buffer; grown on demand. */
#define LP_MAX_INSTRUCTIONS 256


/**
 * Execution-mask state for structured control flow (if/else, loops,
 * subroutine calls).  Since we generate SoA code, divergent control flow
 * is handled by masking: the combined exec_mask selects which vector
 * channels are currently "alive".  A mask value is an integer vector with
 * all bits set (~0) in active channels and 0 in inactive ones.
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE when any of the stacks below is non-empty, i.e. stores must be
    * predicated on exec_mask. */
   boolean has_mask;

   LLVMTypeRef int_vec_type;

   /* Saved condition masks for nested if/else blocks. */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   /* Current innermost loop state; outer loops are saved in loop_stack. */
   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   /* Alloca'd variable holding break_mask so it survives across the loop
    * back-edge (see lp_exec_bgnloop/lp_exec_endloop). */
   LLVMValueRef break_var;
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   /* Channels disabled by RET inside the current subroutine. */
   LLVMValueRef ret_mask;
   struct {
      int pc;                 /* instruction index to resume at after ENDSUB */
      LLVMValueRef ret_mask;  /* caller's ret_mask, restored on return */
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   /* cond_mask & cont_mask & break_mask (& ret_mask when in a subroutine) */
   LLVMValueRef exec_mask;
};

/**
 * Context for translating one TGSI shader to LLVM IR, SoA layout.
 */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /* Builder for integer masks and indices */
   struct lp_build_context int_bld;

   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   const struct lp_build_sampler_soa *sampler;

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   /* Buffered instruction stream, needed for subroutine calls (BGNSUB is
    * reached by jumping the pc around, so all instructions must be kept). */
   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};

/**
 * Initialize the execution-mask state: empty stacks, all masks all-ones
 * (every channel active).
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->cond_stack_size = 0;
   mask->loop_stack_size = 0;
   mask->call_stack_size = 0;

   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
      LLVMConstAllOnes(mask->int_vec_type);
}

/**
 * Recompute exec_mask and has_mask from the component masks.  Called after
 * any component mask changes.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size) {
      /* for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(mask->bld->builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (mask->call_stack_size) {
      mask->exec_mask =
LLVMBuildAnd(mask->bld->builder,
                                   mask->exec_mask,
                                   mask->ret_mask,
                                   "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}

/**
 * Begin an IF block: save the current condition mask and AND the
 * comparison result 'val' into it, deactivating channels that fail.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

/**
 * ELSE: activate exactly those channels that were active at the matching
 * IF but failed its condition (~cond & saved_mask).
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

/**
 * ENDIF: restore the condition mask saved at the matching IF.
 */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   assert(mask->cond_stack_size);
   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
   lp_exec_mask_update(mask);
}

/**
 * BGNLOOP: push the enclosing loop's state, spill break_mask into an
 * alloca'd variable (so it survives the loop back-edge), and start a new
 * basic block for the loop body.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}

/**
 * BRK: permanently (for the remainder of the loop) disable all channels
 * that are currently executing.
 */
static void lp_exec_break(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "break");

   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
                                   mask->break_mask,
                                   exec_mask, "break_full");

   lp_exec_mask_update(mask);
}

/**
 * CONT: disable currently-executing channels until the end of this
 * iteration (cont_mask is restored at ENDLOOP before the back-edge).
 */
static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


/**
 * ENDLOOP: restore cont_mask, persist break_mask across the back-edge,
 * then branch back to the loop header while any channel remains active,
 * otherwise fall through to the new "endloop" block and pop loop state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0), i.e. keep looping while any channel is
    * still active.  The mask vector is bitcast to one wide integer so a
    * single scalar compare covers all channels. */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}

/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef real_val, dst_val;

      /* Masked store: load the old value and select per channel, so
       * inactive channels keep their previous contents. */
      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      /* No mask and no predicate: plain unconditional store. */
      LLVMBuildStore(mask->bld->builder, val, dst);
}

/**
 * CAL: save the return pc and the current ret_mask, then jump to the
 * subroutine by overwriting *pc.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
   mask->call_stack[mask->call_stack_size].pc = *pc;
   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
   mask->call_stack_size++;
   *pc = func;
}

/**
 * RET: from main(), terminate interpretation (*pc = -1); inside a
 * subroutine, disable the currently-active channels via ret_mask until
 * the matching ENDSUB.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMValueRef exec_mask;

   if (mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }
   exec_mask = LLVMBuildNot(mask->bld->builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

/** BGNSUB: no mask work needed; reached only via lp_exec_mask_call. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

/**
 * ENDSUB: pop the call stack, resume at the saved pc and restore the
 * caller's ret_mask.
 */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   assert(mask->call_stack_size);
   mask->call_stack_size--;
   *pc = mask->call_stack[mask->call_stack_size].pc;
   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
   lp_exec_mask_update(mask);
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index which temporary register
 * \param chan which channel of the temp register.
 */
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned index,
             unsigned chan)
{
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      /* Temps live in one flat array; element = reg * 4 + channel. */
      LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
   }
   else {
      return bld->temps[index][chan];
   }
}


/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_soa_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMValueRef res = bld->base.undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
                                             &index, 1, "");
      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
   }

   return res;
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, multiply by four and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
                     const struct tgsi_src_register *indirect_reg)
{
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->SwizzleX;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
   LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
   LLVMValueRef addr_vec;

   addr_vec = LLVMBuildLoad(bld->base.builder,
                            bld->addr[indirect_reg->Index][swizzle],
                            "load addr reg");

   /* for indexing we want integers */
   addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
                              int_vec_type, "");

   /* addr_vec = addr_vec * 4 (four channels per register) */
   addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);

   return addr_vec;
}


/**
 * Register fetch.
 * Fetch one channel of one source operand as a SoA vector, applying the
 * operand's swizzle, optional indirect addressing and sign mode.
 * \param src_op      which source operand of the instruction
 * \param chan_index  which channel (before swizzling) to fetch
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef addr_vec = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      assert(bld->indirect_files);
      addr_vec = get_indirect_offsets(bld, &reg->Indirect);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* index into the const buffer */

         assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec = index_vec + addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         /* Direct constant access: load the scalar once and broadcast it
          * to all channels of the SoA vector. */
         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef vec_len =
            lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the temp register array */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec += addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* index_vec *= vector_length -- temps_array stores whole SoA
          * vectors, so scale the element index by the vector length. */
         index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len);

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   /* Apply the operand's sign mode (abs / negate / both). */
   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}


/**
 * Register fetch with derivatives.
 * Fetches the operand channel and optionally its screen-space partial
 * derivatives (computed from neighboring quad pixels).
 * \param res  if non-NULL, receives the fetched value
 * \param ddx  if non-NULL, receives d/dx of the value
 * \param ddy  if non-NULL, receives d/dy of the value
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   const unsigned chan_index,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   LLVMValueRef src;

   src = emit_fetch(bld, inst, index, chan_index);

   if(res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if(ddx)
      *ddx = lp_build_ddx(&bld->base, src);

   if(ddy)
      *ddy = lp_build_ddy(&bld->base, src);
}


/**
 * Predicate.
 * Fill pred[4] with per-channel predicate masks for the instruction, or
 * NULLs when the instruction is not predicated.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* Unpredicated instruction: no masks. */
      FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(bld->base.builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->base.builder,
                                  bld->base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(bld->base.builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}


/**
 * Register store.
727 */ 728static void 729emit_store( 730 struct lp_build_tgsi_soa_context *bld, 731 const struct tgsi_full_instruction *inst, 732 unsigned index, 733 unsigned chan_index, 734 LLVMValueRef pred, 735 LLVMValueRef value) 736{ 737 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 738 LLVMValueRef addr = NULL; 739 740 switch( inst->Instruction.Saturate ) { 741 case TGSI_SAT_NONE: 742 break; 743 744 case TGSI_SAT_ZERO_ONE: 745 value = lp_build_max(&bld->base, value, bld->base.zero); 746 value = lp_build_min(&bld->base, value, bld->base.one); 747 break; 748 749 case TGSI_SAT_MINUS_PLUS_ONE: 750 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); 751 value = lp_build_min(&bld->base, value, bld->base.one); 752 break; 753 754 default: 755 assert(0); 756 } 757 758 if (reg->Register.Indirect) { 759 /* XXX use get_indirect_offsets() here eventually */ 760 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); 761 unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); 762 763 assert(bld->indirect_files); 764 765 addr = LLVMBuildLoad(bld->base.builder, 766 bld->addr[reg->Indirect.Index][swizzle], 767 ""); 768 /* for indexing we want integers */ 769 addr = LLVMBuildFPToSI(bld->base.builder, addr, 770 int_vec_type, ""); 771 addr = LLVMBuildExtractElement(bld->base.builder, 772 addr, LLVMConstInt(LLVMInt32Type(), 0, 0), 773 ""); 774 addr = LLVMBuildMul(bld->base.builder, 775 addr, LLVMConstInt(LLVMInt32Type(), 4, 0), 776 ""); 777 } 778 779 switch( reg->Register.File ) { 780 case TGSI_FILE_OUTPUT: 781 lp_exec_mask_store(&bld->exec_mask, pred, value, 782 bld->outputs[reg->Register.Index][chan_index]); 783 break; 784 785 case TGSI_FILE_TEMPORARY: 786 if (reg->Register.Indirect) { 787 /* XXX not done yet */ 788 debug_printf("WARNING: LLVM scatter store of temp regs" 789 " not implemented\n"); 790 } 791 else { 792 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 793 chan_index); 794 
lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); 795 } 796 break; 797 798 case TGSI_FILE_ADDRESS: 799 lp_exec_mask_store(&bld->exec_mask, pred, value, 800 bld->addr[reg->Indirect.Index][chan_index]); 801 break; 802 803 case TGSI_FILE_PREDICATE: 804 lp_exec_mask_store(&bld->exec_mask, pred, value, 805 bld->preds[reg->Register.Index][chan_index]); 806 break; 807 808 default: 809 assert( 0 ); 810 } 811} 812 813 814/** 815 * High-level instruction translators. 816 */ 817 818static void 819emit_tex( struct lp_build_tgsi_soa_context *bld, 820 const struct tgsi_full_instruction *inst, 821 enum lp_build_tex_modifier modifier, 822 LLVMValueRef *texel) 823{ 824 unsigned unit; 825 LLVMValueRef lod_bias, explicit_lod; 826 LLVMValueRef oow = NULL; 827 LLVMValueRef coords[3]; 828 LLVMValueRef ddx[3]; 829 LLVMValueRef ddy[3]; 830 unsigned num_coords; 831 unsigned i; 832 833 if (!bld->sampler) { 834 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 835 for (i = 0; i < 4; i++) { 836 texel[i] = bld->base.undef; 837 } 838 return; 839 } 840 841 switch (inst->Texture.Texture) { 842 case TGSI_TEXTURE_1D: 843 num_coords = 1; 844 break; 845 case TGSI_TEXTURE_2D: 846 case TGSI_TEXTURE_RECT: 847 num_coords = 2; 848 break; 849 case TGSI_TEXTURE_SHADOW1D: 850 case TGSI_TEXTURE_SHADOW2D: 851 case TGSI_TEXTURE_SHADOWRECT: 852 case TGSI_TEXTURE_3D: 853 case TGSI_TEXTURE_CUBE: 854 num_coords = 3; 855 break; 856 default: 857 assert(0); 858 return; 859 } 860 861 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 862 lod_bias = emit_fetch( bld, inst, 0, 3 ); 863 explicit_lod = NULL; 864 } 865 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 866 lod_bias = NULL; 867 explicit_lod = emit_fetch( bld, inst, 0, 3 ); 868 } 869 else { 870 lod_bias = NULL; 871 explicit_lod = NULL; 872 } 873 874 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 875 oow = emit_fetch( bld, inst, 0, 3 ); 876 oow = lp_build_rcp(&bld->base, oow); 877 } 878 879 for (i = 0; i < 
num_coords; i++) { 880 coords[i] = emit_fetch( bld, inst, 0, i ); 881 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 882 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 883 } 884 for (i = num_coords; i < 3; i++) { 885 coords[i] = bld->base.undef; 886 } 887 888 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 889 for (i = 0; i < num_coords; i++) { 890 ddx[i] = emit_fetch( bld, inst, 1, i ); 891 ddy[i] = emit_fetch( bld, inst, 2, i ); 892 } 893 unit = inst->Src[3].Register.Index; 894 } else { 895 for (i = 0; i < num_coords; i++) { 896 ddx[i] = lp_build_ddx( &bld->base, coords[i] ); 897 ddy[i] = lp_build_ddy( &bld->base, coords[i] ); 898 } 899 unit = inst->Src[1].Register.Index; 900 } 901 for (i = num_coords; i < 3; i++) { 902 ddx[i] = bld->base.undef; 903 ddy[i] = bld->base.undef; 904 } 905 906 bld->sampler->emit_fetch_texel(bld->sampler, 907 bld->base.builder, 908 bld->base.type, 909 unit, num_coords, coords, 910 ddx, ddy, 911 lod_bias, explicit_lod, 912 texel); 913} 914 915 916/** 917 * Kill fragment if any of the src register values are negative. 918 */ 919static void 920emit_kil( 921 struct lp_build_tgsi_soa_context *bld, 922 const struct tgsi_full_instruction *inst ) 923{ 924 const struct tgsi_full_src_register *reg = &inst->Src[0]; 925 LLVMValueRef terms[NUM_CHANNELS]; 926 LLVMValueRef mask; 927 unsigned chan_index; 928 929 memset(&terms, 0, sizeof terms); 930 931 FOR_EACH_CHANNEL( chan_index ) { 932 unsigned swizzle; 933 934 /* Unswizzle channel */ 935 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 936 937 /* Check if the component has not been already tested. 
*/ 938 assert(swizzle < NUM_CHANNELS); 939 if( !terms[swizzle] ) 940 /* TODO: change the comparison operator instead of setting the sign */ 941 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 942 } 943 944 mask = NULL; 945 FOR_EACH_CHANNEL( chan_index ) { 946 if(terms[chan_index]) { 947 LLVMValueRef chan_mask; 948 949 /* 950 * If term < 0 then mask = 0 else mask = ~0. 951 */ 952 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 953 954 if(mask) 955 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); 956 else 957 mask = chan_mask; 958 } 959 } 960 961 if(mask) 962 lp_build_mask_update(bld->mask, mask); 963} 964 965 966/** 967 * Predicated fragment kill. 968 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 969 * The only predication is the execution mask which will apply if 970 * we're inside a loop or conditional. 971 */ 972static void 973emit_kilp(struct lp_build_tgsi_soa_context *bld, 974 const struct tgsi_full_instruction *inst) 975{ 976 LLVMValueRef mask; 977 978 /* For those channels which are "alive", disable fragment shader 979 * execution. 
980 */ 981 if (bld->exec_mask.has_mask) { 982 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); 983 } 984 else { 985 mask = bld->base.zero; 986 } 987 988 lp_build_mask_update(bld->mask, mask); 989} 990 991static void 992emit_declaration( 993 struct lp_build_tgsi_soa_context *bld, 994 const struct tgsi_full_declaration *decl) 995{ 996 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); 997 998 unsigned first = decl->Range.First; 999 unsigned last = decl->Range.Last; 1000 unsigned idx, i; 1001 1002 for (idx = first; idx <= last; ++idx) { 1003 switch (decl->Declaration.File) { 1004 case TGSI_FILE_TEMPORARY: 1005 assert(idx < LP_MAX_TGSI_TEMPS); 1006 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 1007 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), 1008 last*4 + 4, 0); 1009 bld->temps_array = lp_build_array_alloca(bld->base.builder, 1010 vec_type, array_size, ""); 1011 } else { 1012 for (i = 0; i < NUM_CHANNELS; i++) 1013 bld->temps[idx][i] = lp_build_alloca(bld->base.builder, 1014 vec_type, ""); 1015 } 1016 break; 1017 1018 case TGSI_FILE_OUTPUT: 1019 for (i = 0; i < NUM_CHANNELS; i++) 1020 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, 1021 vec_type, ""); 1022 break; 1023 1024 case TGSI_FILE_ADDRESS: 1025 assert(idx < LP_MAX_TGSI_ADDRS); 1026 for (i = 0; i < NUM_CHANNELS; i++) 1027 bld->addr[idx][i] = lp_build_alloca(bld->base.builder, 1028 vec_type, ""); 1029 break; 1030 1031 case TGSI_FILE_PREDICATE: 1032 assert(idx < LP_MAX_TGSI_PREDS); 1033 for (i = 0; i < NUM_CHANNELS; i++) 1034 bld->preds[idx][i] = lp_build_alloca(bld->base.builder, 1035 vec_type, ""); 1036 break; 1037 1038 default: 1039 /* don't need to declare other vars */ 1040 break; 1041 } 1042 } 1043} 1044 1045 1046/** 1047 * Emit LLVM for one TGSI instruction. 
1048 * \param return TRUE for success, FALSE otherwise 1049 */ 1050static boolean 1051emit_instruction( 1052 struct lp_build_tgsi_soa_context *bld, 1053 const struct tgsi_full_instruction *inst, 1054 const struct tgsi_opcode_info *info, 1055 int *pc) 1056{ 1057 unsigned chan_index; 1058 LLVMValueRef src0, src1, src2; 1059 LLVMValueRef tmp0, tmp1, tmp2; 1060 LLVMValueRef tmp3 = NULL; 1061 LLVMValueRef tmp4 = NULL; 1062 LLVMValueRef tmp5 = NULL; 1063 LLVMValueRef tmp6 = NULL; 1064 LLVMValueRef tmp7 = NULL; 1065 LLVMValueRef res; 1066 LLVMValueRef dst0[NUM_CHANNELS]; 1067 1068 /* 1069 * Stores and write masks are handled in a general fashion after the long 1070 * instruction opcode switch statement. 1071 * 1072 * Although not stricitly necessary, we avoid generating instructions for 1073 * channels which won't be stored, in cases where's that easy. For some 1074 * complex instructions, like texture sampling, it is more convenient to 1075 * assume a full writemask and then let LLVM optimization passes eliminate 1076 * redundant code. 
1077 */ 1078 1079 (*pc)++; 1080 1081 assert(info->num_dst <= 1); 1082 if (info->num_dst) { 1083 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1084 dst0[chan_index] = bld->base.undef; 1085 } 1086 } 1087 1088 switch (inst->Instruction.Opcode) { 1089 case TGSI_OPCODE_ARL: 1090 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1091 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1092 tmp0 = lp_build_floor(&bld->base, tmp0); 1093 dst0[chan_index] = tmp0; 1094 } 1095 break; 1096 1097 case TGSI_OPCODE_MOV: 1098 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1099 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1100 } 1101 break; 1102 1103 case TGSI_OPCODE_LIT: 1104 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 1105 dst0[CHAN_X] = bld->base.one; 1106 } 1107 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1108 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1109 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1110 } 1111 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1112 /* XMM[1] = SrcReg[0].yyyy */ 1113 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1114 /* XMM[1] = max(XMM[1], 0) */ 1115 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1116 /* XMM[2] = SrcReg[0].wwww */ 1117 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1118 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1119 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1120 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1121 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1122 } 1123 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1124 dst0[CHAN_W] = bld->base.one; 1125 } 1126 break; 1127 1128 case TGSI_OPCODE_RCP: 1129 /* TGSI_OPCODE_RECIP */ 1130 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1131 res = lp_build_rcp(&bld->base, src0); 1132 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1133 dst0[chan_index] = res; 1134 } 1135 break; 1136 1137 case TGSI_OPCODE_RSQ: 1138 /* TGSI_OPCODE_RECIPSQRT */ 1139 src0 = emit_fetch( bld, inst, 0, 
CHAN_X ); 1140 src0 = lp_build_abs(&bld->base, src0); 1141 res = lp_build_rsqrt(&bld->base, src0); 1142 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1143 dst0[chan_index] = res; 1144 } 1145 break; 1146 1147 case TGSI_OPCODE_EXP: 1148 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1149 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1150 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1151 LLVMValueRef *p_exp2_int_part = NULL; 1152 LLVMValueRef *p_frac_part = NULL; 1153 LLVMValueRef *p_exp2 = NULL; 1154 1155 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1156 1157 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1158 p_exp2_int_part = &tmp0; 1159 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1160 p_frac_part = &tmp1; 1161 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1162 p_exp2 = &tmp2; 1163 1164 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1165 1166 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1167 dst0[CHAN_X] = tmp0; 1168 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1169 dst0[CHAN_Y] = tmp1; 1170 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1171 dst0[CHAN_Z] = tmp2; 1172 } 1173 /* dst.w = 1.0 */ 1174 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1175 dst0[CHAN_W] = bld->base.one; 1176 } 1177 break; 1178 1179 case TGSI_OPCODE_LOG: 1180 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1181 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1182 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1183 LLVMValueRef *p_floor_log2 = NULL; 1184 LLVMValueRef *p_exp = NULL; 1185 LLVMValueRef *p_log2 = NULL; 1186 1187 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1188 src0 = lp_build_abs( &bld->base, src0 ); 1189 1190 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1191 p_floor_log2 = &tmp0; 1192 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1193 p_exp = &tmp1; 1194 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1195 p_log2 = &tmp2; 1196 1197 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1198 1199 /* dst.x = floor(lg2(abs(src.x))) */ 1200 if 
(IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1201 dst0[CHAN_X] = tmp0; 1202 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1203 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1204 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1205 } 1206 /* dst.z = lg2(abs(src.x)) */ 1207 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1208 dst0[CHAN_Z] = tmp2; 1209 } 1210 /* dst.w = 1.0 */ 1211 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1212 dst0[CHAN_W] = bld->base.one; 1213 } 1214 break; 1215 1216 case TGSI_OPCODE_MUL: 1217 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1218 src0 = emit_fetch( bld, inst, 0, chan_index ); 1219 src1 = emit_fetch( bld, inst, 1, chan_index ); 1220 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1221 } 1222 break; 1223 1224 case TGSI_OPCODE_ADD: 1225 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1226 src0 = emit_fetch( bld, inst, 0, chan_index ); 1227 src1 = emit_fetch( bld, inst, 1, chan_index ); 1228 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1229 } 1230 break; 1231 1232 case TGSI_OPCODE_DP3: 1233 /* TGSI_OPCODE_DOT3 */ 1234 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1235 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1236 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1237 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1238 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1239 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1240 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1241 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1242 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1243 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1244 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1245 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1246 dst0[chan_index] = tmp0; 1247 } 1248 break; 1249 1250 case TGSI_OPCODE_DP4: 1251 /* TGSI_OPCODE_DOT4 */ 1252 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1253 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1254 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1255 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y 
); 1256 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1257 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1258 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1259 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1260 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1261 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1262 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1263 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1264 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1265 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1266 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1267 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1268 dst0[chan_index] = tmp0; 1269 } 1270 break; 1271 1272 case TGSI_OPCODE_DST: 1273 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1274 dst0[CHAN_X] = bld->base.one; 1275 } 1276 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1277 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1278 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1279 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1280 } 1281 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1282 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1283 } 1284 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1285 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1286 } 1287 break; 1288 1289 case TGSI_OPCODE_MIN: 1290 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1291 src0 = emit_fetch( bld, inst, 0, chan_index ); 1292 src1 = emit_fetch( bld, inst, 1, chan_index ); 1293 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1294 } 1295 break; 1296 1297 case TGSI_OPCODE_MAX: 1298 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1299 src0 = emit_fetch( bld, inst, 0, chan_index ); 1300 src1 = emit_fetch( bld, inst, 1, chan_index ); 1301 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1302 } 1303 break; 1304 1305 case TGSI_OPCODE_SLT: 1306 /* TGSI_OPCODE_SETLT */ 1307 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1308 src0 = emit_fetch( bld, inst, 0, chan_index ); 1309 src1 = emit_fetch( bld, inst, 1, chan_index 
); 1310 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1311 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1312 } 1313 break; 1314 1315 case TGSI_OPCODE_SGE: 1316 /* TGSI_OPCODE_SETGE */ 1317 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1318 src0 = emit_fetch( bld, inst, 0, chan_index ); 1319 src1 = emit_fetch( bld, inst, 1, chan_index ); 1320 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1321 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1322 } 1323 break; 1324 1325 case TGSI_OPCODE_MAD: 1326 /* TGSI_OPCODE_MADD */ 1327 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1328 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1329 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1330 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1331 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1332 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1333 dst0[chan_index] = tmp0; 1334 } 1335 break; 1336 1337 case TGSI_OPCODE_SUB: 1338 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1339 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1340 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1341 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1342 } 1343 break; 1344 1345 case TGSI_OPCODE_LRP: 1346 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1347 src0 = emit_fetch( bld, inst, 0, chan_index ); 1348 src1 = emit_fetch( bld, inst, 1, chan_index ); 1349 src2 = emit_fetch( bld, inst, 2, chan_index ); 1350 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1351 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1352 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1353 } 1354 break; 1355 1356 case TGSI_OPCODE_CND: 1357 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1358 src0 = emit_fetch( bld, inst, 0, chan_index ); 1359 src1 = emit_fetch( bld, inst, 1, chan_index ); 1360 src2 = emit_fetch( bld, inst, 2, chan_index ); 1361 tmp1 = 
lp_build_const_vec(bld->base.type, 0.5); 1362 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1363 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1364 } 1365 break; 1366 1367 case TGSI_OPCODE_DP2A: 1368 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1369 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1370 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1371 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1372 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1373 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1374 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1375 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1376 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1377 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1378 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1379 } 1380 break; 1381 1382 case TGSI_OPCODE_FRC: 1383 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1384 src0 = emit_fetch( bld, inst, 0, chan_index ); 1385 tmp0 = lp_build_floor(&bld->base, src0); 1386 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1387 dst0[chan_index] = tmp0; 1388 } 1389 break; 1390 1391 case TGSI_OPCODE_CLAMP: 1392 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1393 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1394 src1 = emit_fetch( bld, inst, 1, chan_index ); 1395 src2 = emit_fetch( bld, inst, 2, chan_index ); 1396 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1397 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1398 dst0[chan_index] = tmp0; 1399 } 1400 break; 1401 1402 case TGSI_OPCODE_FLR: 1403 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1404 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1405 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1406 } 1407 break; 1408 1409 case TGSI_OPCODE_ROUND: 1410 FOR_EACH_DST0_ENABLED_CHANNEL( inst, 
chan_index ) { 1411 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1412 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1413 } 1414 break; 1415 1416 case TGSI_OPCODE_EX2: { 1417 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1418 tmp0 = lp_build_exp2( &bld->base, tmp0); 1419 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1420 dst0[chan_index] = tmp0; 1421 } 1422 break; 1423 } 1424 1425 case TGSI_OPCODE_LG2: 1426 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1427 tmp0 = lp_build_log2( &bld->base, tmp0); 1428 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1429 dst0[chan_index] = tmp0; 1430 } 1431 break; 1432 1433 case TGSI_OPCODE_POW: 1434 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1435 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1436 res = lp_build_pow( &bld->base, src0, src1 ); 1437 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1438 dst0[chan_index] = res; 1439 } 1440 break; 1441 1442 case TGSI_OPCODE_XPD: 1443 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1444 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1445 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1446 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1447 } 1448 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1449 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1450 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1451 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1452 } 1453 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1454 tmp2 = tmp0; 1455 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1456 tmp5 = tmp3; 1457 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1458 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1459 dst0[CHAN_X] = tmp2; 1460 } 1461 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1462 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1463 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1464 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 1465 } 1466 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1467 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1468 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1469 tmp3 = lp_build_sub( 
&bld->base, tmp3, tmp1); 1470 dst0[CHAN_Y] = tmp3; 1471 } 1472 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1473 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1474 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1475 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1476 dst0[CHAN_Z] = tmp5; 1477 } 1478 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1479 dst0[CHAN_W] = bld->base.one; 1480 } 1481 break; 1482 1483 case TGSI_OPCODE_ABS: 1484 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1485 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1486 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1487 } 1488 break; 1489 1490 case TGSI_OPCODE_RCC: 1491 /* deprecated? */ 1492 assert(0); 1493 return FALSE; 1494 1495 case TGSI_OPCODE_DPH: 1496 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1497 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1498 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1499 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1500 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1501 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1502 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1503 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1504 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1505 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1506 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1507 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1508 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1509 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1510 dst0[chan_index] = tmp0; 1511 } 1512 break; 1513 1514 case TGSI_OPCODE_COS: 1515 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1516 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1517 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1518 dst0[chan_index] = tmp0; 1519 } 1520 break; 1521 1522 case TGSI_OPCODE_DDX: 1523 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1524 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1525 } 1526 break; 1527 1528 case TGSI_OPCODE_DDY: 1529 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 
1530 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1531 } 1532 break; 1533 1534 case TGSI_OPCODE_KILP: 1535 /* predicated kill */ 1536 emit_kilp( bld, inst ); 1537 break; 1538 1539 case TGSI_OPCODE_KIL: 1540 /* conditional kill */ 1541 emit_kil( bld, inst ); 1542 break; 1543 1544 case TGSI_OPCODE_PK2H: 1545 return FALSE; 1546 break; 1547 1548 case TGSI_OPCODE_PK2US: 1549 return FALSE; 1550 break; 1551 1552 case TGSI_OPCODE_PK4B: 1553 return FALSE; 1554 break; 1555 1556 case TGSI_OPCODE_PK4UB: 1557 return FALSE; 1558 break; 1559 1560 case TGSI_OPCODE_RFL: 1561 return FALSE; 1562 break; 1563 1564 case TGSI_OPCODE_SEQ: 1565 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1566 src0 = emit_fetch( bld, inst, 0, chan_index ); 1567 src1 = emit_fetch( bld, inst, 1, chan_index ); 1568 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1569 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1570 } 1571 break; 1572 1573 case TGSI_OPCODE_SFL: 1574 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1575 dst0[chan_index] = bld->base.zero; 1576 } 1577 break; 1578 1579 case TGSI_OPCODE_SGT: 1580 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1581 src0 = emit_fetch( bld, inst, 0, chan_index ); 1582 src1 = emit_fetch( bld, inst, 1, chan_index ); 1583 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1584 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1585 } 1586 break; 1587 1588 case TGSI_OPCODE_SIN: 1589 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1590 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1591 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1592 dst0[chan_index] = tmp0; 1593 } 1594 break; 1595 1596 case TGSI_OPCODE_SLE: 1597 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1598 src0 = emit_fetch( bld, inst, 0, chan_index ); 1599 src1 = emit_fetch( bld, inst, 1, chan_index ); 1600 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, 
src0, src1 ); 1601 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1602 } 1603 break; 1604 1605 case TGSI_OPCODE_SNE: 1606 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1607 src0 = emit_fetch( bld, inst, 0, chan_index ); 1608 src1 = emit_fetch( bld, inst, 1, chan_index ); 1609 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1610 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1611 } 1612 break; 1613 1614 case TGSI_OPCODE_STR: 1615 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1616 dst0[chan_index] = bld->base.one; 1617 } 1618 break; 1619 1620 case TGSI_OPCODE_TEX: 1621 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); 1622 break; 1623 1624 case TGSI_OPCODE_TXD: 1625 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1626 break; 1627 1628 case TGSI_OPCODE_UP2H: 1629 /* deprecated */ 1630 assert (0); 1631 return FALSE; 1632 break; 1633 1634 case TGSI_OPCODE_UP2US: 1635 /* deprecated */ 1636 assert(0); 1637 return FALSE; 1638 break; 1639 1640 case TGSI_OPCODE_UP4B: 1641 /* deprecated */ 1642 assert(0); 1643 return FALSE; 1644 break; 1645 1646 case TGSI_OPCODE_UP4UB: 1647 /* deprecated */ 1648 assert(0); 1649 return FALSE; 1650 break; 1651 1652 case TGSI_OPCODE_X2D: 1653 /* deprecated? 
*/ 1654 assert(0); 1655 return FALSE; 1656 break; 1657 1658 case TGSI_OPCODE_ARA: 1659 /* deprecated */ 1660 assert(0); 1661 return FALSE; 1662 break; 1663 1664 case TGSI_OPCODE_ARR: 1665 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1666 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1667 tmp0 = lp_build_round(&bld->base, tmp0); 1668 dst0[chan_index] = tmp0; 1669 } 1670 break; 1671 1672 case TGSI_OPCODE_BRA: 1673 /* deprecated */ 1674 assert(0); 1675 return FALSE; 1676 break; 1677 1678 case TGSI_OPCODE_CAL: 1679 lp_exec_mask_call(&bld->exec_mask, 1680 inst->Label.Label, 1681 pc); 1682 1683 break; 1684 1685 case TGSI_OPCODE_RET: 1686 lp_exec_mask_ret(&bld->exec_mask, pc); 1687 break; 1688 1689 case TGSI_OPCODE_END: 1690 *pc = -1; 1691 break; 1692 1693 case TGSI_OPCODE_SSG: 1694 /* TGSI_OPCODE_SGN */ 1695 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1696 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1697 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1698 } 1699 break; 1700 1701 case TGSI_OPCODE_CMP: 1702 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1703 src0 = emit_fetch( bld, inst, 0, chan_index ); 1704 src1 = emit_fetch( bld, inst, 1, chan_index ); 1705 src2 = emit_fetch( bld, inst, 2, chan_index ); 1706 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 1707 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 1708 } 1709 break; 1710 1711 case TGSI_OPCODE_SCS: 1712 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1713 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1714 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 1715 } 1716 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1717 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1718 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 1719 } 1720 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1721 dst0[CHAN_Z] = bld->base.zero; 1722 } 1723 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1724 dst0[CHAN_W] = bld->base.one; 1725 } 1726 break; 1727 1728 case TGSI_OPCODE_TXB: 
1729 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); 1730 break; 1731 1732 case TGSI_OPCODE_NRM: 1733 /* fall-through */ 1734 case TGSI_OPCODE_NRM4: 1735 /* 3 or 4-component normalization */ 1736 { 1737 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 1738 1739 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 1740 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 1741 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 1742 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 1743 1744 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1745 1746 /* xmm4 = src.x */ 1747 /* xmm0 = src.x * src.x */ 1748 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1749 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1750 tmp4 = tmp0; 1751 } 1752 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 1753 1754 /* xmm5 = src.y */ 1755 /* xmm0 = xmm0 + src.y * src.y */ 1756 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 1757 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1758 tmp5 = tmp1; 1759 } 1760 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1761 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1762 1763 /* xmm6 = src.z */ 1764 /* xmm0 = xmm0 + src.z * src.z */ 1765 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 1766 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1767 tmp6 = tmp1; 1768 } 1769 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1770 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1771 1772 if (dims == 4) { 1773 /* xmm7 = src.w */ 1774 /* xmm0 = xmm0 + src.w * src.w */ 1775 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 1776 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 1777 tmp7 = tmp1; 1778 } 1779 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1780 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1781 } 1782 1783 /* xmm1 = 1 / sqrt(xmm0) */ 1784 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 1785 1786 /* dst.x = xmm1 * src.x */ 1787 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1788 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 1789 } 1790 1791 /* dst.y = xmm1 * src.y */ 1792 if 
(IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1793 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 1794 } 1795 1796 /* dst.z = xmm1 * src.z */ 1797 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1798 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 1799 } 1800 1801 /* dst.w = xmm1 * src.w */ 1802 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 1803 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 1804 } 1805 } 1806 1807 /* dst.w = 1.0 */ 1808 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 1809 dst0[CHAN_W] = bld->base.one; 1810 } 1811 } 1812 break; 1813 1814 case TGSI_OPCODE_DIV: 1815 /* deprecated */ 1816 assert( 0 ); 1817 return FALSE; 1818 break; 1819 1820 case TGSI_OPCODE_DP2: 1821 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1822 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1823 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1824 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1825 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1826 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1827 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1828 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1829 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1830 } 1831 break; 1832 1833 case TGSI_OPCODE_TXL: 1834 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 1835 break; 1836 1837 case TGSI_OPCODE_TXP: 1838 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); 1839 break; 1840 1841 case TGSI_OPCODE_BRK: 1842 lp_exec_break(&bld->exec_mask); 1843 break; 1844 1845 case TGSI_OPCODE_IF: 1846 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1847 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 1848 tmp0, bld->base.zero); 1849 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 1850 break; 1851 1852 case TGSI_OPCODE_BGNLOOP: 1853 lp_exec_bgnloop(&bld->exec_mask); 1854 break; 1855 1856 case 
TGSI_OPCODE_BGNSUB: 1857 lp_exec_mask_bgnsub(&bld->exec_mask); 1858 break; 1859 1860 case TGSI_OPCODE_ELSE: 1861 lp_exec_mask_cond_invert(&bld->exec_mask); 1862 break; 1863 1864 case TGSI_OPCODE_ENDIF: 1865 lp_exec_mask_cond_pop(&bld->exec_mask); 1866 break; 1867 1868 case TGSI_OPCODE_ENDLOOP: 1869 lp_exec_endloop(&bld->exec_mask); 1870 break; 1871 1872 case TGSI_OPCODE_ENDSUB: 1873 lp_exec_mask_endsub(&bld->exec_mask, pc); 1874 break; 1875 1876 case TGSI_OPCODE_PUSHA: 1877 /* deprecated? */ 1878 assert(0); 1879 return FALSE; 1880 break; 1881 1882 case TGSI_OPCODE_POPA: 1883 /* deprecated? */ 1884 assert(0); 1885 return FALSE; 1886 break; 1887 1888 case TGSI_OPCODE_CEIL: 1889 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1890 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1891 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 1892 } 1893 break; 1894 1895 case TGSI_OPCODE_I2F: 1896 /* deprecated? */ 1897 assert(0); 1898 return FALSE; 1899 break; 1900 1901 case TGSI_OPCODE_NOT: 1902 /* deprecated? */ 1903 assert(0); 1904 return FALSE; 1905 break; 1906 1907 case TGSI_OPCODE_TRUNC: 1908 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1909 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1910 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 1911 } 1912 break; 1913 1914 case TGSI_OPCODE_SHL: 1915 /* deprecated? */ 1916 assert(0); 1917 return FALSE; 1918 break; 1919 1920 case TGSI_OPCODE_ISHR: 1921 /* deprecated? */ 1922 assert(0); 1923 return FALSE; 1924 break; 1925 1926 case TGSI_OPCODE_AND: 1927 /* deprecated? */ 1928 assert(0); 1929 return FALSE; 1930 break; 1931 1932 case TGSI_OPCODE_OR: 1933 /* deprecated? */ 1934 assert(0); 1935 return FALSE; 1936 break; 1937 1938 case TGSI_OPCODE_MOD: 1939 /* deprecated? */ 1940 assert(0); 1941 return FALSE; 1942 break; 1943 1944 case TGSI_OPCODE_XOR: 1945 /* deprecated? */ 1946 assert(0); 1947 return FALSE; 1948 break; 1949 1950 case TGSI_OPCODE_SAD: 1951 /* deprecated? 
       */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXF:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_TXQ:
      /* deprecated? */
      assert(0);
      return FALSE;
      break;

   case TGSI_OPCODE_CONT:
      lp_exec_continue(&bld->exec_mask);
      break;

   case TGSI_OPCODE_EMIT:
      /* not translated; caller reports failure */
      return FALSE;
      break;

   case TGSI_OPCODE_ENDPRIM:
      /* not translated; caller reports failure */
      return FALSE;
      break;

   case TGSI_OPCODE_NOP:
      break;

   default:
      /* unknown/unsupported opcode */
      return FALSE;
   }

   /* Common store epilogue: fetch the per-channel predicate and write back
    * every destination channel enabled in the writemask.
    */
   if(info->num_dst) {
      LLVMValueRef pred[NUM_CHANNELS];

      emit_fetch_predicate( bld, inst, pred );

      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
         emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]);
      }
   }

   return TRUE;
}


/**
 * Translate a TGSI token stream into LLVM IR, SoA style.
 *
 * The tokens are first parsed: declarations and immediates are processed
 * immediately, while instructions are buffered into bld.instructions.
 * The buffered instructions are then emitted by a pc-driven loop so that
 * CAL/RET can jump (TGSI_OPCODE_END sets pc to -1 to terminate).
 *
 * \param builder     LLVM builder all IR is emitted through
 * \param tokens      TGSI shader to translate
 * \param type        per-channel vector type used for all values
 * \param mask        execution-mask context (stored in the build context)
 * \param consts_ptr  pointer to the constant buffer
 * \param pos         fragment position values  -- NOTE(review): only stored
 *                    into bld.pos here; usage is elsewhere in this file
 * \param inputs      shader input values, one vector per channel
 * \param outputs     shader output slots, written by emit_store
 * \param sampler     texture sampling code generator
 * \param info        scan results; indirect_files is read here
 */
void
lp_build_tgsi_soa(LLVMBuilderRef builder,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[NUM_CHANNELS],
                  LLVMValueRef (*outputs)[NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_soa_context bld;
   struct tgsi_parse_context parse;
   uint num_immediates = 0;
   uint num_instructions = 0;
   unsigned i;
   int pc = 0;   /* index into bld.instructions; -1 terminates the run loop */

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.base, builder, type);
   lp_build_context_init(&bld.int_bld, builder, lp_int_type(type));
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.indirect_files = info->indirect_files;
   /* instruction buffer grows by LP_MAX_INSTRUCTIONS-sized steps below */
   bld.instructions = (struct tgsi_full_instruction *)
      MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) );
   bld.max_instructions = LP_MAX_INSTRUCTIONS;

   if (!bld.instructions) {
      return;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.base);

   tgsi_parse_init( &parse, tokens );

   while( !tgsi_parse_end_of_tokens( &parse ) ) {
      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         /* Inputs already interpolated */
         emit_declaration( &bld, &parse.FullToken.FullDeclaration );
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            /* save expanded instruction */
            if (num_instructions == bld.max_instructions) {
               struct tgsi_full_instruction *instructions;
               instructions = REALLOC(bld.instructions,
                                      bld.max_instructions
                                      * sizeof(struct tgsi_full_instruction),
                                      (bld.max_instructions + LP_MAX_INSTRUCTIONS)
                                      * sizeof(struct tgsi_full_instruction));
               if (!instructions) {
                  /* NOTE(review): on REALLOC failure this instruction (and
                   * all later ones hitting the same full buffer) is silently
                   * dropped, producing a truncated translation -- consider
                   * aborting instead; TODO confirm intended behavior.
                   */
                  break;
               }
               bld.instructions = instructions;
               bld.max_instructions += LP_MAX_INSTRUCTIONS;
            }

            memcpy(bld.instructions + num_instructions,
                   &parse.FullToken.FullInstruction,
                   sizeof(bld.instructions[0]));

            num_instructions++;
         }

         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         /* simply copy the immediate values into the next immediates[] slot */
         {
            const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
            assert(size <= 4);
            /* NOTE(review): bound checked only by assert -- a shader with
             * more than LP_MAX_TGSI_IMMEDIATES immediates would overflow in
             * release builds; TODO confirm callers guarantee the limit.
             */
            assert(num_immediates < LP_MAX_TGSI_IMMEDIATES);
            for( i = 0; i < size; ++i )
               bld.immediates[num_immediates][i] =
                  lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float);
            /* pad unspecified components with undef */
            for( i = size; i < 4; ++i )
               bld.immediates[num_immediates][i] = bld.base.undef;
            num_immediates++;
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         break;

      default:
         assert( 0 );
      }
   }

   /* Emit the buffered instructions; TGSI_OPCODE_END sets pc to -1,
    * CAL/RET adjust it through lp_exec_mask_call/ret.
    */
   while (pc != -1) {
      struct tgsi_full_instruction *instr = bld.instructions + pc;
      const struct tgsi_opcode_info *opcode_info =
         tgsi_get_opcode_info(instr->Instruction.Opcode);
      if (!emit_instruction( &bld, instr, opcode_info, &pc ))
         _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n",
                       opcode_info->mnemonic);
   }

   /* dead debug code -- flip the condition to dump the TGSI and the
    * generated function
    */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }
   tgsi_parse_free( &parse );

   /* dead debug code -- flip the condition to dump the whole module */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder)));
      LLVMDumpModule(module);

   }

   FREE( bld.instructions );
}