lp_bld_tgsi_soa.c revision 58daea741fa21fe3f89fd7bf106df1545c5b21af
1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca@vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"


/** Iterate CHAN over the four TGSI channels (x, y, z, w). */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/** Non-zero if channel CHAN is enabled in dst register 0's writemask. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))

#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/** Iterate CHAN over only the channels enabled in dst 0's writemask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
#define NUM_CHANNELS 4

/* Initial capacity of the parsed-instruction array (grown on demand
 * by the caller that fills 'instructions' below). */
#define LP_MAX_INSTRUCTIONS 256


/**
 * Execution-mask state for conditionals, loops and subroutine calls.
 *
 * Each mask is an integer vector with ~0 in active channels and 0 in
 * inactive ones; exec_mask is the AND of cond/cont/break (and ret)
 * masks, recomputed by lp_exec_mask_update().
 */
struct lp_exec_mask {
   struct lp_build_context *bld;

   /* TRUE while any cond/loop/call nesting is active, i.e. stores must
    * honour exec_mask. */
   boolean has_mask;

   LLVMTypeRef int_vec_type;

   /* Saved cond masks for nested IF/ELSE/ENDIF. */
   LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING];
   int cond_stack_size;
   LLVMValueRef cond_mask;

   LLVMBasicBlockRef loop_block;
   LLVMValueRef cont_mask;
   LLVMValueRef break_mask;
   /* Alloca backing break_mask so it survives across loop iterations. */
   LLVMValueRef break_var;
   /* Saved loop state for nested BGNLOOP/ENDLOOP. */
   struct {
      LLVMBasicBlockRef loop_block;
      LLVMValueRef cont_mask;
      LLVMValueRef break_mask;
      LLVMValueRef break_var;
   } loop_stack[LP_MAX_TGSI_NESTING];
   int loop_stack_size;

   LLVMValueRef ret_mask;
   /* Return pc and saved ret mask for CAL/RET nesting. */
   struct {
      int pc;
      LLVMValueRef ret_mask;
   } call_stack[LP_MAX_TGSI_NESTING];
   int call_stack_size;

   LLVMValueRef exec_mask;
};

/** Context for TGSI -> LLVM IR translation in SoA form. */
struct lp_build_tgsi_soa_context
{
   struct lp_build_context base;

   /*
Builder for integer masks and indices */
   struct lp_build_context int_bld;

   LLVMValueRef consts_ptr;
   const LLVMValueRef *pos;
   const LLVMValueRef (*inputs)[NUM_CHANNELS];
   LLVMValueRef (*outputs)[NUM_CHANNELS];

   const struct lp_build_sampler_soa *sampler;

   LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES][NUM_CHANNELS];
   LLVMValueRef temps[LP_MAX_TGSI_TEMPS][NUM_CHANNELS];
   LLVMValueRef addr[LP_MAX_TGSI_ADDRS][NUM_CHANNELS];
   LLVMValueRef preds[LP_MAX_TGSI_PREDS][NUM_CHANNELS];

   /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is
    * set in the indirect_files field.
    * The temps[] array above is unused then.
    */
   LLVMValueRef temps_array;

   /** bitmask indicating which register files are accessed indirectly */
   unsigned indirect_files;

   struct lp_build_mask_context *mask;
   struct lp_exec_mask exec_mask;

   struct tgsi_full_instruction *instructions;
   uint max_instructions;
};

/** Reset all masks to "all channels active" and empty all stacks. */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->cond_stack_size = 0;
   mask->loop_stack_size = 0;
   mask->call_stack_size = 0;

   mask->int_vec_type = lp_build_int_vec_type(mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
      LLVMConstAllOnes(mask->int_vec_type);
}

/**
 * Recompute exec_mask = cond_mask & cont_mask & break_mask
 * (& ret_mask when inside a subroutine) and refresh has_mask.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(mask->bld->builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (mask->call_stack_size) {
      mask->exec_mask = LLVMBuildAnd(mask->bld->builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}

/** IF: push the current cond mask and AND the new condition into it. */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

/** ELSE: invert the current condition within the enclosing mask. */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(mask->bld->builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(mask->bld->builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

/** ENDIF: restore the cond mask saved by the matching push. */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   assert(mask->cond_stack_size);
   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
   lp_exec_mask_update(mask);
}

/**
 * BGNLOOP: save the enclosing loop's state, allocate a fresh break
 * variable and open a new basic block for the loop body.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break_mask lives in memory so it carries across loop iterations */
   mask->break_var = lp_build_alloca(mask->bld->builder, mask->int_vec_type, "");
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->builder, "bgnloop");
   LLVMBuildBr(mask->bld->builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(mask->bld->builder, mask->loop_block);

   mask->break_mask = LLVMBuildLoad(mask->bld->builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}

/** BRK: clear the break mask for all currently-active channels. */
static void lp_exec_break(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "break");

   mask->break_mask = LLVMBuildAnd(mask->bld->builder,
                                   mask->break_mask,
                                   exec_mask, "break_full");

   lp_exec_mask_update(mask);
}

/** CONT: clear the continue mask for all currently-active channels. */
static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMValueRef exec_mask = LLVMBuildNot(mask->bld->builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(mask->bld->builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


/**
 * ENDLOOP: branch back to the loop header while any channel is still
 * active, then restore the enclosing loop's state.
 */
static void lp_exec_endloop(struct lp_exec_mask *mask)
{
   LLVMBasicBlockRef endloop;
   LLVMTypeRef reg_type = LLVMIntType(mask->bld->type.width*
                                      mask->bld->type.length);
   LLVMValueRef i1cond;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(mask->bld->builder, mask->break_mask, mask->break_var);

   /* i1cond = (exec_mask != 0): keep looping while any channel is active */
   i1cond = LLVMBuildICmp(
      mask->bld->builder,
      LLVMIntNE,
      LLVMBuildBitCast(mask->bld->builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   endloop = lp_build_insert_new_block(mask->bld->builder, "endloop");

   LLVMBuildCondBr(mask->bld->builder,
                   i1cond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(mask->bld->builder, endloop);

   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}

/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(mask->bld->builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef real_val, dst_val;

      /* load-blend-store: keep old lanes where pred is 0 */
      dst_val = LLVMBuildLoad(mask->bld->builder, dst, "");
      real_val = lp_build_select(mask->bld,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(mask->bld->builder, real_val, dst);
   } else
      LLVMBuildStore(mask->bld->builder, val, dst);
}

/** CAL: push the return pc and current ret mask, jump to 'func'. */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
   mask->call_stack[mask->call_stack_size].pc = *pc;
   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
   mask->call_stack_size++;
   *pc = func;
}

/** RET: deactivate the channels that executed this return. */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMValueRef exec_mask;

   if (mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }
   exec_mask = LLVMBuildNot(mask->bld->builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(mask->bld->builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

/** BGNSUB: nothing to do -- lp_exec_mask_call() sets up all state. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

/** ENDSUB: pop the call stack and resume at the saved pc. */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   assert(mask->call_stack_size);
   mask->call_stack_size--;
   *pc = mask->call_stack[mask->call_stack_size].pc;
   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
   lp_exec_mask_update(mask);
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
static LLVMValueRef
get_temp_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned index,
             unsigned chan)
{
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      /* temps live in the flat temps_array; 4 channels per register */
      LLVMValueRef lindex = lp_build_const_int32(index * 4 + chan);
      return LLVMBuildGEP(bld->base.builder, bld->temps_array, &lindex, 1, "");
   }
   else {
      return bld->temps[index][chan];
   }
}


/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_soa_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMValueRef res = bld->base.undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->base.type.length; i++) {
      LLVMValueRef ii = LLVMConstInt(LLVMInt32Type(), i, 0);
      LLVMValueRef index = LLVMBuildExtractElement(bld->base.builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, base_ptr,
                                             &index, 1, "");
      LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(bld->base.builder, res, scalar, ii, "");
   }

   return res;
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, multiply by four and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_offsets(struct lp_build_tgsi_soa_context *bld,
                     const struct tgsi_src_register *indirect_reg)
{
   /* always use X component of address register */
   /* NOTE(review): 'x' is SwizzleX's value and is then passed as the
    * channel argument below; if the intent is "channel X" this should
    * arguably be a constant 0 -- verify against tgsi_util docs. */
   const int x = indirect_reg->SwizzleX;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type);
   uint swizzle = tgsi_util_get_src_register_swizzle(indirect_reg, x);
   LLVMValueRef vec4 = lp_build_const_int_vec(bld->int_bld.type, 4);
   LLVMValueRef addr_vec;

   addr_vec = LLVMBuildLoad(bld->base.builder,
                            bld->addr[indirect_reg->Index][swizzle],
                            "load addr reg");

   /* for indexing we want integers */
   addr_vec = LLVMBuildFPToSI(bld->base.builder, addr_vec,
                              int_vec_type, "");

   /* addr_vec = addr_vec * 4 (4 channels per register) */
   addr_vec = lp_build_mul(&bld->int_bld, addr_vec, vec4);

   return addr_vec;
}


/**
 * Register fetch.
 * Load one channel of source operand 'src_op', applying the operand's
 * swizzle and sign mode.  Indirectly-addressed constants/temporaries
 * are fetched with a per-channel gather.
 */
static LLVMValueRef
emit_fetch(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned src_op,
   const unsigned chan_index )
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   const unsigned swizzle =
      tgsi_util_get_full_src_register_swizzle(reg, chan_index);
   LLVMValueRef res;
   LLVMValueRef addr_vec = NULL;

   if (swizzle > 3) {
      assert(0 && "invalid swizzle in emit_fetch()");
      return bld->base.undef;
   }

   if (reg->Register.Indirect) {
      assert(bld->indirect_files);
      addr_vec = get_indirect_offsets(bld, &reg->Indirect);
   }

   switch (reg->Register.File) {
   case TGSI_FILE_CONSTANT:
      if (reg->Register.Indirect) {
         LLVMValueRef index_vec;  /* index into the const buffer */

         assert(bld->indirect_files & (1 << TGSI_FILE_CONSTANT));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec = index_vec + addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* Gather values from the constant buffer */
         res = build_gather(bld, bld->consts_ptr, index_vec);
      }
      else {
         LLVMValueRef index;  /* index into the const buffer */
         LLVMValueRef scalar, scalar_ptr;

         index = lp_build_const_int32(reg->Register.Index*4 + swizzle);

         scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr,
                                   &index, 1, "");
         scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");

         /* constants are scalar: replicate across all channels */
         res = lp_build_broadcast_scalar(&bld->base, scalar);
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      res = bld->immediates[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_INPUT:
      res = bld->inputs[reg->Register.Index][swizzle];
      assert(res);
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef vec_len =
            lp_build_const_int_vec(bld->int_bld.type, bld->base.type.length);
         LLVMValueRef index_vec;  /* index into the temp reg array */
         LLVMValueRef temps_array;
         LLVMTypeRef float4_ptr_type;

         assert(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY));

         /* index_vec = broadcast(reg->Register.Index * 4 + swizzle) */
         index_vec = lp_build_const_int_vec(bld->int_bld.type,
                                            reg->Register.Index * 4 + swizzle);

         /* index_vec += addr_vec */
         index_vec = lp_build_add(&bld->int_bld, index_vec, addr_vec);

         /* index_vec *= vector_length */
         index_vec = lp_build_mul(&bld->int_bld, index_vec, vec_len);

         /* NOTE(review): no per-lane offset (0..length-1) is added before
          * the gather, so every lane appears to read element 0 of the
          * addressed vector -- verify against build_gather() semantics. */

         /* cast temps_array pointer to float* */
         float4_ptr_type = LLVMPointerType(LLVMFloatType(), 0);
         temps_array = LLVMBuildBitCast(bld->int_bld.builder, bld->temps_array,
                                        float4_ptr_type, "");

         /* Gather values from the temporary register array */
         res = build_gather(bld, temps_array, index_vec);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = get_temp_ptr(bld, reg->Register.Index, swizzle);
         res = LLVMBuildLoad(bld->base.builder, temp_ptr, "");
         if (!res)
            return bld->base.undef;
      }
      break;

   default:
      assert(0 && "invalid src register in emit_fetch()");
      return bld->base.undef;
   }

   switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
   case TGSI_UTIL_SIGN_CLEAR:
      res = lp_build_abs( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_SET:
      res = lp_build_abs( &bld->base, res );
      /* fall through */
   case TGSI_UTIL_SIGN_TOGGLE:
      res = lp_build_negate( &bld->base, res );
      break;

   case TGSI_UTIL_SIGN_KEEP:
      break;
   }

   return res;
}


/**
 * Register fetch with derivatives.
 * Fetches the operand and optionally its x/y screen-space derivatives
 * (computed from the quad layout by lp_build_ddx/ddy).
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   const unsigned chan_index,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   LLVMValueRef src;

   src = emit_fetch(bld, inst, index, chan_index);

   if(res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if(ddx)
      *ddx = lp_build_ddx(&bld->base, src);

   if(ddy)
      *ddy = lp_build_ddy(&bld->base, src);
}


/**
 * Predicate.
661 */ 662static void 663emit_fetch_predicate( 664 struct lp_build_tgsi_soa_context *bld, 665 const struct tgsi_full_instruction *inst, 666 LLVMValueRef *pred) 667{ 668 unsigned index; 669 unsigned char swizzles[4]; 670 LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL}; 671 LLVMValueRef value; 672 unsigned chan; 673 674 if (!inst->Instruction.Predicate) { 675 FOR_EACH_CHANNEL( chan ) { 676 pred[chan] = NULL; 677 } 678 return; 679 } 680 681 swizzles[0] = inst->Predicate.SwizzleX; 682 swizzles[1] = inst->Predicate.SwizzleY; 683 swizzles[2] = inst->Predicate.SwizzleZ; 684 swizzles[3] = inst->Predicate.SwizzleW; 685 686 index = inst->Predicate.Index; 687 assert(index < LP_MAX_TGSI_PREDS); 688 689 FOR_EACH_CHANNEL( chan ) { 690 unsigned swizzle = swizzles[chan]; 691 692 /* 693 * Only fetch the predicate register channels that are actually listed 694 * in the swizzles 695 */ 696 if (!unswizzled[swizzle]) { 697 value = LLVMBuildLoad(bld->base.builder, 698 bld->preds[index][swizzle], ""); 699 700 /* 701 * Convert the value to an integer mask. 702 * 703 * TODO: Short-circuit this comparison -- a D3D setp_xx instructions 704 * is needlessly causing two comparisons due to storing the intermediate 705 * result as float vector instead of an integer mask vector. 706 */ 707 value = lp_build_compare(bld->base.builder, 708 bld->base.type, 709 PIPE_FUNC_NOTEQUAL, 710 value, 711 bld->base.zero); 712 if (inst->Predicate.Negate) { 713 value = LLVMBuildNot(bld->base.builder, value, ""); 714 } 715 716 unswizzled[swizzle] = value; 717 } else { 718 value = unswizzled[swizzle]; 719 } 720 721 pred[chan] = value; 722 } 723} 724 725 726/** 727 * Register store. 
728 */ 729static void 730emit_store( 731 struct lp_build_tgsi_soa_context *bld, 732 const struct tgsi_full_instruction *inst, 733 unsigned index, 734 unsigned chan_index, 735 LLVMValueRef pred, 736 LLVMValueRef value) 737{ 738 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 739 LLVMValueRef addr = NULL; 740 741 switch( inst->Instruction.Saturate ) { 742 case TGSI_SAT_NONE: 743 break; 744 745 case TGSI_SAT_ZERO_ONE: 746 value = lp_build_max(&bld->base, value, bld->base.zero); 747 value = lp_build_min(&bld->base, value, bld->base.one); 748 break; 749 750 case TGSI_SAT_MINUS_PLUS_ONE: 751 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.type, -1.0)); 752 value = lp_build_min(&bld->base, value, bld->base.one); 753 break; 754 755 default: 756 assert(0); 757 } 758 759 if (reg->Register.Indirect) { 760 /* XXX use get_indirect_offsets() here eventually */ 761 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->base.type); 762 unsigned swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, chan_index ); 763 764 assert(bld->indirect_files); 765 766 addr = LLVMBuildLoad(bld->base.builder, 767 bld->addr[reg->Indirect.Index][swizzle], 768 ""); 769 /* for indexing we want integers */ 770 addr = LLVMBuildFPToSI(bld->base.builder, addr, 771 int_vec_type, ""); 772 addr = LLVMBuildExtractElement(bld->base.builder, 773 addr, LLVMConstInt(LLVMInt32Type(), 0, 0), 774 ""); 775 addr = LLVMBuildMul(bld->base.builder, 776 addr, LLVMConstInt(LLVMInt32Type(), 4, 0), 777 ""); 778 } 779 780 switch( reg->Register.File ) { 781 case TGSI_FILE_OUTPUT: 782 lp_exec_mask_store(&bld->exec_mask, pred, value, 783 bld->outputs[reg->Register.Index][chan_index]); 784 break; 785 786 case TGSI_FILE_TEMPORARY: 787 if (reg->Register.Indirect) { 788 /* XXX not done yet */ 789 debug_printf("WARNING: LLVM scatter store of temp regs" 790 " not implemented\n"); 791 } 792 else { 793 LLVMValueRef temp_ptr = get_temp_ptr(bld, reg->Register.Index, 794 chan_index); 795 
lp_exec_mask_store(&bld->exec_mask, pred, value, temp_ptr); 796 } 797 break; 798 799 case TGSI_FILE_ADDRESS: 800 lp_exec_mask_store(&bld->exec_mask, pred, value, 801 bld->addr[reg->Indirect.Index][chan_index]); 802 break; 803 804 case TGSI_FILE_PREDICATE: 805 lp_exec_mask_store(&bld->exec_mask, pred, value, 806 bld->preds[reg->Register.Index][chan_index]); 807 break; 808 809 default: 810 assert( 0 ); 811 } 812} 813 814 815/** 816 * High-level instruction translators. 817 */ 818 819static void 820emit_tex( struct lp_build_tgsi_soa_context *bld, 821 const struct tgsi_full_instruction *inst, 822 enum lp_build_tex_modifier modifier, 823 LLVMValueRef *texel) 824{ 825 unsigned unit; 826 LLVMValueRef lod_bias, explicit_lod; 827 LLVMValueRef oow = NULL; 828 LLVMValueRef coords[3]; 829 LLVMValueRef ddx[3]; 830 LLVMValueRef ddy[3]; 831 unsigned num_coords; 832 unsigned i; 833 834 if (!bld->sampler) { 835 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 836 for (i = 0; i < 4; i++) { 837 texel[i] = bld->base.undef; 838 } 839 return; 840 } 841 842 switch (inst->Texture.Texture) { 843 case TGSI_TEXTURE_1D: 844 num_coords = 1; 845 break; 846 case TGSI_TEXTURE_2D: 847 case TGSI_TEXTURE_RECT: 848 num_coords = 2; 849 break; 850 case TGSI_TEXTURE_SHADOW1D: 851 case TGSI_TEXTURE_SHADOW2D: 852 case TGSI_TEXTURE_SHADOWRECT: 853 case TGSI_TEXTURE_3D: 854 case TGSI_TEXTURE_CUBE: 855 num_coords = 3; 856 break; 857 default: 858 assert(0); 859 return; 860 } 861 862 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 863 lod_bias = emit_fetch( bld, inst, 0, 3 ); 864 explicit_lod = NULL; 865 } 866 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 867 lod_bias = NULL; 868 explicit_lod = emit_fetch( bld, inst, 0, 3 ); 869 } 870 else { 871 lod_bias = NULL; 872 explicit_lod = NULL; 873 } 874 875 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 876 oow = emit_fetch( bld, inst, 0, 3 ); 877 oow = lp_build_rcp(&bld->base, oow); 878 } 879 880 for (i = 0; i < 
num_coords; i++) { 881 coords[i] = emit_fetch( bld, inst, 0, i ); 882 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 883 coords[i] = lp_build_mul(&bld->base, coords[i], oow); 884 } 885 for (i = num_coords; i < 3; i++) { 886 coords[i] = bld->base.undef; 887 } 888 889 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 890 for (i = 0; i < num_coords; i++) { 891 ddx[i] = emit_fetch( bld, inst, 1, i ); 892 ddy[i] = emit_fetch( bld, inst, 2, i ); 893 } 894 unit = inst->Src[3].Register.Index; 895 } else { 896 for (i = 0; i < num_coords; i++) { 897 ddx[i] = lp_build_ddx( &bld->base, coords[i] ); 898 ddy[i] = lp_build_ddy( &bld->base, coords[i] ); 899 } 900 unit = inst->Src[1].Register.Index; 901 } 902 for (i = num_coords; i < 3; i++) { 903 ddx[i] = bld->base.undef; 904 ddy[i] = bld->base.undef; 905 } 906 907 bld->sampler->emit_fetch_texel(bld->sampler, 908 bld->base.builder, 909 bld->base.type, 910 unit, num_coords, coords, 911 ddx, ddy, 912 lod_bias, explicit_lod, 913 texel); 914} 915 916 917/** 918 * Kill fragment if any of the src register values are negative. 919 */ 920static void 921emit_kil( 922 struct lp_build_tgsi_soa_context *bld, 923 const struct tgsi_full_instruction *inst ) 924{ 925 const struct tgsi_full_src_register *reg = &inst->Src[0]; 926 LLVMValueRef terms[NUM_CHANNELS]; 927 LLVMValueRef mask; 928 unsigned chan_index; 929 930 memset(&terms, 0, sizeof terms); 931 932 FOR_EACH_CHANNEL( chan_index ) { 933 unsigned swizzle; 934 935 /* Unswizzle channel */ 936 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 937 938 /* Check if the component has not been already tested. 
*/ 939 assert(swizzle < NUM_CHANNELS); 940 if( !terms[swizzle] ) 941 /* TODO: change the comparison operator instead of setting the sign */ 942 terms[swizzle] = emit_fetch(bld, inst, 0, chan_index ); 943 } 944 945 mask = NULL; 946 FOR_EACH_CHANNEL( chan_index ) { 947 if(terms[chan_index]) { 948 LLVMValueRef chan_mask; 949 950 /* 951 * If term < 0 then mask = 0 else mask = ~0. 952 */ 953 chan_mask = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->base.zero); 954 955 if(mask) 956 mask = LLVMBuildAnd(bld->base.builder, mask, chan_mask, ""); 957 else 958 mask = chan_mask; 959 } 960 } 961 962 if(mask) 963 lp_build_mask_update(bld->mask, mask); 964} 965 966 967/** 968 * Predicated fragment kill. 969 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c). 970 * The only predication is the execution mask which will apply if 971 * we're inside a loop or conditional. 972 */ 973static void 974emit_kilp(struct lp_build_tgsi_soa_context *bld, 975 const struct tgsi_full_instruction *inst) 976{ 977 LLVMValueRef mask; 978 979 /* For those channels which are "alive", disable fragment shader 980 * execution. 
981 */ 982 if (bld->exec_mask.has_mask) { 983 mask = LLVMBuildNot(bld->base.builder, bld->exec_mask.exec_mask, "kilp"); 984 } 985 else { 986 mask = bld->base.zero; 987 } 988 989 lp_build_mask_update(bld->mask, mask); 990} 991 992static void 993emit_declaration( 994 struct lp_build_tgsi_soa_context *bld, 995 const struct tgsi_full_declaration *decl) 996{ 997 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.type); 998 999 unsigned first = decl->Range.First; 1000 unsigned last = decl->Range.Last; 1001 unsigned idx, i; 1002 1003 for (idx = first; idx <= last; ++idx) { 1004 switch (decl->Declaration.File) { 1005 case TGSI_FILE_TEMPORARY: 1006 assert(idx < LP_MAX_TGSI_TEMPS); 1007 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 1008 LLVMValueRef array_size = LLVMConstInt(LLVMInt32Type(), 1009 last*4 + 4, 0); 1010 bld->temps_array = lp_build_array_alloca(bld->base.builder, 1011 vec_type, array_size, ""); 1012 } else { 1013 for (i = 0; i < NUM_CHANNELS; i++) 1014 bld->temps[idx][i] = lp_build_alloca(bld->base.builder, 1015 vec_type, ""); 1016 } 1017 break; 1018 1019 case TGSI_FILE_OUTPUT: 1020 for (i = 0; i < NUM_CHANNELS; i++) 1021 bld->outputs[idx][i] = lp_build_alloca(bld->base.builder, 1022 vec_type, ""); 1023 break; 1024 1025 case TGSI_FILE_ADDRESS: 1026 assert(idx < LP_MAX_TGSI_ADDRS); 1027 for (i = 0; i < NUM_CHANNELS; i++) 1028 bld->addr[idx][i] = lp_build_alloca(bld->base.builder, 1029 vec_type, ""); 1030 break; 1031 1032 case TGSI_FILE_PREDICATE: 1033 assert(idx < LP_MAX_TGSI_PREDS); 1034 for (i = 0; i < NUM_CHANNELS; i++) 1035 bld->preds[idx][i] = lp_build_alloca(bld->base.builder, 1036 vec_type, ""); 1037 break; 1038 1039 default: 1040 /* don't need to declare other vars */ 1041 break; 1042 } 1043 } 1044} 1045 1046 1047/** 1048 * Emit LLVM for one TGSI instruction. 
1049 * \param return TRUE for success, FALSE otherwise 1050 */ 1051static boolean 1052emit_instruction( 1053 struct lp_build_tgsi_soa_context *bld, 1054 const struct tgsi_full_instruction *inst, 1055 const struct tgsi_opcode_info *info, 1056 int *pc) 1057{ 1058 unsigned chan_index; 1059 LLVMValueRef src0, src1, src2; 1060 LLVMValueRef tmp0, tmp1, tmp2; 1061 LLVMValueRef tmp3 = NULL; 1062 LLVMValueRef tmp4 = NULL; 1063 LLVMValueRef tmp5 = NULL; 1064 LLVMValueRef tmp6 = NULL; 1065 LLVMValueRef tmp7 = NULL; 1066 LLVMValueRef res; 1067 LLVMValueRef dst0[NUM_CHANNELS]; 1068 1069 /* 1070 * Stores and write masks are handled in a general fashion after the long 1071 * instruction opcode switch statement. 1072 * 1073 * Although not stricitly necessary, we avoid generating instructions for 1074 * channels which won't be stored, in cases where's that easy. For some 1075 * complex instructions, like texture sampling, it is more convenient to 1076 * assume a full writemask and then let LLVM optimization passes eliminate 1077 * redundant code. 
1078 */ 1079 1080 (*pc)++; 1081 1082 assert(info->num_dst <= 1); 1083 if (info->num_dst) { 1084 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1085 dst0[chan_index] = bld->base.undef; 1086 } 1087 } 1088 1089 switch (inst->Instruction.Opcode) { 1090 case TGSI_OPCODE_ARL: 1091 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1092 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1093 tmp0 = lp_build_floor(&bld->base, tmp0); 1094 dst0[chan_index] = tmp0; 1095 } 1096 break; 1097 1098 case TGSI_OPCODE_MOV: 1099 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1100 dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index ); 1101 } 1102 break; 1103 1104 case TGSI_OPCODE_LIT: 1105 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ) { 1106 dst0[CHAN_X] = bld->base.one; 1107 } 1108 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1109 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1110 dst0[CHAN_Y] = lp_build_max( &bld->base, src0, bld->base.zero); 1111 } 1112 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1113 /* XMM[1] = SrcReg[0].yyyy */ 1114 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1115 /* XMM[1] = max(XMM[1], 0) */ 1116 tmp1 = lp_build_max( &bld->base, tmp1, bld->base.zero); 1117 /* XMM[2] = SrcReg[0].wwww */ 1118 tmp2 = emit_fetch( bld, inst, 0, CHAN_W ); 1119 tmp1 = lp_build_pow( &bld->base, tmp1, tmp2); 1120 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1121 tmp2 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, tmp0, bld->base.zero); 1122 dst0[CHAN_Z] = lp_build_select(&bld->base, tmp2, tmp1, bld->base.zero); 1123 } 1124 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) ) { 1125 dst0[CHAN_W] = bld->base.one; 1126 } 1127 break; 1128 1129 case TGSI_OPCODE_RCP: 1130 /* TGSI_OPCODE_RECIP */ 1131 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1132 res = lp_build_rcp(&bld->base, src0); 1133 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1134 dst0[chan_index] = res; 1135 } 1136 break; 1137 1138 case TGSI_OPCODE_RSQ: 1139 /* TGSI_OPCODE_RECIPSQRT */ 1140 src0 = emit_fetch( bld, inst, 0, 
CHAN_X ); 1141 src0 = lp_build_abs(&bld->base, src0); 1142 res = lp_build_rsqrt(&bld->base, src0); 1143 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1144 dst0[chan_index] = res; 1145 } 1146 break; 1147 1148 case TGSI_OPCODE_EXP: 1149 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1150 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1151 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1152 LLVMValueRef *p_exp2_int_part = NULL; 1153 LLVMValueRef *p_frac_part = NULL; 1154 LLVMValueRef *p_exp2 = NULL; 1155 1156 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1157 1158 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1159 p_exp2_int_part = &tmp0; 1160 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1161 p_frac_part = &tmp1; 1162 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1163 p_exp2 = &tmp2; 1164 1165 lp_build_exp2_approx(&bld->base, src0, p_exp2_int_part, p_frac_part, p_exp2); 1166 1167 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1168 dst0[CHAN_X] = tmp0; 1169 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1170 dst0[CHAN_Y] = tmp1; 1171 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1172 dst0[CHAN_Z] = tmp2; 1173 } 1174 /* dst.w = 1.0 */ 1175 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1176 dst0[CHAN_W] = bld->base.one; 1177 } 1178 break; 1179 1180 case TGSI_OPCODE_LOG: 1181 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1182 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1183 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) { 1184 LLVMValueRef *p_floor_log2 = NULL; 1185 LLVMValueRef *p_exp = NULL; 1186 LLVMValueRef *p_log2 = NULL; 1187 1188 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1189 src0 = lp_build_abs( &bld->base, src0 ); 1190 1191 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1192 p_floor_log2 = &tmp0; 1193 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) 1194 p_exp = &tmp1; 1195 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1196 p_log2 = &tmp2; 1197 1198 lp_build_log2_approx(&bld->base, src0, p_exp, p_floor_log2, p_log2); 1199 1200 /* dst.x = floor(lg2(abs(src.x))) */ 1201 if 
(IS_DST0_CHANNEL_ENABLED( inst, CHAN_X )) 1202 dst0[CHAN_X] = tmp0; 1203 /* dst.y = abs(src)/ex2(floor(lg2(abs(src.x)))) */ 1204 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y )) { 1205 dst0[CHAN_Y] = lp_build_div( &bld->base, src0, tmp1); 1206 } 1207 /* dst.z = lg2(abs(src.x)) */ 1208 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) 1209 dst0[CHAN_Z] = tmp2; 1210 } 1211 /* dst.w = 1.0 */ 1212 if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_W )) { 1213 dst0[CHAN_W] = bld->base.one; 1214 } 1215 break; 1216 1217 case TGSI_OPCODE_MUL: 1218 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1219 src0 = emit_fetch( bld, inst, 0, chan_index ); 1220 src1 = emit_fetch( bld, inst, 1, chan_index ); 1221 dst0[chan_index] = lp_build_mul(&bld->base, src0, src1); 1222 } 1223 break; 1224 1225 case TGSI_OPCODE_ADD: 1226 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1227 src0 = emit_fetch( bld, inst, 0, chan_index ); 1228 src1 = emit_fetch( bld, inst, 1, chan_index ); 1229 dst0[chan_index] = lp_build_add(&bld->base, src0, src1); 1230 } 1231 break; 1232 1233 case TGSI_OPCODE_DP3: 1234 /* TGSI_OPCODE_DOT3 */ 1235 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1236 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1237 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1238 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1239 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1240 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1241 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1242 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1243 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1244 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1245 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1246 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1247 dst0[chan_index] = tmp0; 1248 } 1249 break; 1250 1251 case TGSI_OPCODE_DP4: 1252 /* TGSI_OPCODE_DOT4 */ 1253 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1254 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1255 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1256 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y 
); 1257 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1258 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1259 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1260 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1261 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1262 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1263 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1264 tmp1 = emit_fetch( bld, inst, 0, CHAN_W ); 1265 tmp2 = emit_fetch( bld, inst, 1, CHAN_W ); 1266 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1267 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1268 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1269 dst0[chan_index] = tmp0; 1270 } 1271 break; 1272 1273 case TGSI_OPCODE_DST: 1274 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1275 dst0[CHAN_X] = bld->base.one; 1276 } 1277 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1278 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1279 tmp1 = emit_fetch( bld, inst, 1, CHAN_Y ); 1280 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp0, tmp1); 1281 } 1282 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1283 dst0[CHAN_Z] = emit_fetch( bld, inst, 0, CHAN_Z ); 1284 } 1285 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1286 dst0[CHAN_W] = emit_fetch( bld, inst, 1, CHAN_W ); 1287 } 1288 break; 1289 1290 case TGSI_OPCODE_MIN: 1291 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1292 src0 = emit_fetch( bld, inst, 0, chan_index ); 1293 src1 = emit_fetch( bld, inst, 1, chan_index ); 1294 dst0[chan_index] = lp_build_min( &bld->base, src0, src1 ); 1295 } 1296 break; 1297 1298 case TGSI_OPCODE_MAX: 1299 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1300 src0 = emit_fetch( bld, inst, 0, chan_index ); 1301 src1 = emit_fetch( bld, inst, 1, chan_index ); 1302 dst0[chan_index] = lp_build_max( &bld->base, src0, src1 ); 1303 } 1304 break; 1305 1306 case TGSI_OPCODE_SLT: 1307 /* TGSI_OPCODE_SETLT */ 1308 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1309 src0 = emit_fetch( bld, inst, 0, chan_index ); 1310 src1 = emit_fetch( bld, inst, 1, chan_index 
); 1311 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, src1 ); 1312 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1313 } 1314 break; 1315 1316 case TGSI_OPCODE_SGE: 1317 /* TGSI_OPCODE_SETGE */ 1318 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1319 src0 = emit_fetch( bld, inst, 0, chan_index ); 1320 src1 = emit_fetch( bld, inst, 1, chan_index ); 1321 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GEQUAL, src0, src1 ); 1322 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1323 } 1324 break; 1325 1326 case TGSI_OPCODE_MAD: 1327 /* TGSI_OPCODE_MADD */ 1328 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1329 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1330 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1331 tmp2 = emit_fetch( bld, inst, 2, chan_index ); 1332 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1333 tmp0 = lp_build_add( &bld->base, tmp0, tmp2); 1334 dst0[chan_index] = tmp0; 1335 } 1336 break; 1337 1338 case TGSI_OPCODE_SUB: 1339 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1340 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1341 tmp1 = emit_fetch( bld, inst, 1, chan_index ); 1342 dst0[chan_index] = lp_build_sub( &bld->base, tmp0, tmp1); 1343 } 1344 break; 1345 1346 case TGSI_OPCODE_LRP: 1347 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1348 src0 = emit_fetch( bld, inst, 0, chan_index ); 1349 src1 = emit_fetch( bld, inst, 1, chan_index ); 1350 src2 = emit_fetch( bld, inst, 2, chan_index ); 1351 tmp0 = lp_build_sub( &bld->base, src1, src2 ); 1352 tmp0 = lp_build_mul( &bld->base, src0, tmp0 ); 1353 dst0[chan_index] = lp_build_add( &bld->base, tmp0, src2 ); 1354 } 1355 break; 1356 1357 case TGSI_OPCODE_CND: 1358 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1359 src0 = emit_fetch( bld, inst, 0, chan_index ); 1360 src1 = emit_fetch( bld, inst, 1, chan_index ); 1361 src2 = emit_fetch( bld, inst, 2, chan_index ); 1362 tmp1 = 
lp_build_const_vec(bld->base.type, 0.5); 1363 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src2, tmp1); 1364 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src0, src1 ); 1365 } 1366 break; 1367 1368 case TGSI_OPCODE_DP2A: 1369 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1370 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1371 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1372 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1373 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1374 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1375 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1376 tmp1 = emit_fetch( bld, inst, 2, CHAN_X ); /* xmm1 = src[2].x */ 1377 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1378 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1379 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1380 } 1381 break; 1382 1383 case TGSI_OPCODE_FRC: 1384 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1385 src0 = emit_fetch( bld, inst, 0, chan_index ); 1386 tmp0 = lp_build_floor(&bld->base, src0); 1387 tmp0 = lp_build_sub(&bld->base, src0, tmp0); 1388 dst0[chan_index] = tmp0; 1389 } 1390 break; 1391 1392 case TGSI_OPCODE_CLAMP: 1393 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1394 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1395 src1 = emit_fetch( bld, inst, 1, chan_index ); 1396 src2 = emit_fetch( bld, inst, 2, chan_index ); 1397 tmp0 = lp_build_max(&bld->base, tmp0, src1); 1398 tmp0 = lp_build_min(&bld->base, tmp0, src2); 1399 dst0[chan_index] = tmp0; 1400 } 1401 break; 1402 1403 case TGSI_OPCODE_FLR: 1404 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1405 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1406 dst0[chan_index] = lp_build_floor(&bld->base, tmp0); 1407 } 1408 break; 1409 1410 case TGSI_OPCODE_ROUND: 1411 FOR_EACH_DST0_ENABLED_CHANNEL( inst, 
chan_index ) { 1412 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1413 dst0[chan_index] = lp_build_round(&bld->base, tmp0); 1414 } 1415 break; 1416 1417 case TGSI_OPCODE_EX2: { 1418 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1419 tmp0 = lp_build_exp2( &bld->base, tmp0); 1420 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1421 dst0[chan_index] = tmp0; 1422 } 1423 break; 1424 } 1425 1426 case TGSI_OPCODE_LG2: 1427 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1428 tmp0 = lp_build_log2( &bld->base, tmp0); 1429 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1430 dst0[chan_index] = tmp0; 1431 } 1432 break; 1433 1434 case TGSI_OPCODE_POW: 1435 src0 = emit_fetch( bld, inst, 0, CHAN_X ); 1436 src1 = emit_fetch( bld, inst, 1, CHAN_X ); 1437 res = lp_build_pow( &bld->base, src0, src1 ); 1438 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1439 dst0[chan_index] = res; 1440 } 1441 break; 1442 1443 case TGSI_OPCODE_XPD: 1444 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1445 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ) { 1446 tmp1 = emit_fetch( bld, inst, 1, CHAN_Z ); 1447 tmp3 = emit_fetch( bld, inst, 0, CHAN_Z ); 1448 } 1449 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) || 1450 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1451 tmp0 = emit_fetch( bld, inst, 0, CHAN_Y ); 1452 tmp4 = emit_fetch( bld, inst, 1, CHAN_Y ); 1453 } 1454 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1455 tmp2 = tmp0; 1456 tmp2 = lp_build_mul( &bld->base, tmp2, tmp1); 1457 tmp5 = tmp3; 1458 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1459 tmp2 = lp_build_sub( &bld->base, tmp2, tmp5); 1460 dst0[CHAN_X] = tmp2; 1461 } 1462 if( IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) || 1463 IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) ) { 1464 tmp2 = emit_fetch( bld, inst, 1, CHAN_X ); 1465 tmp5 = emit_fetch( bld, inst, 0, CHAN_X ); 1466 } 1467 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1468 tmp3 = lp_build_mul( &bld->base, tmp3, tmp2); 1469 tmp1 = lp_build_mul( &bld->base, tmp1, tmp5); 1470 tmp3 = lp_build_sub( 
&bld->base, tmp3, tmp1); 1471 dst0[CHAN_Y] = tmp3; 1472 } 1473 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1474 tmp5 = lp_build_mul( &bld->base, tmp5, tmp4); 1475 tmp0 = lp_build_mul( &bld->base, tmp0, tmp2); 1476 tmp5 = lp_build_sub( &bld->base, tmp5, tmp0); 1477 dst0[CHAN_Z] = tmp5; 1478 } 1479 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1480 dst0[CHAN_W] = bld->base.one; 1481 } 1482 break; 1483 1484 case TGSI_OPCODE_ABS: 1485 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1486 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1487 dst0[chan_index] = lp_build_abs( &bld->base, tmp0 ); 1488 } 1489 break; 1490 1491 case TGSI_OPCODE_RCC: 1492 /* deprecated? */ 1493 assert(0); 1494 return FALSE; 1495 1496 case TGSI_OPCODE_DPH: 1497 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1498 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); 1499 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); 1500 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); 1501 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); 1502 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1503 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1504 tmp1 = emit_fetch( bld, inst, 0, CHAN_Z ); 1505 tmp2 = emit_fetch( bld, inst, 1, CHAN_Z ); 1506 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); 1507 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1508 tmp1 = emit_fetch( bld, inst, 1, CHAN_W ); 1509 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1510 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1511 dst0[chan_index] = tmp0; 1512 } 1513 break; 1514 1515 case TGSI_OPCODE_COS: 1516 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1517 tmp0 = lp_build_cos( &bld->base, tmp0 ); 1518 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1519 dst0[chan_index] = tmp0; 1520 } 1521 break; 1522 1523 case TGSI_OPCODE_DDX: 1524 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1525 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, &dst0[chan_index], NULL); 1526 } 1527 break; 1528 1529 case TGSI_OPCODE_DDY: 1530 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 
1531 emit_fetch_deriv( bld, inst, 0, chan_index, NULL, NULL, &dst0[chan_index]); 1532 } 1533 break; 1534 1535 case TGSI_OPCODE_KILP: 1536 /* predicated kill */ 1537 emit_kilp( bld, inst ); 1538 break; 1539 1540 case TGSI_OPCODE_KIL: 1541 /* conditional kill */ 1542 emit_kil( bld, inst ); 1543 break; 1544 1545 case TGSI_OPCODE_PK2H: 1546 return FALSE; 1547 break; 1548 1549 case TGSI_OPCODE_PK2US: 1550 return FALSE; 1551 break; 1552 1553 case TGSI_OPCODE_PK4B: 1554 return FALSE; 1555 break; 1556 1557 case TGSI_OPCODE_PK4UB: 1558 return FALSE; 1559 break; 1560 1561 case TGSI_OPCODE_RFL: 1562 return FALSE; 1563 break; 1564 1565 case TGSI_OPCODE_SEQ: 1566 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1567 src0 = emit_fetch( bld, inst, 0, chan_index ); 1568 src1 = emit_fetch( bld, inst, 1, chan_index ); 1569 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_EQUAL, src0, src1 ); 1570 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1571 } 1572 break; 1573 1574 case TGSI_OPCODE_SFL: 1575 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1576 dst0[chan_index] = bld->base.zero; 1577 } 1578 break; 1579 1580 case TGSI_OPCODE_SGT: 1581 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1582 src0 = emit_fetch( bld, inst, 0, chan_index ); 1583 src1 = emit_fetch( bld, inst, 1, chan_index ); 1584 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_GREATER, src0, src1 ); 1585 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1586 } 1587 break; 1588 1589 case TGSI_OPCODE_SIN: 1590 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1591 tmp0 = lp_build_sin( &bld->base, tmp0 ); 1592 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1593 dst0[chan_index] = tmp0; 1594 } 1595 break; 1596 1597 case TGSI_OPCODE_SLE: 1598 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1599 src0 = emit_fetch( bld, inst, 0, chan_index ); 1600 src1 = emit_fetch( bld, inst, 1, chan_index ); 1601 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LEQUAL, 
src0, src1 ); 1602 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1603 } 1604 break; 1605 1606 case TGSI_OPCODE_SNE: 1607 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1608 src0 = emit_fetch( bld, inst, 0, chan_index ); 1609 src1 = emit_fetch( bld, inst, 1, chan_index ); 1610 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_NOTEQUAL, src0, src1 ); 1611 dst0[chan_index] = lp_build_select( &bld->base, tmp0, bld->base.one, bld->base.zero ); 1612 } 1613 break; 1614 1615 case TGSI_OPCODE_STR: 1616 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1617 dst0[chan_index] = bld->base.one; 1618 } 1619 break; 1620 1621 case TGSI_OPCODE_TEX: 1622 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_NONE, dst0 ); 1623 break; 1624 1625 case TGSI_OPCODE_TXD: 1626 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, dst0 ); 1627 break; 1628 1629 case TGSI_OPCODE_UP2H: 1630 /* deprecated */ 1631 assert (0); 1632 return FALSE; 1633 break; 1634 1635 case TGSI_OPCODE_UP2US: 1636 /* deprecated */ 1637 assert(0); 1638 return FALSE; 1639 break; 1640 1641 case TGSI_OPCODE_UP4B: 1642 /* deprecated */ 1643 assert(0); 1644 return FALSE; 1645 break; 1646 1647 case TGSI_OPCODE_UP4UB: 1648 /* deprecated */ 1649 assert(0); 1650 return FALSE; 1651 break; 1652 1653 case TGSI_OPCODE_X2D: 1654 /* deprecated? 
*/ 1655 assert(0); 1656 return FALSE; 1657 break; 1658 1659 case TGSI_OPCODE_ARA: 1660 /* deprecated */ 1661 assert(0); 1662 return FALSE; 1663 break; 1664 1665 case TGSI_OPCODE_ARR: 1666 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1667 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1668 tmp0 = lp_build_round(&bld->base, tmp0); 1669 dst0[chan_index] = tmp0; 1670 } 1671 break; 1672 1673 case TGSI_OPCODE_BRA: 1674 /* deprecated */ 1675 assert(0); 1676 return FALSE; 1677 break; 1678 1679 case TGSI_OPCODE_CAL: 1680 lp_exec_mask_call(&bld->exec_mask, 1681 inst->Label.Label, 1682 pc); 1683 1684 break; 1685 1686 case TGSI_OPCODE_RET: 1687 lp_exec_mask_ret(&bld->exec_mask, pc); 1688 break; 1689 1690 case TGSI_OPCODE_END: 1691 *pc = -1; 1692 break; 1693 1694 case TGSI_OPCODE_SSG: 1695 /* TGSI_OPCODE_SGN */ 1696 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1697 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1698 dst0[chan_index] = lp_build_sgn( &bld->base, tmp0 ); 1699 } 1700 break; 1701 1702 case TGSI_OPCODE_CMP: 1703 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1704 src0 = emit_fetch( bld, inst, 0, chan_index ); 1705 src1 = emit_fetch( bld, inst, 1, chan_index ); 1706 src2 = emit_fetch( bld, inst, 2, chan_index ); 1707 tmp0 = lp_build_cmp( &bld->base, PIPE_FUNC_LESS, src0, bld->base.zero ); 1708 dst0[chan_index] = lp_build_select( &bld->base, tmp0, src1, src2); 1709 } 1710 break; 1711 1712 case TGSI_OPCODE_SCS: 1713 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) { 1714 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1715 dst0[CHAN_X] = lp_build_cos( &bld->base, tmp0 ); 1716 } 1717 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) { 1718 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); 1719 dst0[CHAN_Y] = lp_build_sin( &bld->base, tmp0 ); 1720 } 1721 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z ) { 1722 dst0[CHAN_Z] = bld->base.zero; 1723 } 1724 IF_IS_DST0_CHANNEL_ENABLED( inst, CHAN_W ) { 1725 dst0[CHAN_W] = bld->base.one; 1726 } 1727 break; 1728 1729 case TGSI_OPCODE_TXB: 
1730 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, dst0 ); 1731 break; 1732 1733 case TGSI_OPCODE_NRM: 1734 /* fall-through */ 1735 case TGSI_OPCODE_NRM4: 1736 /* 3 or 4-component normalization */ 1737 { 1738 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; 1739 1740 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) || 1741 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y) || 1742 IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z) || 1743 (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 4)) { 1744 1745 /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */ 1746 1747 /* xmm4 = src.x */ 1748 /* xmm0 = src.x * src.x */ 1749 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1750 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1751 tmp4 = tmp0; 1752 } 1753 tmp0 = lp_build_mul( &bld->base, tmp0, tmp0); 1754 1755 /* xmm5 = src.y */ 1756 /* xmm0 = xmm0 + src.y * src.y */ 1757 tmp1 = emit_fetch(bld, inst, 0, CHAN_Y); 1758 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1759 tmp5 = tmp1; 1760 } 1761 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1762 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1763 1764 /* xmm6 = src.z */ 1765 /* xmm0 = xmm0 + src.z * src.z */ 1766 tmp1 = emit_fetch(bld, inst, 0, CHAN_Z); 1767 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1768 tmp6 = tmp1; 1769 } 1770 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1771 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1772 1773 if (dims == 4) { 1774 /* xmm7 = src.w */ 1775 /* xmm0 = xmm0 + src.w * src.w */ 1776 tmp1 = emit_fetch(bld, inst, 0, CHAN_W); 1777 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W)) { 1778 tmp7 = tmp1; 1779 } 1780 tmp1 = lp_build_mul( &bld->base, tmp1, tmp1); 1781 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); 1782 } 1783 1784 /* xmm1 = 1 / sqrt(xmm0) */ 1785 tmp1 = lp_build_rsqrt( &bld->base, tmp0); 1786 1787 /* dst.x = xmm1 * src.x */ 1788 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X)) { 1789 dst0[CHAN_X] = lp_build_mul( &bld->base, tmp4, tmp1); 1790 } 1791 1792 /* dst.y = xmm1 * src.y */ 1793 if 
(IS_DST0_CHANNEL_ENABLED(inst, CHAN_Y)) { 1794 dst0[CHAN_Y] = lp_build_mul( &bld->base, tmp5, tmp1); 1795 } 1796 1797 /* dst.z = xmm1 * src.z */ 1798 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_Z)) { 1799 dst0[CHAN_Z] = lp_build_mul( &bld->base, tmp6, tmp1); 1800 } 1801 1802 /* dst.w = xmm1 * src.w */ 1803 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_X) && dims == 4) { 1804 dst0[CHAN_W] = lp_build_mul( &bld->base, tmp7, tmp1); 1805 } 1806 } 1807 1808 /* dst.w = 1.0 */ 1809 if (IS_DST0_CHANNEL_ENABLED(inst, CHAN_W) && dims == 3) { 1810 dst0[CHAN_W] = bld->base.one; 1811 } 1812 } 1813 break; 1814 1815 case TGSI_OPCODE_DIV: 1816 /* deprecated */ 1817 assert( 0 ); 1818 return FALSE; 1819 break; 1820 1821 case TGSI_OPCODE_DP2: 1822 tmp0 = emit_fetch( bld, inst, 0, CHAN_X ); /* xmm0 = src[0].x */ 1823 tmp1 = emit_fetch( bld, inst, 1, CHAN_X ); /* xmm1 = src[1].x */ 1824 tmp0 = lp_build_mul( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 * xmm1 */ 1825 tmp1 = emit_fetch( bld, inst, 0, CHAN_Y ); /* xmm1 = src[0].y */ 1826 tmp2 = emit_fetch( bld, inst, 1, CHAN_Y ); /* xmm2 = src[1].y */ 1827 tmp1 = lp_build_mul( &bld->base, tmp1, tmp2); /* xmm1 = xmm1 * xmm2 */ 1828 tmp0 = lp_build_add( &bld->base, tmp0, tmp1); /* xmm0 = xmm0 + xmm1 */ 1829 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1830 dst0[chan_index] = tmp0; /* dest[ch] = xmm0 */ 1831 } 1832 break; 1833 1834 case TGSI_OPCODE_TXL: 1835 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, dst0 ); 1836 break; 1837 1838 case TGSI_OPCODE_TXP: 1839 emit_tex( bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED, dst0 ); 1840 break; 1841 1842 case TGSI_OPCODE_BRK: 1843 lp_exec_break(&bld->exec_mask); 1844 break; 1845 1846 case TGSI_OPCODE_IF: 1847 tmp0 = emit_fetch(bld, inst, 0, CHAN_X); 1848 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, 1849 tmp0, bld->base.zero); 1850 lp_exec_mask_cond_push(&bld->exec_mask, tmp0); 1851 break; 1852 1853 case TGSI_OPCODE_BGNLOOP: 1854 lp_exec_bgnloop(&bld->exec_mask); 1855 break; 1856 1857 case 
TGSI_OPCODE_BGNSUB: 1858 lp_exec_mask_bgnsub(&bld->exec_mask); 1859 break; 1860 1861 case TGSI_OPCODE_ELSE: 1862 lp_exec_mask_cond_invert(&bld->exec_mask); 1863 break; 1864 1865 case TGSI_OPCODE_ENDIF: 1866 lp_exec_mask_cond_pop(&bld->exec_mask); 1867 break; 1868 1869 case TGSI_OPCODE_ENDLOOP: 1870 lp_exec_endloop(&bld->exec_mask); 1871 break; 1872 1873 case TGSI_OPCODE_ENDSUB: 1874 lp_exec_mask_endsub(&bld->exec_mask, pc); 1875 break; 1876 1877 case TGSI_OPCODE_PUSHA: 1878 /* deprecated? */ 1879 assert(0); 1880 return FALSE; 1881 break; 1882 1883 case TGSI_OPCODE_POPA: 1884 /* deprecated? */ 1885 assert(0); 1886 return FALSE; 1887 break; 1888 1889 case TGSI_OPCODE_CEIL: 1890 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1891 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1892 dst0[chan_index] = lp_build_ceil(&bld->base, tmp0); 1893 } 1894 break; 1895 1896 case TGSI_OPCODE_I2F: 1897 /* deprecated? */ 1898 assert(0); 1899 return FALSE; 1900 break; 1901 1902 case TGSI_OPCODE_NOT: 1903 /* deprecated? */ 1904 assert(0); 1905 return FALSE; 1906 break; 1907 1908 case TGSI_OPCODE_TRUNC: 1909 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1910 tmp0 = emit_fetch( bld, inst, 0, chan_index ); 1911 dst0[chan_index] = lp_build_trunc(&bld->base, tmp0); 1912 } 1913 break; 1914 1915 case TGSI_OPCODE_SHL: 1916 /* deprecated? */ 1917 assert(0); 1918 return FALSE; 1919 break; 1920 1921 case TGSI_OPCODE_ISHR: 1922 /* deprecated? */ 1923 assert(0); 1924 return FALSE; 1925 break; 1926 1927 case TGSI_OPCODE_AND: 1928 /* deprecated? */ 1929 assert(0); 1930 return FALSE; 1931 break; 1932 1933 case TGSI_OPCODE_OR: 1934 /* deprecated? */ 1935 assert(0); 1936 return FALSE; 1937 break; 1938 1939 case TGSI_OPCODE_MOD: 1940 /* deprecated? */ 1941 assert(0); 1942 return FALSE; 1943 break; 1944 1945 case TGSI_OPCODE_XOR: 1946 /* deprecated? */ 1947 assert(0); 1948 return FALSE; 1949 break; 1950 1951 case TGSI_OPCODE_SAD: 1952 /* deprecated? 
*/ 1953 assert(0); 1954 return FALSE; 1955 break; 1956 1957 case TGSI_OPCODE_TXF: 1958 /* deprecated? */ 1959 assert(0); 1960 return FALSE; 1961 break; 1962 1963 case TGSI_OPCODE_TXQ: 1964 /* deprecated? */ 1965 assert(0); 1966 return FALSE; 1967 break; 1968 1969 case TGSI_OPCODE_CONT: 1970 lp_exec_continue(&bld->exec_mask); 1971 break; 1972 1973 case TGSI_OPCODE_EMIT: 1974 return FALSE; 1975 break; 1976 1977 case TGSI_OPCODE_ENDPRIM: 1978 return FALSE; 1979 break; 1980 1981 case TGSI_OPCODE_NOP: 1982 break; 1983 1984 default: 1985 return FALSE; 1986 } 1987 1988 if(info->num_dst) { 1989 LLVMValueRef pred[NUM_CHANNELS]; 1990 1991 emit_fetch_predicate( bld, inst, pred ); 1992 1993 FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { 1994 emit_store( bld, inst, 0, chan_index, pred[chan_index], dst0[chan_index]); 1995 } 1996 } 1997 1998 return TRUE; 1999} 2000 2001 2002void 2003lp_build_tgsi_soa(LLVMBuilderRef builder, 2004 const struct tgsi_token *tokens, 2005 struct lp_type type, 2006 struct lp_build_mask_context *mask, 2007 LLVMValueRef consts_ptr, 2008 const LLVMValueRef *pos, 2009 const LLVMValueRef (*inputs)[NUM_CHANNELS], 2010 LLVMValueRef (*outputs)[NUM_CHANNELS], 2011 struct lp_build_sampler_soa *sampler, 2012 const struct tgsi_shader_info *info) 2013{ 2014 struct lp_build_tgsi_soa_context bld; 2015 struct tgsi_parse_context parse; 2016 uint num_immediates = 0; 2017 uint num_instructions = 0; 2018 unsigned i; 2019 int pc = 0; 2020 2021 /* Setup build context */ 2022 memset(&bld, 0, sizeof bld); 2023 lp_build_context_init(&bld.base, builder, type); 2024 lp_build_context_init(&bld.int_bld, builder, lp_int_type(type)); 2025 bld.mask = mask; 2026 bld.pos = pos; 2027 bld.inputs = inputs; 2028 bld.outputs = outputs; 2029 bld.consts_ptr = consts_ptr; 2030 bld.sampler = sampler; 2031 bld.indirect_files = info->indirect_files; 2032 bld.instructions = (struct tgsi_full_instruction *) 2033 MALLOC( LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction) ); 2034 
bld.max_instructions = LP_MAX_INSTRUCTIONS; 2035 2036 if (!bld.instructions) { 2037 return; 2038 } 2039 2040 lp_exec_mask_init(&bld.exec_mask, &bld.base); 2041 2042 tgsi_parse_init( &parse, tokens ); 2043 2044 while( !tgsi_parse_end_of_tokens( &parse ) ) { 2045 tgsi_parse_token( &parse ); 2046 2047 switch( parse.FullToken.Token.Type ) { 2048 case TGSI_TOKEN_TYPE_DECLARATION: 2049 /* Inputs already interpolated */ 2050 emit_declaration( &bld, &parse.FullToken.FullDeclaration ); 2051 break; 2052 2053 case TGSI_TOKEN_TYPE_INSTRUCTION: 2054 { 2055 /* save expanded instruction */ 2056 if (num_instructions == bld.max_instructions) { 2057 struct tgsi_full_instruction *instructions; 2058 instructions = REALLOC(bld.instructions, 2059 bld.max_instructions 2060 * sizeof(struct tgsi_full_instruction), 2061 (bld.max_instructions + LP_MAX_INSTRUCTIONS) 2062 * sizeof(struct tgsi_full_instruction)); 2063 if (!instructions) { 2064 break; 2065 } 2066 bld.instructions = instructions; 2067 bld.max_instructions += LP_MAX_INSTRUCTIONS; 2068 } 2069 2070 memcpy(bld.instructions + num_instructions, 2071 &parse.FullToken.FullInstruction, 2072 sizeof(bld.instructions[0])); 2073 2074 num_instructions++; 2075 } 2076 2077 break; 2078 2079 case TGSI_TOKEN_TYPE_IMMEDIATE: 2080 /* simply copy the immediate values into the next immediates[] slot */ 2081 { 2082 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 2083 assert(size <= 4); 2084 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 2085 for( i = 0; i < size; ++i ) 2086 bld.immediates[num_immediates][i] = 2087 lp_build_const_vec(type, parse.FullToken.FullImmediate.u[i].Float); 2088 for( i = size; i < 4; ++i ) 2089 bld.immediates[num_immediates][i] = bld.base.undef; 2090 num_immediates++; 2091 } 2092 break; 2093 2094 case TGSI_TOKEN_TYPE_PROPERTY: 2095 break; 2096 2097 default: 2098 assert( 0 ); 2099 } 2100 } 2101 2102 while (pc != -1) { 2103 struct tgsi_full_instruction *instr = bld.instructions + pc; 2104 const struct 
tgsi_opcode_info *opcode_info = 2105 tgsi_get_opcode_info(instr->Instruction.Opcode); 2106 if (!emit_instruction( &bld, instr, opcode_info, &pc )) 2107 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 2108 opcode_info->mnemonic); 2109 } 2110 2111 if (0) { 2112 LLVMBasicBlockRef block = LLVMGetInsertBlock(builder); 2113 LLVMValueRef function = LLVMGetBasicBlockParent(block); 2114 debug_printf("11111111111111111111111111111 \n"); 2115 tgsi_dump(tokens, 0); 2116 lp_debug_dump_value(function); 2117 debug_printf("2222222222222222222222222222 \n"); 2118 } 2119 tgsi_parse_free( &parse ); 2120 2121 if (0) { 2122 LLVMModuleRef module = LLVMGetGlobalParent( 2123 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder))); 2124 LLVMDumpModule(module); 2125 2126 } 2127 2128 FREE( bld.instructions ); 2129} 2130 2131