/* lp_bld_tgsi_aos.c revision 8b3c99a5ebbc2f8b586d8ae2bd9aa5c55bbf3f04 */
1/************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * TGSI to LLVM IR translation -- AoS. 31 * 32 * FIXME: 33 * - No control flow support: the existing control flow code should be factored 34 * out into from the SoA code into a common module and shared. 35 * - No derivatives. Derivate logic should be pluggable, just like the samplers. 
36 * 37 * @author Jose Fonseca <jfonseca@vmware.com> 38 */ 39 40#include "pipe/p_config.h" 41#include "pipe/p_shader_tokens.h" 42#include "util/u_debug.h" 43#include "util/u_math.h" 44#include "util/u_memory.h" 45#include "tgsi/tgsi_dump.h" 46#include "tgsi/tgsi_info.h" 47#include "tgsi/tgsi_parse.h" 48#include "tgsi/tgsi_util.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_type.h" 51#include "lp_bld_const.h" 52#include "lp_bld_arit.h" 53#include "lp_bld_logic.h" 54#include "lp_bld_swizzle.h" 55#include "lp_bld_flow.h" 56#include "lp_bld_quad.h" 57#include "lp_bld_tgsi.h" 58#include "lp_bld_limits.h" 59#include "lp_bld_debug.h" 60 61 62#define LP_MAX_INSTRUCTIONS 256 63 64 65struct lp_build_tgsi_aos_context 66{ 67 struct lp_build_context base; 68 69 /* Builder for integer masks and indices */ 70 struct lp_build_context int_bld; 71 72 /* 73 * AoS swizzle used: 74 * - swizzles[0] = red index 75 * - swizzles[1] = green index 76 * - swizzles[2] = blue index 77 * - swizzles[3] = alpha index 78 */ 79 unsigned char swizzles[4]; 80 unsigned char inv_swizzles[4]; 81 82 LLVMValueRef consts_ptr; 83 const LLVMValueRef *inputs; 84 LLVMValueRef *outputs; 85 86 struct lp_build_sampler_aos *sampler; 87 88 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES]; 89 LLVMValueRef temps[LP_MAX_TGSI_TEMPS]; 90 LLVMValueRef addr[LP_MAX_TGSI_ADDRS]; 91 LLVMValueRef preds[LP_MAX_TGSI_PREDS]; 92 93 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 94 * set in the indirect_files field. 95 * The temps[] array above is unused then. 96 */ 97 LLVMValueRef temps_array; 98 99 /** bitmask indicating which register files are accessed indirectly */ 100 unsigned indirect_files; 101 102 struct tgsi_full_instruction *instructions; 103 uint max_instructions; 104}; 105 106 107/** 108 * Wrapper around lp_build_swizzle_aos which translates swizzles to another 109 * ordering. 
110 */ 111static LLVMValueRef 112swizzle_aos(struct lp_build_tgsi_aos_context *bld, 113 LLVMValueRef a, 114 unsigned swizzle_x, 115 unsigned swizzle_y, 116 unsigned swizzle_z, 117 unsigned swizzle_w) 118{ 119 unsigned char swizzles[4]; 120 121 assert(swizzle_x < 4); 122 assert(swizzle_y < 4); 123 assert(swizzle_z < 4); 124 assert(swizzle_w < 4); 125 126 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x]; 127 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y]; 128 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z]; 129 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w]; 130 131 return lp_build_swizzle_aos(&bld->base, a, swizzles); 132} 133 134 135static LLVMValueRef 136swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld, 137 LLVMValueRef a, 138 unsigned chan) 139{ 140 chan = bld->swizzles[chan]; 141 return lp_build_swizzle_scalar_aos(&bld->base, a, chan); 142} 143 144 145/** 146 * Register fetch. 147 */ 148static LLVMValueRef 149emit_fetch( 150 struct lp_build_tgsi_aos_context *bld, 151 const struct tgsi_full_instruction *inst, 152 unsigned src_op) 153{ 154 LLVMBuilderRef builder = bld->base.gallivm->builder; 155 struct lp_type type = bld->base.type; 156 const struct tgsi_full_src_register *reg = &inst->Src[src_op]; 157 LLVMValueRef res; 158 unsigned chan; 159 160 assert(!reg->Register.Indirect); 161 162 /* 163 * Fetch the from the register file. 
164 */ 165 166 switch (reg->Register.File) { 167 case TGSI_FILE_CONSTANT: 168 /* 169 * Get the constants components 170 */ 171 172 res = bld->base.undef; 173 for (chan = 0; chan < 4; ++chan) { 174 LLVMValueRef index; 175 LLVMValueRef scalar_ptr; 176 LLVMValueRef scalar; 177 LLVMValueRef swizzle; 178 179 index = lp_build_const_int32(bld->base.gallivm, reg->Register.Index * 4 + chan); 180 181 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, 182 &index, 1, ""); 183 184 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 185 186 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); 187 188 /* 189 * NOTE: constants array is always assumed to be RGBA 190 */ 191 192 swizzle = lp_build_const_int32(bld->base.gallivm, bld->swizzles[chan]); 193 194 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, ""); 195 } 196 197 /* 198 * Broadcast the first quaternion to all others. 199 * 200 * XXX: could be factored into a reusable function. 201 */ 202 203 if (type.length > 4) { 204 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 205 unsigned i; 206 207 for (chan = 0; chan < 4; ++chan) { 208 shuffles[chan] = lp_build_const_int32(bld->base.gallivm, chan); 209 } 210 211 for (i = 4; i < type.length; ++i) { 212 shuffles[i] = shuffles[i % 4]; 213 } 214 215 res = LLVMBuildShuffleVector(builder, 216 res, bld->base.undef, 217 LLVMConstVector(shuffles, type.length), 218 ""); 219 } 220 break; 221 222 case TGSI_FILE_IMMEDIATE: 223 res = bld->immediates[reg->Register.Index]; 224 assert(res); 225 break; 226 227 case TGSI_FILE_INPUT: 228 res = bld->inputs[reg->Register.Index]; 229 assert(res); 230 break; 231 232 case TGSI_FILE_TEMPORARY: 233 { 234 LLVMValueRef temp_ptr; 235 temp_ptr = bld->temps[reg->Register.Index]; 236 res = LLVMBuildLoad(builder, temp_ptr, ""); 237 if (!res) 238 return bld->base.undef; 239 } 240 break; 241 242 default: 243 assert(0 && "invalid src register in emit_fetch()"); 244 return bld->base.undef; 245 } 246 247 /* 248 * Apply sign modifier. 
249 */ 250 251 if (reg->Register.Absolute) { 252 res = lp_build_abs(&bld->base, res); 253 } 254 255 if(reg->Register.Negate) { 256 res = lp_build_negate(&bld->base, res); 257 } 258 259 /* 260 * Swizzle the argument 261 */ 262 263 res = swizzle_aos(bld, res, 264 reg->Register.SwizzleX, 265 reg->Register.SwizzleY, 266 reg->Register.SwizzleZ, 267 reg->Register.SwizzleW); 268 269 return res; 270} 271 272 273/** 274 * Register store. 275 */ 276static void 277emit_store( 278 struct lp_build_tgsi_aos_context *bld, 279 const struct tgsi_full_instruction *inst, 280 unsigned index, 281 LLVMValueRef value) 282{ 283 LLVMBuilderRef builder = bld->base.gallivm->builder; 284 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 285 LLVMValueRef mask = NULL; 286 LLVMValueRef ptr; 287 288 /* 289 * Saturate the value 290 */ 291 292 switch (inst->Instruction.Saturate) { 293 case TGSI_SAT_NONE: 294 break; 295 296 case TGSI_SAT_ZERO_ONE: 297 value = lp_build_max(&bld->base, value, bld->base.zero); 298 value = lp_build_min(&bld->base, value, bld->base.one); 299 break; 300 301 case TGSI_SAT_MINUS_PLUS_ONE: 302 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0)); 303 value = lp_build_min(&bld->base, value, bld->base.one); 304 break; 305 306 default: 307 assert(0); 308 } 309 310 /* 311 * Translate the register file 312 */ 313 314 assert(!reg->Register.Indirect); 315 316 switch (reg->Register.File) { 317 case TGSI_FILE_OUTPUT: 318 ptr = bld->outputs[reg->Register.Index]; 319 break; 320 321 case TGSI_FILE_TEMPORARY: 322 ptr = bld->temps[reg->Register.Index]; 323 break; 324 325 case TGSI_FILE_ADDRESS: 326 ptr = bld->addr[reg->Indirect.Index]; 327 break; 328 329 case TGSI_FILE_PREDICATE: 330 ptr = bld->preds[reg->Register.Index]; 331 break; 332 333 default: 334 assert(0); 335 return; 336 } 337 338 /* 339 * Predicate 340 */ 341 342 if (inst->Instruction.Predicate) { 343 LLVMValueRef pred; 344 345 assert(inst->Predicate.Index < 
LP_MAX_TGSI_PREDS); 346 347 pred = LLVMBuildLoad(builder, 348 bld->preds[inst->Predicate.Index], ""); 349 350 /* 351 * Convert the value to an integer mask. 352 */ 353 pred = lp_build_compare(bld->base.gallivm, 354 bld->base.type, 355 PIPE_FUNC_NOTEQUAL, 356 pred, 357 bld->base.zero); 358 359 if (inst->Predicate.Negate) { 360 pred = LLVMBuildNot(builder, pred, ""); 361 } 362 363 pred = swizzle_aos(bld, pred, 364 inst->Predicate.SwizzleX, 365 inst->Predicate.SwizzleY, 366 inst->Predicate.SwizzleZ, 367 inst->Predicate.SwizzleW); 368 369 if (mask) { 370 mask = LLVMBuildAnd(builder, mask, pred, ""); 371 } else { 372 mask = pred; 373 } 374 } 375 376 /* 377 * Writemask 378 */ 379 380 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { 381 LLVMValueRef writemask; 382 383 writemask = lp_build_const_mask_aos(bld->base.gallivm, bld->base.type, 384 reg->Register.WriteMask); 385 386 if (mask) { 387 mask = LLVMBuildAnd(builder, mask, writemask, ""); 388 } else { 389 mask = writemask; 390 } 391 } 392 393 if (mask) { 394 LLVMValueRef orig_value; 395 396 orig_value = LLVMBuildLoad(builder, ptr, ""); 397 value = lp_build_select(&bld->base, 398 mask, value, orig_value); 399 } 400 401 LLVMBuildStore(builder, value, ptr); 402} 403 404 405/** 406 * High-level instruction translators. 
407 */ 408 409static LLVMValueRef 410emit_tex(struct lp_build_tgsi_aos_context *bld, 411 const struct tgsi_full_instruction *inst, 412 enum lp_build_tex_modifier modifier) 413{ 414 unsigned target; 415 unsigned unit; 416 LLVMValueRef coords; 417 LLVMValueRef ddx; 418 LLVMValueRef ddy; 419 420 if (!bld->sampler) { 421 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 422 return bld->base.undef; 423 } 424 425 target = inst->Texture.Texture; 426 427 coords = emit_fetch( bld, inst, 0 ); 428 429 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 430 ddx = emit_fetch( bld, inst, 1 ); 431 ddy = emit_fetch( bld, inst, 2 ); 432 unit = inst->Src[3].Register.Index; 433 } else { 434#if 0 435 ddx = lp_build_ddx( &bld->base, coords ); 436 ddy = lp_build_ddy( &bld->base, coords ); 437#else 438 /* TODO */ 439 ddx = bld->base.one; 440 ddy = bld->base.one; 441#endif 442 unit = inst->Src[1].Register.Index; 443 } 444 445 return bld->sampler->emit_fetch_texel(bld->sampler, 446 &bld->base, 447 target, unit, 448 coords, ddx, ddy, 449 modifier); 450} 451 452 453static void 454emit_declaration( 455 struct lp_build_tgsi_aos_context *bld, 456 const struct tgsi_full_declaration *decl) 457{ 458 struct gallivm_state *gallivm = bld->base.gallivm; 459 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm, bld->base.type); 460 461 unsigned first = decl->Range.First; 462 unsigned last = decl->Range.Last; 463 unsigned idx; 464 465 for (idx = first; idx <= last; ++idx) { 466 switch (decl->Declaration.File) { 467 case TGSI_FILE_TEMPORARY: 468 assert(idx < LP_MAX_TGSI_TEMPS); 469 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 470 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1); 471 bld->temps_array = lp_build_array_alloca(bld->base.gallivm, 472 vec_type, array_size, ""); 473 } else { 474 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, ""); 475 } 476 break; 477 478 case TGSI_FILE_OUTPUT: 479 bld->outputs[idx] = 
lp_build_alloca(gallivm, vec_type, ""); 480 break; 481 482 case TGSI_FILE_ADDRESS: 483 assert(idx < LP_MAX_TGSI_ADDRS); 484 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, ""); 485 break; 486 487 case TGSI_FILE_PREDICATE: 488 assert(idx < LP_MAX_TGSI_PREDS); 489 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, ""); 490 break; 491 492 default: 493 /* don't need to declare other vars */ 494 break; 495 } 496 } 497} 498 499 500/** 501 * Emit LLVM for one TGSI instruction. 502 * \param return TRUE for success, FALSE otherwise 503 */ 504static boolean 505emit_instruction( 506 struct lp_build_tgsi_aos_context *bld, 507 const struct tgsi_full_instruction *inst, 508 const struct tgsi_opcode_info *info, 509 int *pc) 510{ 511 LLVMValueRef src0, src1, src2; 512 LLVMValueRef tmp0, tmp1; 513 LLVMValueRef dst0 = NULL; 514 515 /* 516 * Stores and write masks are handled in a general fashion after the long 517 * instruction opcode switch statement. 518 * 519 * Although not stricitly necessary, we avoid generating instructions for 520 * channels which won't be stored, in cases where's that easy. For some 521 * complex instructions, like texture sampling, it is more convenient to 522 * assume a full writemask and then let LLVM optimization passes eliminate 523 * redundant code. 
524 */ 525 526 (*pc)++; 527 528 assert(info->num_dst <= 1); 529 if (info->num_dst) { 530 dst0 = bld->base.undef; 531 } 532 533 switch (inst->Instruction.Opcode) { 534 case TGSI_OPCODE_ARL: 535 src0 = emit_fetch(bld, inst, 0); 536 dst0 = lp_build_floor(&bld->base, src0); 537 break; 538 539 case TGSI_OPCODE_MOV: 540 dst0 = emit_fetch(bld, inst, 0); 541 break; 542 543 case TGSI_OPCODE_LIT: 544 return FALSE; 545 546 case TGSI_OPCODE_RCP: 547 /* TGSI_OPCODE_RECIP */ 548 src0 = emit_fetch(bld, inst, 0); 549 dst0 = lp_build_rcp(&bld->base, src0); 550 break; 551 552 case TGSI_OPCODE_RSQ: 553 /* TGSI_OPCODE_RECIPSQRT */ 554 src0 = emit_fetch(bld, inst, 0); 555 tmp0 = lp_build_abs(&bld->base, src0); 556 dst0 = lp_build_rsqrt(&bld->base, tmp0); 557 break; 558 559 case TGSI_OPCODE_EXP: 560 return FALSE; 561 562 case TGSI_OPCODE_LOG: 563 return FALSE; 564 565 case TGSI_OPCODE_MUL: 566 src0 = emit_fetch(bld, inst, 0); 567 src1 = emit_fetch(bld, inst, 1); 568 dst0 = lp_build_mul(&bld->base, src0, src1); 569 break; 570 571 case TGSI_OPCODE_ADD: 572 src0 = emit_fetch(bld, inst, 0); 573 src1 = emit_fetch(bld, inst, 1); 574 dst0 = lp_build_add(&bld->base, src0, src1); 575 break; 576 577 case TGSI_OPCODE_DP3: 578 /* TGSI_OPCODE_DOT3 */ 579 return FALSE; 580 581 case TGSI_OPCODE_DP4: 582 /* TGSI_OPCODE_DOT4 */ 583 return FALSE; 584 585 case TGSI_OPCODE_DST: 586 return FALSE; 587 588 case TGSI_OPCODE_MIN: 589 src0 = emit_fetch(bld, inst, 0); 590 src1 = emit_fetch(bld, inst, 1); 591 dst0 = lp_build_max(&bld->base, src0, src1); 592 break; 593 594 case TGSI_OPCODE_MAX: 595 src0 = emit_fetch(bld, inst, 0); 596 src1 = emit_fetch(bld, inst, 1); 597 dst0 = lp_build_max(&bld->base, src0, src1); 598 break; 599 600 case TGSI_OPCODE_SLT: 601 /* TGSI_OPCODE_SETLT */ 602 src0 = emit_fetch(bld, inst, 0); 603 src1 = emit_fetch(bld, inst, 1); 604 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1); 605 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 606 break; 607 608 
case TGSI_OPCODE_SGE: 609 /* TGSI_OPCODE_SETGE */ 610 src0 = emit_fetch(bld, inst, 0); 611 src1 = emit_fetch(bld, inst, 1); 612 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1); 613 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 614 break; 615 616 case TGSI_OPCODE_MAD: 617 /* TGSI_OPCODE_MADD */ 618 src0 = emit_fetch(bld, inst, 0); 619 src1 = emit_fetch(bld, inst, 1); 620 src2 = emit_fetch(bld, inst, 2); 621 tmp0 = lp_build_mul(&bld->base, src0, src1); 622 dst0 = lp_build_add(&bld->base, tmp0, src2); 623 break; 624 625 case TGSI_OPCODE_SUB: 626 src0 = emit_fetch(bld, inst, 0); 627 src1 = emit_fetch(bld, inst, 1); 628 dst0 = lp_build_sub(&bld->base, src0, src1); 629 break; 630 631 case TGSI_OPCODE_LRP: 632 src0 = emit_fetch(bld, inst, 0); 633 src1 = emit_fetch(bld, inst, 1); 634 src2 = emit_fetch(bld, inst, 2); 635 tmp0 = lp_build_sub(&bld->base, src1, src2); 636 tmp0 = lp_build_mul(&bld->base, src0, tmp0); 637 dst0 = lp_build_add(&bld->base, tmp0, src2); 638 break; 639 640 case TGSI_OPCODE_CND: 641 src0 = emit_fetch(bld, inst, 0); 642 src1 = emit_fetch(bld, inst, 1); 643 src2 = emit_fetch(bld, inst, 2); 644 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); 645 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1); 646 dst0 = lp_build_select(&bld->base, tmp0, src0, src1); 647 break; 648 649 case TGSI_OPCODE_DP2A: 650 return FALSE; 651 652 case TGSI_OPCODE_FRC: 653 src0 = emit_fetch(bld, inst, 0); 654 tmp0 = lp_build_floor(&bld->base, src0); 655 dst0 = lp_build_sub(&bld->base, src0, tmp0); 656 break; 657 658 case TGSI_OPCODE_CLAMP: 659 src0 = emit_fetch(bld, inst, 0); 660 src1 = emit_fetch(bld, inst, 1); 661 src2 = emit_fetch(bld, inst, 2); 662 tmp0 = lp_build_max(&bld->base, src0, src1); 663 dst0 = lp_build_min(&bld->base, tmp0, src2); 664 break; 665 666 case TGSI_OPCODE_FLR: 667 src0 = emit_fetch(bld, inst, 0); 668 dst0 = lp_build_floor(&bld->base, src0); 669 break; 670 671 case TGSI_OPCODE_ROUND: 672 
src0 = emit_fetch(bld, inst, 0); 673 dst0 = lp_build_round(&bld->base, src0); 674 break; 675 676 case TGSI_OPCODE_EX2: 677 src0 = emit_fetch(bld, inst, 0); 678 tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X); 679 dst0 = lp_build_exp2(&bld->base, tmp0); 680 break; 681 682 case TGSI_OPCODE_LG2: 683 src0 = emit_fetch(bld, inst, 0); 684 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 685 dst0 = lp_build_log2(&bld->base, tmp0); 686 break; 687 688 case TGSI_OPCODE_POW: 689 src0 = emit_fetch(bld, inst, 0); 690 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 691 src1 = emit_fetch(bld, inst, 1); 692 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X); 693 dst0 = lp_build_pow(&bld->base, src0, src1); 694 break; 695 696 case TGSI_OPCODE_XPD: 697 return FALSE; 698 699 case TGSI_OPCODE_ABS: 700 src0 = emit_fetch(bld, inst, 0); 701 dst0 = lp_build_abs(&bld->base, src0); 702 break; 703 704 case TGSI_OPCODE_RCC: 705 /* deprecated? */ 706 assert(0); 707 return FALSE; 708 709 case TGSI_OPCODE_DPH: 710 return FALSE; 711 712 case TGSI_OPCODE_COS: 713 src0 = emit_fetch(bld, inst, 0); 714 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 715 dst0 = lp_build_cos(&bld->base, tmp0); 716 break; 717 718 case TGSI_OPCODE_DDX: 719 return FALSE; 720 721 case TGSI_OPCODE_DDY: 722 return FALSE; 723 724 case TGSI_OPCODE_KILP: 725 /* predicated kill */ 726 return FALSE; 727 728 case TGSI_OPCODE_KIL: 729 /* conditional kill */ 730 return FALSE; 731 732 case TGSI_OPCODE_PK2H: 733 return FALSE; 734 break; 735 736 case TGSI_OPCODE_PK2US: 737 return FALSE; 738 break; 739 740 case TGSI_OPCODE_PK4B: 741 return FALSE; 742 break; 743 744 case TGSI_OPCODE_PK4UB: 745 return FALSE; 746 747 case TGSI_OPCODE_RFL: 748 return FALSE; 749 750 case TGSI_OPCODE_SEQ: 751 src0 = emit_fetch(bld, inst, 0); 752 src1 = emit_fetch(bld, inst, 1); 753 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1); 754 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 
755 break; 756 757 case TGSI_OPCODE_SFL: 758 dst0 = bld->base.zero; 759 break; 760 761 case TGSI_OPCODE_SGT: 762 src0 = emit_fetch(bld, inst, 0); 763 src1 = emit_fetch(bld, inst, 1); 764 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1); 765 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 766 break; 767 768 case TGSI_OPCODE_SIN: 769 src0 = emit_fetch(bld, inst, 0); 770 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 771 dst0 = lp_build_sin(&bld->base, tmp0); 772 break; 773 774 case TGSI_OPCODE_SLE: 775 src0 = emit_fetch(bld, inst, 0); 776 src1 = emit_fetch(bld, inst, 1); 777 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1); 778 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 779 break; 780 781 case TGSI_OPCODE_SNE: 782 src0 = emit_fetch(bld, inst, 0); 783 src1 = emit_fetch(bld, inst, 1); 784 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1); 785 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 786 break; 787 788 case TGSI_OPCODE_STR: 789 dst0 = bld->base.one; 790 break; 791 792 case TGSI_OPCODE_TEX: 793 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); 794 break; 795 796 case TGSI_OPCODE_TXD: 797 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); 798 break; 799 800 case TGSI_OPCODE_UP2H: 801 /* deprecated */ 802 assert (0); 803 return FALSE; 804 break; 805 806 case TGSI_OPCODE_UP2US: 807 /* deprecated */ 808 assert(0); 809 return FALSE; 810 break; 811 812 case TGSI_OPCODE_UP4B: 813 /* deprecated */ 814 assert(0); 815 return FALSE; 816 break; 817 818 case TGSI_OPCODE_UP4UB: 819 /* deprecated */ 820 assert(0); 821 return FALSE; 822 break; 823 824 case TGSI_OPCODE_X2D: 825 /* deprecated? 
*/ 826 assert(0); 827 return FALSE; 828 break; 829 830 case TGSI_OPCODE_ARA: 831 /* deprecated */ 832 assert(0); 833 return FALSE; 834 break; 835 836 case TGSI_OPCODE_ARR: 837 src0 = emit_fetch(bld, inst, 0); 838 dst0 = lp_build_round(&bld->base, src0); 839 break; 840 841 case TGSI_OPCODE_BRA: 842 /* deprecated */ 843 assert(0); 844 return FALSE; 845 break; 846 847 case TGSI_OPCODE_CAL: 848 return FALSE; 849 850 case TGSI_OPCODE_RET: 851 return FALSE; 852 853 case TGSI_OPCODE_END: 854 *pc = -1; 855 break; 856 857 case TGSI_OPCODE_SSG: 858 /* TGSI_OPCODE_SGN */ 859 tmp0 = emit_fetch(bld, inst, 0); 860 dst0 = lp_build_sgn(&bld->base, tmp0); 861 break; 862 863 case TGSI_OPCODE_CMP: 864 src0 = emit_fetch(bld, inst, 0); 865 src1 = emit_fetch(bld, inst, 1); 866 src2 = emit_fetch(bld, inst, 2); 867 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero); 868 dst0 = lp_build_select(&bld->base, tmp0, src1, src2); 869 break; 870 871 case TGSI_OPCODE_SCS: 872 return FALSE; 873 874 case TGSI_OPCODE_TXB: 875 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); 876 break; 877 878 case TGSI_OPCODE_NRM: 879 /* fall-through */ 880 case TGSI_OPCODE_NRM4: 881 return FALSE; 882 883 case TGSI_OPCODE_DIV: 884 /* deprecated */ 885 assert(0); 886 return FALSE; 887 break; 888 889 case TGSI_OPCODE_DP2: 890 return FALSE; 891 892 case TGSI_OPCODE_TXL: 893 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); 894 break; 895 896 case TGSI_OPCODE_TXP: 897 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED); 898 break; 899 900 case TGSI_OPCODE_BRK: 901 return FALSE; 902 903 case TGSI_OPCODE_IF: 904 return FALSE; 905 906 case TGSI_OPCODE_BGNLOOP: 907 return FALSE; 908 909 case TGSI_OPCODE_BGNSUB: 910 return FALSE; 911 912 case TGSI_OPCODE_ELSE: 913 return FALSE; 914 915 case TGSI_OPCODE_ENDIF: 916 return FALSE; 917 918 case TGSI_OPCODE_ENDLOOP: 919 return FALSE; 920 921 case TGSI_OPCODE_ENDSUB: 922 return FALSE; 923 924 case TGSI_OPCODE_PUSHA: 925 /* deprecated? 
*/ 926 assert(0); 927 return FALSE; 928 break; 929 930 case TGSI_OPCODE_POPA: 931 /* deprecated? */ 932 assert(0); 933 return FALSE; 934 break; 935 936 case TGSI_OPCODE_CEIL: 937 src0 = emit_fetch(bld, inst, 0); 938 dst0 = lp_build_ceil(&bld->base, src0); 939 break; 940 941 case TGSI_OPCODE_I2F: 942 /* deprecated? */ 943 assert(0); 944 return FALSE; 945 break; 946 947 case TGSI_OPCODE_NOT: 948 /* deprecated? */ 949 assert(0); 950 return FALSE; 951 break; 952 953 case TGSI_OPCODE_TRUNC: 954 src0 = emit_fetch(bld, inst, 0); 955 dst0 = lp_build_trunc(&bld->base, src0); 956 break; 957 958 case TGSI_OPCODE_SHL: 959 /* deprecated? */ 960 assert(0); 961 return FALSE; 962 break; 963 964 case TGSI_OPCODE_ISHR: 965 /* deprecated? */ 966 assert(0); 967 return FALSE; 968 break; 969 970 case TGSI_OPCODE_AND: 971 /* deprecated? */ 972 assert(0); 973 return FALSE; 974 break; 975 976 case TGSI_OPCODE_OR: 977 /* deprecated? */ 978 assert(0); 979 return FALSE; 980 break; 981 982 case TGSI_OPCODE_MOD: 983 /* deprecated? */ 984 assert(0); 985 return FALSE; 986 break; 987 988 case TGSI_OPCODE_XOR: 989 /* deprecated? */ 990 assert(0); 991 return FALSE; 992 break; 993 994 case TGSI_OPCODE_SAD: 995 /* deprecated? */ 996 assert(0); 997 return FALSE; 998 break; 999 1000 case TGSI_OPCODE_TXF: 1001 /* deprecated? */ 1002 assert(0); 1003 return FALSE; 1004 break; 1005 1006 case TGSI_OPCODE_TXQ: 1007 /* deprecated? 
*/ 1008 assert(0); 1009 return FALSE; 1010 break; 1011 1012 case TGSI_OPCODE_CONT: 1013 return FALSE; 1014 1015 case TGSI_OPCODE_EMIT: 1016 return FALSE; 1017 break; 1018 1019 case TGSI_OPCODE_ENDPRIM: 1020 return FALSE; 1021 break; 1022 1023 case TGSI_OPCODE_NOP: 1024 break; 1025 1026 default: 1027 return FALSE; 1028 } 1029 1030 if (info->num_dst) { 1031 emit_store(bld, inst, 0, dst0); 1032 } 1033 1034 return TRUE; 1035} 1036 1037 1038void 1039lp_build_tgsi_aos(struct gallivm_state *gallivm, 1040 const struct tgsi_token *tokens, 1041 struct lp_type type, 1042 const unsigned char swizzles[4], 1043 LLVMValueRef consts_ptr, 1044 const LLVMValueRef *inputs, 1045 LLVMValueRef *outputs, 1046 struct lp_build_sampler_aos *sampler, 1047 const struct tgsi_shader_info *info) 1048{ 1049 struct lp_build_tgsi_aos_context bld; 1050 struct tgsi_parse_context parse; 1051 uint num_immediates = 0; 1052 uint num_instructions = 0; 1053 unsigned chan; 1054 int pc = 0; 1055 1056 /* Setup build context */ 1057 memset(&bld, 0, sizeof bld); 1058 lp_build_context_init(&bld.base, gallivm, type); 1059 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type)); 1060 1061 for (chan = 0; chan < 4; ++chan) { 1062 bld.swizzles[chan] = swizzles[chan]; 1063 bld.inv_swizzles[swizzles[chan]] = chan; 1064 } 1065 1066 bld.inputs = inputs; 1067 bld.outputs = outputs; 1068 bld.consts_ptr = consts_ptr; 1069 bld.sampler = sampler; 1070 bld.indirect_files = info->indirect_files; 1071 bld.instructions = (struct tgsi_full_instruction *) 1072 MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction)); 1073 bld.max_instructions = LP_MAX_INSTRUCTIONS; 1074 1075 if (!bld.instructions) { 1076 return; 1077 } 1078 1079 tgsi_parse_init(&parse, tokens); 1080 1081 while (!tgsi_parse_end_of_tokens(&parse)) { 1082 tgsi_parse_token(&parse); 1083 1084 switch(parse.FullToken.Token.Type) { 1085 case TGSI_TOKEN_TYPE_DECLARATION: 1086 /* Inputs already interpolated */ 1087 emit_declaration(&bld, 
&parse.FullToken.FullDeclaration); 1088 break; 1089 1090 case TGSI_TOKEN_TYPE_INSTRUCTION: 1091 { 1092 /* save expanded instruction */ 1093 if (num_instructions == bld.max_instructions) { 1094 struct tgsi_full_instruction *instructions; 1095 instructions = REALLOC(bld.instructions, 1096 bld.max_instructions 1097 * sizeof(struct tgsi_full_instruction), 1098 (bld.max_instructions + LP_MAX_INSTRUCTIONS) 1099 * sizeof(struct tgsi_full_instruction)); 1100 if (!instructions) { 1101 break; 1102 } 1103 bld.instructions = instructions; 1104 bld.max_instructions += LP_MAX_INSTRUCTIONS; 1105 } 1106 1107 memcpy(bld.instructions + num_instructions, 1108 &parse.FullToken.FullInstruction, 1109 sizeof(bld.instructions[0])); 1110 1111 num_instructions++; 1112 } 1113 1114 break; 1115 1116 case TGSI_TOKEN_TYPE_IMMEDIATE: 1117 /* simply copy the immediate values into the next immediates[] slot */ 1118 { 1119 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1120 float imm[4]; 1121 assert(size <= 4); 1122 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 1123 for (chan = 0; chan < 4; ++chan) { 1124 imm[chan] = 0.0f; 1125 } 1126 for (chan = 0; chan < size; ++chan) { 1127 unsigned swizzle = bld.swizzles[chan]; 1128 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float; 1129 } 1130 bld.immediates[num_immediates] = 1131 lp_build_const_aos(gallivm, type, 1132 imm[0], imm[1], imm[2], imm[3], 1133 NULL); 1134 num_immediates++; 1135 } 1136 break; 1137 1138 case TGSI_TOKEN_TYPE_PROPERTY: 1139 break; 1140 1141 default: 1142 assert(0); 1143 } 1144 } 1145 1146 while (pc != -1) { 1147 struct tgsi_full_instruction *instr = bld.instructions + pc; 1148 const struct tgsi_opcode_info *opcode_info = 1149 tgsi_get_opcode_info(instr->Instruction.Opcode); 1150 if (!emit_instruction(&bld, instr, opcode_info, &pc)) 1151 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 1152 opcode_info->mnemonic); 1153 } 1154 1155 if (0) { 1156 LLVMBasicBlockRef block = 
LLVMGetInsertBlock(gallivm->builder); 1157 LLVMValueRef function = LLVMGetBasicBlockParent(block); 1158 debug_printf("11111111111111111111111111111 \n"); 1159 tgsi_dump(tokens, 0); 1160 lp_debug_dump_value(function); 1161 debug_printf("2222222222222222222222222222 \n"); 1162 } 1163 tgsi_parse_free(&parse); 1164 1165 if (0) { 1166 LLVMModuleRef module = LLVMGetGlobalParent( 1167 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 1168 LLVMDumpModule(module); 1169 } 1170 1171 FREE(bld.instructions); 1172} 1173 1174