/* lp_bld_tgsi_aos.c revision efc82aef35a2aac5d2ed9774f6d28f2626796416 */
1/************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * TGSI to LLVM IR translation -- AoS. 31 * 32 * FIXME: 33 * - No control flow support: the existing control flow code should be factored 34 * out into from the SoA code into a common module and shared. 35 * - No derivatives. Derivate logic should be pluggable, just like the samplers. 
36 * 37 * @author Jose Fonseca <jfonseca@vmware.com> 38 */ 39 40#include "pipe/p_config.h" 41#include "pipe/p_shader_tokens.h" 42#include "util/u_debug.h" 43#include "util/u_math.h" 44#include "util/u_memory.h" 45#include "tgsi/tgsi_dump.h" 46#include "tgsi/tgsi_info.h" 47#include "tgsi/tgsi_parse.h" 48#include "tgsi/tgsi_util.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_type.h" 51#include "lp_bld_const.h" 52#include "lp_bld_arit.h" 53#include "lp_bld_logic.h" 54#include "lp_bld_swizzle.h" 55#include "lp_bld_flow.h" 56#include "lp_bld_quad.h" 57#include "lp_bld_tgsi.h" 58#include "lp_bld_limits.h" 59#include "lp_bld_debug.h" 60 61 62#define LP_MAX_INSTRUCTIONS 256 63 64 65struct lp_build_tgsi_aos_context 66{ 67 struct lp_build_context base; 68 69 /* Builder for integer masks and indices */ 70 struct lp_build_context int_bld; 71 72 /* 73 * AoS swizzle used: 74 * - swizzles[0] = red index 75 * - swizzles[1] = green index 76 * - swizzles[2] = blue index 77 * - swizzles[3] = alpha index 78 */ 79 unsigned char swizzles[4]; 80 unsigned char inv_swizzles[4]; 81 82 LLVMValueRef consts_ptr; 83 const LLVMValueRef *inputs; 84 LLVMValueRef *outputs; 85 86 struct lp_build_sampler_aos *sampler; 87 88 LLVMValueRef immediates[LP_MAX_TGSI_IMMEDIATES]; 89 LLVMValueRef temps[LP_MAX_TGSI_TEMPS]; 90 LLVMValueRef addr[LP_MAX_TGSI_ADDRS]; 91 LLVMValueRef preds[LP_MAX_TGSI_PREDS]; 92 93 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 94 * set in the indirect_files field. 95 * The temps[] array above is unused then. 96 */ 97 LLVMValueRef temps_array; 98 99 /** bitmask indicating which register files are accessed indirectly */ 100 unsigned indirect_files; 101 102 struct tgsi_full_instruction *instructions; 103 uint max_instructions; 104}; 105 106 107/** 108 * Wrapper around lp_build_swizzle_aos which translates swizzles to another 109 * ordering. 
110 */ 111static LLVMValueRef 112swizzle_aos(struct lp_build_tgsi_aos_context *bld, 113 LLVMValueRef a, 114 unsigned swizzle_x, 115 unsigned swizzle_y, 116 unsigned swizzle_z, 117 unsigned swizzle_w) 118{ 119 unsigned char swizzles[4]; 120 121 assert(swizzle_x < 4); 122 assert(swizzle_y < 4); 123 assert(swizzle_z < 4); 124 assert(swizzle_w < 4); 125 126 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x]; 127 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y]; 128 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z]; 129 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w]; 130 131 return lp_build_swizzle_aos(&bld->base, a, swizzles); 132} 133 134 135static LLVMValueRef 136swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld, 137 LLVMValueRef a, 138 unsigned chan) 139{ 140 chan = bld->swizzles[chan]; 141 return lp_build_swizzle_scalar_aos(&bld->base, a, chan); 142} 143 144 145/** 146 * Register fetch. 147 */ 148static LLVMValueRef 149emit_fetch( 150 struct lp_build_tgsi_aos_context *bld, 151 const struct tgsi_full_instruction *inst, 152 unsigned src_op) 153{ 154 struct lp_type type = bld->base.type; 155 const struct tgsi_full_src_register *reg = &inst->Src[src_op]; 156 LLVMValueRef res; 157 unsigned chan; 158 159 assert(!reg->Register.Indirect); 160 161 /* 162 * Fetch the from the register file. 
163 */ 164 165 switch (reg->Register.File) { 166 case TGSI_FILE_CONSTANT: 167 /* 168 * Get the constants components 169 */ 170 171 res = bld->base.undef; 172 for (chan = 0; chan < 4; ++chan) { 173 LLVMValueRef index; 174 LLVMValueRef scalar_ptr; 175 LLVMValueRef scalar; 176 LLVMValueRef swizzle; 177 178 index = lp_build_const_int32(bld->base.gallivm, reg->Register.Index * 4 + chan); 179 180 scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, 181 &index, 1, ""); 182 183 scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, ""); 184 185 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); 186 187 /* 188 * NOTE: constants array is always assumed to be RGBA 189 */ 190 191 swizzle = lp_build_const_int32(bld->base.gallivm, chan); 192 193 res = LLVMBuildInsertElement(bld->base.builder, res, scalar, swizzle, ""); 194 } 195 196 /* 197 * Broadcast the first quaternion to all others. 198 * 199 * XXX: could be factored into a reusable function. 200 */ 201 202 if (type.length > 4) { 203 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 204 unsigned i; 205 206 for (chan = 0; chan < 4; ++chan) { 207 shuffles[chan] = lp_build_const_int32(bld->base.gallivm, chan); 208 } 209 210 for (i = 4; i < type.length; ++i) { 211 shuffles[i] = shuffles[i % 4]; 212 } 213 214 res = LLVMBuildShuffleVector(bld->base.builder, 215 res, bld->base.undef, 216 LLVMConstVector(shuffles, type.length), 217 ""); 218 } 219 break; 220 221 case TGSI_FILE_IMMEDIATE: 222 res = bld->immediates[reg->Register.Index]; 223 assert(res); 224 break; 225 226 case TGSI_FILE_INPUT: 227 res = bld->inputs[reg->Register.Index]; 228 assert(res); 229 break; 230 231 case TGSI_FILE_TEMPORARY: 232 { 233 LLVMValueRef temp_ptr; 234 temp_ptr = bld->temps[reg->Register.Index]; 235 res = LLVMBuildLoad(bld->base.builder, temp_ptr, ""); 236 if (!res) 237 return bld->base.undef; 238 } 239 break; 240 241 default: 242 assert(0 && "invalid src register in emit_fetch()"); 243 return bld->base.undef; 244 } 245 246 /* 
247 * Apply sign modifier. 248 */ 249 250 if (reg->Register.Absolute) { 251 res = lp_build_abs(&bld->base, res); 252 } 253 254 if(reg->Register.Negate) { 255 res = lp_build_negate(&bld->base, res); 256 } 257 258 /* 259 * Swizzle the argument 260 */ 261 262 res = swizzle_aos(bld, res, 263 reg->Register.SwizzleX, 264 reg->Register.SwizzleY, 265 reg->Register.SwizzleZ, 266 reg->Register.SwizzleW); 267 268 return res; 269} 270 271 272/** 273 * Register store. 274 */ 275static void 276emit_store( 277 struct lp_build_tgsi_aos_context *bld, 278 const struct tgsi_full_instruction *inst, 279 unsigned index, 280 LLVMValueRef value) 281{ 282 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 283 LLVMValueRef mask = NULL; 284 LLVMValueRef ptr; 285 286 /* 287 * Saturate the value 288 */ 289 290 switch (inst->Instruction.Saturate) { 291 case TGSI_SAT_NONE: 292 break; 293 294 case TGSI_SAT_ZERO_ONE: 295 value = lp_build_max(&bld->base, value, bld->base.zero); 296 value = lp_build_min(&bld->base, value, bld->base.one); 297 break; 298 299 case TGSI_SAT_MINUS_PLUS_ONE: 300 value = lp_build_max(&bld->base, value, lp_build_const_vec(bld->base.gallivm, bld->base.type, -1.0)); 301 value = lp_build_min(&bld->base, value, bld->base.one); 302 break; 303 304 default: 305 assert(0); 306 } 307 308 /* 309 * Translate the register file 310 */ 311 312 assert(!reg->Register.Indirect); 313 314 switch (reg->Register.File) { 315 case TGSI_FILE_OUTPUT: 316 ptr = bld->outputs[reg->Register.Index]; 317 break; 318 319 case TGSI_FILE_TEMPORARY: 320 ptr = bld->temps[reg->Register.Index]; 321 break; 322 323 case TGSI_FILE_ADDRESS: 324 ptr = bld->addr[reg->Indirect.Index]; 325 break; 326 327 case TGSI_FILE_PREDICATE: 328 ptr = bld->preds[reg->Register.Index]; 329 break; 330 331 default: 332 assert(0); 333 return; 334 } 335 336 /* 337 * Predicate 338 */ 339 340 if (inst->Instruction.Predicate) { 341 LLVMValueRef pred; 342 343 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS); 344 345 pred = 
LLVMBuildLoad(bld->base.builder, 346 bld->preds[inst->Predicate.Index], ""); 347 348 /* 349 * Convert the value to an integer mask. 350 */ 351 pred = lp_build_compare(bld->base.gallivm, 352 bld->base.type, 353 PIPE_FUNC_NOTEQUAL, 354 pred, 355 bld->base.zero); 356 357 if (inst->Predicate.Negate) { 358 pred = LLVMBuildNot(bld->base.builder, pred, ""); 359 } 360 361 pred = swizzle_aos(bld, pred, 362 inst->Predicate.SwizzleX, 363 inst->Predicate.SwizzleY, 364 inst->Predicate.SwizzleZ, 365 inst->Predicate.SwizzleW); 366 367 if (mask) { 368 mask = LLVMBuildAnd(bld->base.builder, mask, pred, ""); 369 } else { 370 mask = pred; 371 } 372 } 373 374 /* 375 * Writemask 376 */ 377 378 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { 379 LLVMValueRef writemask; 380 381 writemask = lp_build_const_mask_aos(bld->base.gallivm, bld->base.type, 382 reg->Register.WriteMask); 383 384 if (mask) { 385 mask = LLVMBuildAnd(bld->base.builder, mask, writemask, ""); 386 } else { 387 mask = writemask; 388 } 389 } 390 391 if (mask) { 392 LLVMValueRef orig_value; 393 394 orig_value = LLVMBuildLoad(bld->base.builder, ptr, ""); 395 value = lp_build_select(&bld->base, 396 mask, value, orig_value); 397 } 398 399 LLVMBuildStore(bld->base.builder, value, ptr); 400} 401 402 403/** 404 * High-level instruction translators. 
405 */ 406 407static LLVMValueRef 408emit_tex(struct lp_build_tgsi_aos_context *bld, 409 const struct tgsi_full_instruction *inst, 410 enum lp_build_tex_modifier modifier) 411{ 412 unsigned target; 413 unsigned unit; 414 LLVMValueRef coords; 415 LLVMValueRef ddx; 416 LLVMValueRef ddy; 417 418 if (!bld->sampler) { 419 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 420 return bld->base.undef; 421 } 422 423 target = inst->Texture.Texture; 424 425 coords = emit_fetch( bld, inst, 0 ); 426 427 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 428 ddx = emit_fetch( bld, inst, 1 ); 429 ddy = emit_fetch( bld, inst, 2 ); 430 unit = inst->Src[3].Register.Index; 431 } else { 432#if 0 433 ddx = lp_build_ddx( &bld->base, coords ); 434 ddy = lp_build_ddy( &bld->base, coords ); 435#else 436 /* TODO */ 437 ddx = bld->base.one; 438 ddy = bld->base.one; 439#endif 440 unit = inst->Src[1].Register.Index; 441 } 442 443 return bld->sampler->emit_fetch_texel(bld->sampler, 444 &bld->base, 445 target, unit, 446 coords, ddx, ddy, 447 modifier); 448} 449 450 451static void 452emit_declaration( 453 struct lp_build_tgsi_aos_context *bld, 454 const struct tgsi_full_declaration *decl) 455{ 456 struct gallivm_state *gallivm = bld->base.gallivm; 457 LLVMTypeRef vec_type = lp_build_vec_type(bld->base.gallivm, bld->base.type); 458 459 unsigned first = decl->Range.First; 460 unsigned last = decl->Range.Last; 461 unsigned idx; 462 463 for (idx = first; idx <= last; ++idx) { 464 switch (decl->Declaration.File) { 465 case TGSI_FILE_TEMPORARY: 466 assert(idx < LP_MAX_TGSI_TEMPS); 467 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 468 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1); 469 bld->temps_array = lp_build_array_alloca(bld->base.gallivm, 470 vec_type, array_size, ""); 471 } else { 472 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, ""); 473 } 474 break; 475 476 case TGSI_FILE_OUTPUT: 477 bld->outputs[idx] = 
lp_build_alloca(gallivm, vec_type, ""); 478 break; 479 480 case TGSI_FILE_ADDRESS: 481 assert(idx < LP_MAX_TGSI_ADDRS); 482 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, ""); 483 break; 484 485 case TGSI_FILE_PREDICATE: 486 assert(idx < LP_MAX_TGSI_PREDS); 487 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, ""); 488 break; 489 490 default: 491 /* don't need to declare other vars */ 492 break; 493 } 494 } 495} 496 497 498/** 499 * Emit LLVM for one TGSI instruction. 500 * \param return TRUE for success, FALSE otherwise 501 */ 502static boolean 503emit_instruction( 504 struct lp_build_tgsi_aos_context *bld, 505 const struct tgsi_full_instruction *inst, 506 const struct tgsi_opcode_info *info, 507 int *pc) 508{ 509 LLVMValueRef src0, src1, src2; 510 LLVMValueRef tmp0, tmp1; 511 LLVMValueRef dst0 = NULL; 512 513 /* 514 * Stores and write masks are handled in a general fashion after the long 515 * instruction opcode switch statement. 516 * 517 * Although not stricitly necessary, we avoid generating instructions for 518 * channels which won't be stored, in cases where's that easy. For some 519 * complex instructions, like texture sampling, it is more convenient to 520 * assume a full writemask and then let LLVM optimization passes eliminate 521 * redundant code. 
522 */ 523 524 (*pc)++; 525 526 assert(info->num_dst <= 1); 527 if (info->num_dst) { 528 dst0 = bld->base.undef; 529 } 530 531 switch (inst->Instruction.Opcode) { 532 case TGSI_OPCODE_ARL: 533 src0 = emit_fetch(bld, inst, 0); 534 dst0 = lp_build_floor(&bld->base, src0); 535 break; 536 537 case TGSI_OPCODE_MOV: 538 dst0 = emit_fetch(bld, inst, 0); 539 break; 540 541 case TGSI_OPCODE_LIT: 542 return FALSE; 543 544 case TGSI_OPCODE_RCP: 545 /* TGSI_OPCODE_RECIP */ 546 src0 = emit_fetch(bld, inst, 0); 547 dst0 = lp_build_rcp(&bld->base, src0); 548 break; 549 550 case TGSI_OPCODE_RSQ: 551 /* TGSI_OPCODE_RECIPSQRT */ 552 src0 = emit_fetch(bld, inst, 0); 553 tmp0 = lp_build_abs(&bld->base, src0); 554 dst0 = lp_build_rsqrt(&bld->base, tmp0); 555 break; 556 557 case TGSI_OPCODE_EXP: 558 return FALSE; 559 560 case TGSI_OPCODE_LOG: 561 return FALSE; 562 563 case TGSI_OPCODE_MUL: 564 src0 = emit_fetch(bld, inst, 0); 565 src1 = emit_fetch(bld, inst, 1); 566 dst0 = lp_build_mul(&bld->base, src0, src1); 567 break; 568 569 case TGSI_OPCODE_ADD: 570 src0 = emit_fetch(bld, inst, 0); 571 src1 = emit_fetch(bld, inst, 1); 572 dst0 = lp_build_add(&bld->base, src0, src1); 573 break; 574 575 case TGSI_OPCODE_DP3: 576 /* TGSI_OPCODE_DOT3 */ 577 return FALSE; 578 579 case TGSI_OPCODE_DP4: 580 /* TGSI_OPCODE_DOT4 */ 581 return FALSE; 582 583 case TGSI_OPCODE_DST: 584 return FALSE; 585 586 case TGSI_OPCODE_MIN: 587 src0 = emit_fetch(bld, inst, 0); 588 src1 = emit_fetch(bld, inst, 1); 589 dst0 = lp_build_max(&bld->base, src0, src1); 590 break; 591 592 case TGSI_OPCODE_MAX: 593 src0 = emit_fetch(bld, inst, 0); 594 src1 = emit_fetch(bld, inst, 1); 595 dst0 = lp_build_max(&bld->base, src0, src1); 596 break; 597 598 case TGSI_OPCODE_SLT: 599 /* TGSI_OPCODE_SETLT */ 600 src0 = emit_fetch(bld, inst, 0); 601 src1 = emit_fetch(bld, inst, 1); 602 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, src1); 603 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 604 break; 605 606 
case TGSI_OPCODE_SGE: 607 /* TGSI_OPCODE_SETGE */ 608 src0 = emit_fetch(bld, inst, 0); 609 src1 = emit_fetch(bld, inst, 1); 610 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GEQUAL, src0, src1); 611 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 612 break; 613 614 case TGSI_OPCODE_MAD: 615 /* TGSI_OPCODE_MADD */ 616 src0 = emit_fetch(bld, inst, 0); 617 src1 = emit_fetch(bld, inst, 1); 618 src2 = emit_fetch(bld, inst, 2); 619 tmp0 = lp_build_mul(&bld->base, src0, src1); 620 dst0 = lp_build_add(&bld->base, tmp0, src2); 621 break; 622 623 case TGSI_OPCODE_SUB: 624 src0 = emit_fetch(bld, inst, 0); 625 src1 = emit_fetch(bld, inst, 1); 626 dst0 = lp_build_sub(&bld->base, src0, src1); 627 break; 628 629 case TGSI_OPCODE_LRP: 630 src0 = emit_fetch(bld, inst, 0); 631 src1 = emit_fetch(bld, inst, 1); 632 src2 = emit_fetch(bld, inst, 2); 633 tmp0 = lp_build_sub(&bld->base, src1, src2); 634 tmp0 = lp_build_mul(&bld->base, src0, tmp0); 635 dst0 = lp_build_add(&bld->base, tmp0, src2); 636 break; 637 638 case TGSI_OPCODE_CND: 639 src0 = emit_fetch(bld, inst, 0); 640 src1 = emit_fetch(bld, inst, 1); 641 src2 = emit_fetch(bld, inst, 2); 642 tmp1 = lp_build_const_vec(bld->base.gallivm, bld->base.type, 0.5); 643 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src2, tmp1); 644 dst0 = lp_build_select(&bld->base, tmp0, src0, src1); 645 break; 646 647 case TGSI_OPCODE_DP2A: 648 return FALSE; 649 650 case TGSI_OPCODE_FRC: 651 src0 = emit_fetch(bld, inst, 0); 652 tmp0 = lp_build_floor(&bld->base, src0); 653 dst0 = lp_build_sub(&bld->base, src0, tmp0); 654 break; 655 656 case TGSI_OPCODE_CLAMP: 657 src0 = emit_fetch(bld, inst, 0); 658 src1 = emit_fetch(bld, inst, 1); 659 src2 = emit_fetch(bld, inst, 2); 660 tmp0 = lp_build_max(&bld->base, src0, src1); 661 dst0 = lp_build_min(&bld->base, tmp0, src2); 662 break; 663 664 case TGSI_OPCODE_FLR: 665 src0 = emit_fetch(bld, inst, 0); 666 dst0 = lp_build_floor(&bld->base, src0); 667 break; 668 669 case TGSI_OPCODE_ROUND: 670 
src0 = emit_fetch(bld, inst, 0); 671 dst0 = lp_build_round(&bld->base, src0); 672 break; 673 674 case TGSI_OPCODE_EX2: 675 src0 = emit_fetch(bld, inst, 0); 676 tmp0 = lp_build_swizzle_scalar_aos(&bld->base, src0, TGSI_SWIZZLE_X); 677 dst0 = lp_build_exp2(&bld->base, tmp0); 678 break; 679 680 case TGSI_OPCODE_LG2: 681 src0 = emit_fetch(bld, inst, 0); 682 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 683 dst0 = lp_build_log2(&bld->base, tmp0); 684 break; 685 686 case TGSI_OPCODE_POW: 687 src0 = emit_fetch(bld, inst, 0); 688 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 689 src1 = emit_fetch(bld, inst, 1); 690 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X); 691 dst0 = lp_build_pow(&bld->base, src0, src1); 692 break; 693 694 case TGSI_OPCODE_XPD: 695 return FALSE; 696 697 case TGSI_OPCODE_ABS: 698 src0 = emit_fetch(bld, inst, 0); 699 dst0 = lp_build_abs(&bld->base, src0); 700 break; 701 702 case TGSI_OPCODE_RCC: 703 /* deprecated? */ 704 assert(0); 705 return FALSE; 706 707 case TGSI_OPCODE_DPH: 708 return FALSE; 709 710 case TGSI_OPCODE_COS: 711 src0 = emit_fetch(bld, inst, 0); 712 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 713 dst0 = lp_build_cos(&bld->base, tmp0); 714 break; 715 716 case TGSI_OPCODE_DDX: 717 return FALSE; 718 719 case TGSI_OPCODE_DDY: 720 return FALSE; 721 722 case TGSI_OPCODE_KILP: 723 /* predicated kill */ 724 return FALSE; 725 726 case TGSI_OPCODE_KIL: 727 /* conditional kill */ 728 return FALSE; 729 730 case TGSI_OPCODE_PK2H: 731 return FALSE; 732 break; 733 734 case TGSI_OPCODE_PK2US: 735 return FALSE; 736 break; 737 738 case TGSI_OPCODE_PK4B: 739 return FALSE; 740 break; 741 742 case TGSI_OPCODE_PK4UB: 743 return FALSE; 744 745 case TGSI_OPCODE_RFL: 746 return FALSE; 747 748 case TGSI_OPCODE_SEQ: 749 src0 = emit_fetch(bld, inst, 0); 750 src1 = emit_fetch(bld, inst, 1); 751 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_EQUAL, src0, src1); 752 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 
753 break; 754 755 case TGSI_OPCODE_SFL: 756 dst0 = bld->base.zero; 757 break; 758 759 case TGSI_OPCODE_SGT: 760 src0 = emit_fetch(bld, inst, 0); 761 src1 = emit_fetch(bld, inst, 1); 762 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_GREATER, src0, src1); 763 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 764 break; 765 766 case TGSI_OPCODE_SIN: 767 src0 = emit_fetch(bld, inst, 0); 768 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 769 dst0 = lp_build_sin(&bld->base, tmp0); 770 break; 771 772 case TGSI_OPCODE_SLE: 773 src0 = emit_fetch(bld, inst, 0); 774 src1 = emit_fetch(bld, inst, 1); 775 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LEQUAL, src0, src1); 776 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 777 break; 778 779 case TGSI_OPCODE_SNE: 780 src0 = emit_fetch(bld, inst, 0); 781 src1 = emit_fetch(bld, inst, 1); 782 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_NOTEQUAL, src0, src1); 783 dst0 = lp_build_select(&bld->base, tmp0, bld->base.one, bld->base.zero); 784 break; 785 786 case TGSI_OPCODE_STR: 787 dst0 = bld->base.one; 788 break; 789 790 case TGSI_OPCODE_TEX: 791 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); 792 break; 793 794 case TGSI_OPCODE_TXD: 795 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); 796 break; 797 798 case TGSI_OPCODE_UP2H: 799 /* deprecated */ 800 assert (0); 801 return FALSE; 802 break; 803 804 case TGSI_OPCODE_UP2US: 805 /* deprecated */ 806 assert(0); 807 return FALSE; 808 break; 809 810 case TGSI_OPCODE_UP4B: 811 /* deprecated */ 812 assert(0); 813 return FALSE; 814 break; 815 816 case TGSI_OPCODE_UP4UB: 817 /* deprecated */ 818 assert(0); 819 return FALSE; 820 break; 821 822 case TGSI_OPCODE_X2D: 823 /* deprecated? 
*/ 824 assert(0); 825 return FALSE; 826 break; 827 828 case TGSI_OPCODE_ARA: 829 /* deprecated */ 830 assert(0); 831 return FALSE; 832 break; 833 834 case TGSI_OPCODE_ARR: 835 src0 = emit_fetch(bld, inst, 0); 836 dst0 = lp_build_round(&bld->base, src0); 837 break; 838 839 case TGSI_OPCODE_BRA: 840 /* deprecated */ 841 assert(0); 842 return FALSE; 843 break; 844 845 case TGSI_OPCODE_CAL: 846 return FALSE; 847 848 case TGSI_OPCODE_RET: 849 return FALSE; 850 851 case TGSI_OPCODE_END: 852 *pc = -1; 853 break; 854 855 case TGSI_OPCODE_SSG: 856 /* TGSI_OPCODE_SGN */ 857 tmp0 = emit_fetch(bld, inst, 0); 858 dst0 = lp_build_sgn(&bld->base, tmp0); 859 break; 860 861 case TGSI_OPCODE_CMP: 862 src0 = emit_fetch(bld, inst, 0); 863 src1 = emit_fetch(bld, inst, 1); 864 src2 = emit_fetch(bld, inst, 2); 865 tmp0 = lp_build_cmp(&bld->base, PIPE_FUNC_LESS, src0, bld->base.zero); 866 dst0 = lp_build_select(&bld->base, tmp0, src1, src2); 867 break; 868 869 case TGSI_OPCODE_SCS: 870 return FALSE; 871 872 case TGSI_OPCODE_TXB: 873 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); 874 break; 875 876 case TGSI_OPCODE_NRM: 877 /* fall-through */ 878 case TGSI_OPCODE_NRM4: 879 return FALSE; 880 881 case TGSI_OPCODE_DIV: 882 /* deprecated */ 883 assert(0); 884 return FALSE; 885 break; 886 887 case TGSI_OPCODE_DP2: 888 return FALSE; 889 890 case TGSI_OPCODE_TXL: 891 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); 892 break; 893 894 case TGSI_OPCODE_TXP: 895 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED); 896 break; 897 898 case TGSI_OPCODE_BRK: 899 return FALSE; 900 901 case TGSI_OPCODE_IF: 902 return FALSE; 903 904 case TGSI_OPCODE_BGNLOOP: 905 return FALSE; 906 907 case TGSI_OPCODE_BGNSUB: 908 return FALSE; 909 910 case TGSI_OPCODE_ELSE: 911 return FALSE; 912 913 case TGSI_OPCODE_ENDIF: 914 return FALSE; 915 916 case TGSI_OPCODE_ENDLOOP: 917 return FALSE; 918 919 case TGSI_OPCODE_ENDSUB: 920 return FALSE; 921 922 case TGSI_OPCODE_PUSHA: 923 /* deprecated? 
*/ 924 assert(0); 925 return FALSE; 926 break; 927 928 case TGSI_OPCODE_POPA: 929 /* deprecated? */ 930 assert(0); 931 return FALSE; 932 break; 933 934 case TGSI_OPCODE_CEIL: 935 src0 = emit_fetch(bld, inst, 0); 936 dst0 = lp_build_ceil(&bld->base, src0); 937 break; 938 939 case TGSI_OPCODE_I2F: 940 /* deprecated? */ 941 assert(0); 942 return FALSE; 943 break; 944 945 case TGSI_OPCODE_NOT: 946 /* deprecated? */ 947 assert(0); 948 return FALSE; 949 break; 950 951 case TGSI_OPCODE_TRUNC: 952 src0 = emit_fetch(bld, inst, 0); 953 dst0 = lp_build_trunc(&bld->base, src0); 954 break; 955 956 case TGSI_OPCODE_SHL: 957 /* deprecated? */ 958 assert(0); 959 return FALSE; 960 break; 961 962 case TGSI_OPCODE_ISHR: 963 /* deprecated? */ 964 assert(0); 965 return FALSE; 966 break; 967 968 case TGSI_OPCODE_AND: 969 /* deprecated? */ 970 assert(0); 971 return FALSE; 972 break; 973 974 case TGSI_OPCODE_OR: 975 /* deprecated? */ 976 assert(0); 977 return FALSE; 978 break; 979 980 case TGSI_OPCODE_MOD: 981 /* deprecated? */ 982 assert(0); 983 return FALSE; 984 break; 985 986 case TGSI_OPCODE_XOR: 987 /* deprecated? */ 988 assert(0); 989 return FALSE; 990 break; 991 992 case TGSI_OPCODE_SAD: 993 /* deprecated? */ 994 assert(0); 995 return FALSE; 996 break; 997 998 case TGSI_OPCODE_TXF: 999 /* deprecated? */ 1000 assert(0); 1001 return FALSE; 1002 break; 1003 1004 case TGSI_OPCODE_TXQ: 1005 /* deprecated? 
*/ 1006 assert(0); 1007 return FALSE; 1008 break; 1009 1010 case TGSI_OPCODE_CONT: 1011 return FALSE; 1012 1013 case TGSI_OPCODE_EMIT: 1014 return FALSE; 1015 break; 1016 1017 case TGSI_OPCODE_ENDPRIM: 1018 return FALSE; 1019 break; 1020 1021 case TGSI_OPCODE_NOP: 1022 break; 1023 1024 default: 1025 return FALSE; 1026 } 1027 1028 if (info->num_dst) { 1029 emit_store(bld, inst, 0, dst0); 1030 } 1031 1032 return TRUE; 1033} 1034 1035 1036void 1037lp_build_tgsi_aos(struct gallivm_state *gallivm, 1038 const struct tgsi_token *tokens, 1039 struct lp_type type, 1040 const unsigned char swizzles[4], 1041 LLVMValueRef consts_ptr, 1042 const LLVMValueRef *inputs, 1043 LLVMValueRef *outputs, 1044 struct lp_build_sampler_aos *sampler, 1045 const struct tgsi_shader_info *info) 1046{ 1047 struct lp_build_tgsi_aos_context bld; 1048 struct tgsi_parse_context parse; 1049 uint num_immediates = 0; 1050 uint num_instructions = 0; 1051 unsigned chan; 1052 int pc = 0; 1053 1054 /* Setup build context */ 1055 memset(&bld, 0, sizeof bld); 1056 lp_build_context_init(&bld.base, gallivm, type); 1057 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type)); 1058 1059 for (chan = 0; chan < 4; ++chan) { 1060 bld.swizzles[chan] = swizzles[chan]; 1061 bld.inv_swizzles[swizzles[chan]] = chan; 1062 } 1063 1064 bld.inputs = inputs; 1065 bld.outputs = outputs; 1066 bld.consts_ptr = consts_ptr; 1067 bld.sampler = sampler; 1068 bld.indirect_files = info->indirect_files; 1069 bld.instructions = (struct tgsi_full_instruction *) 1070 MALLOC(LP_MAX_INSTRUCTIONS * sizeof(struct tgsi_full_instruction)); 1071 bld.max_instructions = LP_MAX_INSTRUCTIONS; 1072 1073 if (!bld.instructions) { 1074 return; 1075 } 1076 1077 tgsi_parse_init(&parse, tokens); 1078 1079 while (!tgsi_parse_end_of_tokens(&parse)) { 1080 tgsi_parse_token(&parse); 1081 1082 switch(parse.FullToken.Token.Type) { 1083 case TGSI_TOKEN_TYPE_DECLARATION: 1084 /* Inputs already interpolated */ 1085 emit_declaration(&bld, 
&parse.FullToken.FullDeclaration); 1086 break; 1087 1088 case TGSI_TOKEN_TYPE_INSTRUCTION: 1089 { 1090 /* save expanded instruction */ 1091 if (num_instructions == bld.max_instructions) { 1092 struct tgsi_full_instruction *instructions; 1093 instructions = REALLOC(bld.instructions, 1094 bld.max_instructions 1095 * sizeof(struct tgsi_full_instruction), 1096 (bld.max_instructions + LP_MAX_INSTRUCTIONS) 1097 * sizeof(struct tgsi_full_instruction)); 1098 if (!instructions) { 1099 break; 1100 } 1101 bld.instructions = instructions; 1102 bld.max_instructions += LP_MAX_INSTRUCTIONS; 1103 } 1104 1105 memcpy(bld.instructions + num_instructions, 1106 &parse.FullToken.FullInstruction, 1107 sizeof(bld.instructions[0])); 1108 1109 num_instructions++; 1110 } 1111 1112 break; 1113 1114 case TGSI_TOKEN_TYPE_IMMEDIATE: 1115 /* simply copy the immediate values into the next immediates[] slot */ 1116 { 1117 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1118 float imm[4]; 1119 assert(size <= 4); 1120 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 1121 for (chan = 0; chan < 4; ++chan) { 1122 imm[chan] = 0.0f; 1123 } 1124 for (chan = 0; chan < size; ++chan) { 1125 unsigned swizzle = bld.swizzles[chan]; 1126 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float; 1127 } 1128 bld.immediates[num_immediates] = 1129 lp_build_const_aos(gallivm, type, 1130 imm[0], imm[1], imm[2], imm[3], 1131 NULL); 1132 num_immediates++; 1133 } 1134 break; 1135 1136 case TGSI_TOKEN_TYPE_PROPERTY: 1137 break; 1138 1139 default: 1140 assert(0); 1141 } 1142 } 1143 1144 while (pc != -1) { 1145 struct tgsi_full_instruction *instr = bld.instructions + pc; 1146 const struct tgsi_opcode_info *opcode_info = 1147 tgsi_get_opcode_info(instr->Instruction.Opcode); 1148 if (!emit_instruction(&bld, instr, opcode_info, &pc)) 1149 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 1150 opcode_info->mnemonic); 1151 } 1152 1153 if (0) { 1154 LLVMBasicBlockRef block = 
LLVMGetInsertBlock(gallivm->builder); 1155 LLVMValueRef function = LLVMGetBasicBlockParent(block); 1156 debug_printf("11111111111111111111111111111 \n"); 1157 tgsi_dump(tokens, 0); 1158 lp_debug_dump_value(function); 1159 debug_printf("2222222222222222222222222222 \n"); 1160 } 1161 tgsi_parse_free(&parse); 1162 1163 if (0) { 1164 LLVMModuleRef module = LLVMGetGlobalParent( 1165 LLVMGetBasicBlockParent(LLVMGetInsertBlock(bld.base.builder))); 1166 LLVMDumpModule(module); 1167 } 1168 1169 FREE(bld.instructions); 1170} 1171 1172