1/************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * TGSI to LLVM IR translation -- AoS. 31 * 32 * FIXME: 33 * - No control flow support: the existing control flow code should be factored 34 * out into from the SoA code into a common module and shared. 35 * - No derivatives. Derivate logic should be pluggable, just like the samplers. 36 * 37 * @author Jose Fonseca <jfonseca@vmware.com> 38 */ 39 40#include "pipe/p_config.h" 41#include "pipe/p_shader_tokens.h" 42#include "util/u_debug.h" 43#include "util/u_math.h" 44#include "util/u_memory.h" 45#include "tgsi/tgsi_dump.h" 46#include "tgsi/tgsi_info.h" 47#include "tgsi/tgsi_parse.h" 48#include "tgsi/tgsi_util.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_type.h" 51#include "lp_bld_const.h" 52#include "lp_bld_arit.h" 53#include "lp_bld_logic.h" 54#include "lp_bld_swizzle.h" 55#include "lp_bld_flow.h" 56#include "lp_bld_quad.h" 57#include "lp_bld_tgsi.h" 58#include "lp_bld_debug.h" 59#include "lp_bld_sample.h" 60 61 62/** 63 * Wrapper around lp_build_swizzle_aos which translates swizzles to another 64 * ordering. 65 */ 66static LLVMValueRef 67swizzle_aos(struct lp_build_tgsi_context *bld_base, 68 LLVMValueRef a, 69 unsigned swizzle_x, 70 unsigned swizzle_y, 71 unsigned swizzle_z, 72 unsigned swizzle_w) 73{ 74 unsigned char swizzles[4]; 75 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base); 76 77 assert(swizzle_x < 4); 78 assert(swizzle_y < 4); 79 assert(swizzle_z < 4); 80 assert(swizzle_w < 4); 81 82 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x]; 83 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y]; 84 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z]; 85 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w]; 86 87 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles); 88} 89 90 91static LLVMValueRef 92swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld, 93 LLVMValueRef a, 94 unsigned chan) 95{ 96 chan = bld->swizzles[chan]; 97 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan); 98} 99 100 101static LLVMValueRef 102emit_fetch_constant( 103 struct lp_build_tgsi_context * bld_base, 104 const struct tgsi_full_src_register * reg, 105 enum tgsi_opcode_type stype, 106 unsigned swizzle) 107{ 108 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 109 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 110 struct lp_type type = bld_base->base.type; 111 LLVMValueRef res; 112 unsigned chan; 113 114 assert(!reg->Register.Indirect); 115 116 /* 117 * Get the constants components 118 */ 119 120 res = bld->bld_base.base.undef; 121 for (chan = 0; chan < 4; ++chan) { 122 LLVMValueRef index; 123 LLVMValueRef scalar_ptr; 124 LLVMValueRef scalar; 125 LLVMValueRef swizzle; 126 127 index = lp_build_const_int32(bld->bld_base.base.gallivm, 128 reg->Register.Index * 4 + chan); 129 130 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, ""); 131 132 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 133 134 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); 135 136 /* 137 * NOTE: constants array is always assumed to be RGBA 138 */ 139 140 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm, 141 bld->swizzles[chan]); 142 143 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, ""); 144 } 145 146 /* 147 * Broadcast the first quaternion to all others. 148 * 149 * XXX: could be factored into a reusable function. 150 */ 151 152 if (type.length > 4) { 153 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 154 unsigned i; 155 156 for (chan = 0; chan < 4; ++chan) { 157 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan); 158 } 159 160 for (i = 4; i < type.length; ++i) { 161 shuffles[i] = shuffles[i % 4]; 162 } 163 164 res = LLVMBuildShuffleVector(builder, 165 res, bld->bld_base.base.undef, 166 LLVMConstVector(shuffles, type.length), 167 ""); 168 } 169 return res; 170} 171 172static LLVMValueRef 173emit_fetch_immediate( 174 struct lp_build_tgsi_context * bld_base, 175 const struct tgsi_full_src_register * reg, 176 enum tgsi_opcode_type stype, 177 unsigned swizzle) 178{ 179 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 180 LLVMValueRef res = bld->immediates[reg->Register.Index]; 181 assert(res); 182 return res; 183} 184 185static LLVMValueRef 186emit_fetch_input( 187 struct lp_build_tgsi_context * bld_base, 188 const struct tgsi_full_src_register * reg, 189 enum tgsi_opcode_type stype, 190 unsigned swizzle) 191{ 192 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 193 LLVMValueRef res = bld->inputs[reg->Register.Index]; 194 assert(!reg->Register.Indirect); 195 assert(res); 196 return res; 197} 198 199static LLVMValueRef 200emit_fetch_temporary( 201 struct lp_build_tgsi_context * bld_base, 202 const struct tgsi_full_src_register * reg, 203 enum tgsi_opcode_type stype, 204 unsigned swizzle) 205{ 206 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 207 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 208 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index]; 209 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, ""); 210 assert(!reg->Register.Indirect); 211 if (!res) 212 return bld->bld_base.base.undef; 213 214 return res; 215} 216 217/** 218 * Register store. 219 */ 220void 221lp_emit_store_aos( 222 struct lp_build_tgsi_aos_context *bld, 223 const struct tgsi_full_instruction *inst, 224 unsigned index, 225 LLVMValueRef value) 226{ 227 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 228 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 229 LLVMValueRef mask = NULL; 230 LLVMValueRef ptr; 231 232 /* 233 * Saturate the value 234 */ 235 236 switch (inst->Instruction.Saturate) { 237 case TGSI_SAT_NONE: 238 break; 239 240 case TGSI_SAT_ZERO_ONE: 241 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero); 242 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 243 break; 244 245 case TGSI_SAT_MINUS_PLUS_ONE: 246 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0)); 247 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 248 break; 249 250 default: 251 assert(0); 252 } 253 254 /* 255 * Translate the register file 256 */ 257 258 assert(!reg->Register.Indirect); 259 260 switch (reg->Register.File) { 261 case TGSI_FILE_OUTPUT: 262 ptr = bld->outputs[reg->Register.Index]; 263 break; 264 265 case TGSI_FILE_TEMPORARY: 266 ptr = bld->temps[reg->Register.Index]; 267 break; 268 269 case TGSI_FILE_ADDRESS: 270 ptr = bld->addr[reg->Indirect.Index]; 271 break; 272 273 case TGSI_FILE_PREDICATE: 274 ptr = bld->preds[reg->Register.Index]; 275 break; 276 277 default: 278 assert(0); 279 return; 280 } 281 282 if (!ptr) 283 return; 284 /* 285 * Predicate 286 */ 287 288 if (inst->Instruction.Predicate) { 289 LLVMValueRef pred; 290 291 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS); 292 293 pred = LLVMBuildLoad(builder, 294 bld->preds[inst->Predicate.Index], ""); 295 296 /* 297 * Convert the value to an integer mask. 298 */ 299 pred = lp_build_compare(bld->bld_base.base.gallivm, 300 bld->bld_base.base.type, 301 PIPE_FUNC_NOTEQUAL, 302 pred, 303 bld->bld_base.base.zero); 304 305 if (inst->Predicate.Negate) { 306 pred = LLVMBuildNot(builder, pred, ""); 307 } 308 309 pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred, 310 inst->Predicate.SwizzleX, 311 inst->Predicate.SwizzleY, 312 inst->Predicate.SwizzleZ, 313 inst->Predicate.SwizzleW); 314 315 if (mask) { 316 mask = LLVMBuildAnd(builder, mask, pred, ""); 317 } else { 318 mask = pred; 319 } 320 } 321 322 /* 323 * Writemask 324 */ 325 326 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { 327 LLVMValueRef writemask; 328 329 writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm, 330 bld->bld_base.base.type, 331 reg->Register.WriteMask, 332 bld->swizzles); 333 334 if (mask) { 335 mask = LLVMBuildAnd(builder, mask, writemask, ""); 336 } else { 337 mask = writemask; 338 } 339 } 340 341 if (mask) { 342 LLVMValueRef orig_value; 343 344 orig_value = LLVMBuildLoad(builder, ptr, ""); 345 value = lp_build_select(&bld->bld_base.base, 346 mask, value, orig_value); 347 } 348 349 LLVMBuildStore(builder, value, ptr); 350} 351 352 353/** 354 * High-level instruction translators. 355 */ 356 357static LLVMValueRef 358emit_tex(struct lp_build_tgsi_aos_context *bld, 359 const struct tgsi_full_instruction *inst, 360 enum lp_build_tex_modifier modifier) 361{ 362 unsigned target; 363 unsigned unit; 364 LLVMValueRef coords; 365 LLVMValueRef ddx; 366 LLVMValueRef ddy; 367 struct lp_derivatives derivs; 368 369 if (!bld->sampler) { 370 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 371 return bld->bld_base.base.undef; 372 } 373 374 target = inst->Texture.Texture; 375 376 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL); 377 378 if (0 && modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 379 ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL); 380 ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL); 381 unit = inst->Src[3].Register.Index; 382 } else { 383#if 0 384 ddx = lp_build_ddx( &bld->bld_base.base, coords ); 385 ddy = lp_build_ddy( &bld->bld_base.base, coords ); 386#else 387 /* TODO */ 388 derivs.ddx_ddy[0] = bld->bld_base.base.one; 389 derivs.ddx_ddy[1] = bld->bld_base.base.one; 390#endif 391 unit = inst->Src[1].Register.Index; 392 } 393 394 return bld->sampler->emit_fetch_texel(bld->sampler, 395 &bld->bld_base.base, 396 target, unit, 397 coords, derivs, 398 modifier); 399} 400 401 402void 403lp_emit_declaration_aos( 404 struct lp_build_tgsi_aos_context *bld, 405 const struct tgsi_full_declaration *decl) 406{ 407 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 408 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type); 409 410 unsigned first = decl->Range.First; 411 unsigned last = decl->Range.Last; 412 unsigned idx; 413 414 for (idx = first; idx <= last; ++idx) { 415 switch (decl->Declaration.File) { 416 case TGSI_FILE_TEMPORARY: 417 assert(idx < LP_MAX_TGSI_TEMPS); 418 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 419 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1); 420 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm, 421 vec_type, array_size, ""); 422 } else { 423 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, ""); 424 } 425 break; 426 427 case TGSI_FILE_OUTPUT: 428 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, ""); 429 break; 430 431 case TGSI_FILE_ADDRESS: 432 assert(idx < LP_MAX_TGSI_ADDRS); 433 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, ""); 434 break; 435 436 case TGSI_FILE_PREDICATE: 437 assert(idx < LP_MAX_TGSI_PREDS); 438 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, ""); 439 break; 440 441 default: 442 /* don't need to declare other vars */ 443 break; 444 } 445 } 446} 447 448 449/** 450 * Emit LLVM for one TGSI instruction. 451 * \param return TRUE for success, FALSE otherwise 452 */ 453boolean 454lp_emit_instruction_aos( 455 struct lp_build_tgsi_aos_context *bld, 456 const struct tgsi_full_instruction *inst, 457 const struct tgsi_opcode_info *info, 458 int *pc) 459{ 460 LLVMValueRef src0, src1, src2; 461 LLVMValueRef tmp0, tmp1; 462 LLVMValueRef dst0 = NULL; 463 464 /* 465 * Stores and write masks are handled in a general fashion after the long 466 * instruction opcode switch statement. 467 * 468 * Although not stricitly necessary, we avoid generating instructions for 469 * channels which won't be stored, in cases where's that easy. For some 470 * complex instructions, like texture sampling, it is more convenient to 471 * assume a full writemask and then let LLVM optimization passes eliminate 472 * redundant code. 473 */ 474 475 (*pc)++; 476 477 assert(info->num_dst <= 1); 478 if (info->num_dst) { 479 dst0 = bld->bld_base.base.undef; 480 } 481 482 switch (inst->Instruction.Opcode) { 483 case TGSI_OPCODE_ARL: 484 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 485 dst0 = lp_build_floor(&bld->bld_base.base, src0); 486 break; 487 488 case TGSI_OPCODE_MOV: 489 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 490 break; 491 492 case TGSI_OPCODE_LIT: 493 return FALSE; 494 495 case TGSI_OPCODE_RCP: 496 /* TGSI_OPCODE_RECIP */ 497 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 498 dst0 = lp_build_rcp(&bld->bld_base.base, src0); 499 break; 500 501 case TGSI_OPCODE_RSQ: 502 /* TGSI_OPCODE_RECIPSQRT */ 503 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 504 tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0); 505 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0); 506 break; 507 508 case TGSI_OPCODE_EXP: 509 return FALSE; 510 511 case TGSI_OPCODE_LOG: 512 return FALSE; 513 514 case TGSI_OPCODE_MUL: 515 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 516 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 517 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1); 518 break; 519 520 case TGSI_OPCODE_ADD: 521 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 522 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 523 dst0 = lp_build_add(&bld->bld_base.base, src0, src1); 524 break; 525 526 case TGSI_OPCODE_DP3: 527 /* TGSI_OPCODE_DOT3 */ 528 return FALSE; 529 530 case TGSI_OPCODE_DP4: 531 /* TGSI_OPCODE_DOT4 */ 532 return FALSE; 533 534 case TGSI_OPCODE_DST: 535 return FALSE; 536 537 case TGSI_OPCODE_MIN: 538 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 539 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 540 dst0 = lp_build_max(&bld->bld_base.base, src0, src1); 541 break; 542 543 case TGSI_OPCODE_MAX: 544 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 545 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 546 dst0 = lp_build_max(&bld->bld_base.base, src0, src1); 547 break; 548 549 case TGSI_OPCODE_SLT: 550 /* TGSI_OPCODE_SETLT */ 551 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 552 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 553 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1); 554 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 555 break; 556 557 case TGSI_OPCODE_SGE: 558 /* TGSI_OPCODE_SETGE */ 559 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 560 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 561 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1); 562 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 563 break; 564 565 case TGSI_OPCODE_MAD: 566 /* TGSI_OPCODE_MADD */ 567 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 568 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 569 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 570 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1); 571 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); 572 break; 573 574 case TGSI_OPCODE_SUB: 575 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 576 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 577 dst0 = lp_build_sub(&bld->bld_base.base, src0, src1); 578 break; 579 580 case TGSI_OPCODE_LRP: 581 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 582 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 583 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 584 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2); 585 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0); 586 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); 587 break; 588 589 case TGSI_OPCODE_CND: 590 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 591 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 592 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 593 tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5); 594 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1); 595 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1); 596 break; 597 598 case TGSI_OPCODE_DP2A: 599 return FALSE; 600 601 case TGSI_OPCODE_FRC: 602 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 603 tmp0 = lp_build_floor(&bld->bld_base.base, src0); 604 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0); 605 break; 606 607 case TGSI_OPCODE_CLAMP: 608 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 609 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 610 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 611 tmp0 = lp_build_max(&bld->bld_base.base, src0, src1); 612 dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2); 613 break; 614 615 case TGSI_OPCODE_FLR: 616 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 617 dst0 = lp_build_floor(&bld->bld_base.base, src0); 618 break; 619 620 case TGSI_OPCODE_ROUND: 621 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 622 dst0 = lp_build_round(&bld->bld_base.base, src0); 623 break; 624 625 case TGSI_OPCODE_EX2: 626 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 627 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X); 628 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0); 629 break; 630 631 case TGSI_OPCODE_LG2: 632 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 633 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 634 dst0 = lp_build_log2(&bld->bld_base.base, tmp0); 635 break; 636 637 case TGSI_OPCODE_POW: 638 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 639 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 640 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 641 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X); 642 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1); 643 break; 644 645 case TGSI_OPCODE_XPD: 646 return FALSE; 647 648 case TGSI_OPCODE_RCC: 649 /* deprecated? */ 650 assert(0); 651 return FALSE; 652 653 case TGSI_OPCODE_DPH: 654 return FALSE; 655 656 case TGSI_OPCODE_COS: 657 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 658 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 659 dst0 = lp_build_cos(&bld->bld_base.base, tmp0); 660 break; 661 662 case TGSI_OPCODE_DDX: 663 return FALSE; 664 665 case TGSI_OPCODE_DDY: 666 return FALSE; 667 668 case TGSI_OPCODE_KILP: 669 /* predicated kill */ 670 return FALSE; 671 672 case TGSI_OPCODE_KIL: 673 /* conditional kill */ 674 return FALSE; 675 676 case TGSI_OPCODE_PK2H: 677 return FALSE; 678 break; 679 680 case TGSI_OPCODE_PK2US: 681 return FALSE; 682 break; 683 684 case TGSI_OPCODE_PK4B: 685 return FALSE; 686 break; 687 688 case TGSI_OPCODE_PK4UB: 689 return FALSE; 690 691 case TGSI_OPCODE_RFL: 692 return FALSE; 693 694 case TGSI_OPCODE_SEQ: 695 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 696 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 697 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1); 698 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 699 break; 700 701 case TGSI_OPCODE_SFL: 702 dst0 = bld->bld_base.base.zero; 703 break; 704 705 case TGSI_OPCODE_SGT: 706 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 707 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 708 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1); 709 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 710 break; 711 712 case TGSI_OPCODE_SIN: 713 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 714 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 715 dst0 = lp_build_sin(&bld->bld_base.base, tmp0); 716 break; 717 718 case TGSI_OPCODE_SLE: 719 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 720 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 721 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1); 722 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 723 break; 724 725 case TGSI_OPCODE_SNE: 726 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 727 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 728 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1); 729 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 730 break; 731 732 case TGSI_OPCODE_STR: 733 dst0 = bld->bld_base.base.one; 734 break; 735 736 case TGSI_OPCODE_TEX: 737 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); 738 break; 739 740 case TGSI_OPCODE_TXD: 741 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); 742 break; 743 744 case TGSI_OPCODE_UP2H: 745 /* deprecated */ 746 assert (0); 747 return FALSE; 748 break; 749 750 case TGSI_OPCODE_UP2US: 751 /* deprecated */ 752 assert(0); 753 return FALSE; 754 break; 755 756 case TGSI_OPCODE_UP4B: 757 /* deprecated */ 758 assert(0); 759 return FALSE; 760 break; 761 762 case TGSI_OPCODE_UP4UB: 763 /* deprecated */ 764 assert(0); 765 return FALSE; 766 break; 767 768 case TGSI_OPCODE_X2D: 769 /* deprecated? */ 770 assert(0); 771 return FALSE; 772 break; 773 774 case TGSI_OPCODE_ARA: 775 /* deprecated */ 776 assert(0); 777 return FALSE; 778 break; 779 780 case TGSI_OPCODE_ARR: 781 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 782 dst0 = lp_build_round(&bld->bld_base.base, src0); 783 break; 784 785 case TGSI_OPCODE_BRA: 786 /* deprecated */ 787 assert(0); 788 return FALSE; 789 break; 790 791 case TGSI_OPCODE_CAL: 792 return FALSE; 793 794 case TGSI_OPCODE_RET: 795 return FALSE; 796 797 case TGSI_OPCODE_END: 798 *pc = -1; 799 break; 800 801 case TGSI_OPCODE_SSG: 802 /* TGSI_OPCODE_SGN */ 803 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 804 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0); 805 break; 806 807 case TGSI_OPCODE_CMP: 808 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 809 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 810 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 811 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero); 812 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2); 813 break; 814 815 case TGSI_OPCODE_SCS: 816 return FALSE; 817 818 case TGSI_OPCODE_TXB: 819 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); 820 break; 821 822 case TGSI_OPCODE_NRM: 823 /* fall-through */ 824 case TGSI_OPCODE_NRM4: 825 return FALSE; 826 827 case TGSI_OPCODE_DIV: 828 /* deprecated */ 829 assert(0); 830 return FALSE; 831 break; 832 833 case TGSI_OPCODE_DP2: 834 return FALSE; 835 836 case TGSI_OPCODE_TXL: 837 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); 838 break; 839 840 case TGSI_OPCODE_TXP: 841 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED); 842 break; 843 844 case TGSI_OPCODE_BRK: 845 return FALSE; 846 847 case TGSI_OPCODE_IF: 848 return FALSE; 849 850 case TGSI_OPCODE_BGNLOOP: 851 return FALSE; 852 853 case TGSI_OPCODE_BGNSUB: 854 return FALSE; 855 856 case TGSI_OPCODE_ELSE: 857 return FALSE; 858 859 case TGSI_OPCODE_ENDIF: 860 return FALSE; 861 862 case TGSI_OPCODE_ENDLOOP: 863 return FALSE; 864 865 case TGSI_OPCODE_ENDSUB: 866 return FALSE; 867 868 case TGSI_OPCODE_PUSHA: 869 /* deprecated? */ 870 assert(0); 871 return FALSE; 872 break; 873 874 case TGSI_OPCODE_POPA: 875 /* deprecated? */ 876 assert(0); 877 return FALSE; 878 break; 879 880 case TGSI_OPCODE_CEIL: 881 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 882 dst0 = lp_build_ceil(&bld->bld_base.base, src0); 883 break; 884 885 case TGSI_OPCODE_I2F: 886 /* deprecated? */ 887 assert(0); 888 return FALSE; 889 break; 890 891 case TGSI_OPCODE_NOT: 892 /* deprecated? */ 893 assert(0); 894 return FALSE; 895 break; 896 897 case TGSI_OPCODE_TRUNC: 898 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 899 dst0 = lp_build_trunc(&bld->bld_base.base, src0); 900 break; 901 902 case TGSI_OPCODE_SHL: 903 /* deprecated? */ 904 assert(0); 905 return FALSE; 906 break; 907 908 case TGSI_OPCODE_ISHR: 909 /* deprecated? */ 910 assert(0); 911 return FALSE; 912 break; 913 914 case TGSI_OPCODE_AND: 915 /* deprecated? */ 916 assert(0); 917 return FALSE; 918 break; 919 920 case TGSI_OPCODE_OR: 921 /* deprecated? */ 922 assert(0); 923 return FALSE; 924 break; 925 926 case TGSI_OPCODE_MOD: 927 /* deprecated? */ 928 assert(0); 929 return FALSE; 930 break; 931 932 case TGSI_OPCODE_XOR: 933 /* deprecated? */ 934 assert(0); 935 return FALSE; 936 break; 937 938 case TGSI_OPCODE_SAD: 939 /* deprecated? */ 940 assert(0); 941 return FALSE; 942 break; 943 944 case TGSI_OPCODE_TXF: 945 /* deprecated? */ 946 assert(0); 947 return FALSE; 948 break; 949 950 case TGSI_OPCODE_TXQ: 951 /* deprecated? */ 952 assert(0); 953 return FALSE; 954 break; 955 956 case TGSI_OPCODE_CONT: 957 return FALSE; 958 959 case TGSI_OPCODE_EMIT: 960 return FALSE; 961 break; 962 963 case TGSI_OPCODE_ENDPRIM: 964 return FALSE; 965 break; 966 967 case TGSI_OPCODE_NOP: 968 break; 969 970 default: 971 return FALSE; 972 } 973 974 if (info->num_dst) { 975 lp_emit_store_aos(bld, inst, 0, dst0); 976 } 977 978 return TRUE; 979} 980 981 982void 983lp_build_tgsi_aos(struct gallivm_state *gallivm, 984 const struct tgsi_token *tokens, 985 struct lp_type type, 986 const unsigned char swizzles[4], 987 LLVMValueRef consts_ptr, 988 const LLVMValueRef *inputs, 989 LLVMValueRef *outputs, 990 struct lp_build_sampler_aos *sampler, 991 const struct tgsi_shader_info *info) 992{ 993 struct lp_build_tgsi_aos_context bld; 994 struct tgsi_parse_context parse; 995 uint num_immediates = 0; 996 unsigned chan; 997 int pc = 0; 998 999 /* Setup build context */ 1000 memset(&bld, 0, sizeof bld); 1001 lp_build_context_init(&bld.bld_base.base, gallivm, type); 1002 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); 1003 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); 1004 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type)); 1005 1006 for (chan = 0; chan < 4; ++chan) { 1007 bld.swizzles[chan] = swizzles[chan]; 1008 bld.inv_swizzles[swizzles[chan]] = chan; 1009 } 1010 1011 bld.inputs = inputs; 1012 bld.outputs = outputs; 1013 bld.consts_ptr = consts_ptr; 1014 bld.sampler = sampler; 1015 bld.indirect_files = info->indirect_files; 1016 bld.bld_base.emit_swizzle = swizzle_aos; 1017 bld.bld_base.info = info; 1018 1019 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; 1020 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; 1021 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; 1022 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; 1023 1024 /* Set opcode actions */ 1025 lp_set_default_actions_cpu(&bld.bld_base); 1026 1027 if (!lp_bld_tgsi_list_init(&bld.bld_base)) { 1028 return; 1029 } 1030 1031 tgsi_parse_init(&parse, tokens); 1032 1033 while (!tgsi_parse_end_of_tokens(&parse)) { 1034 tgsi_parse_token(&parse); 1035 1036 switch(parse.FullToken.Token.Type) { 1037 case TGSI_TOKEN_TYPE_DECLARATION: 1038 /* Inputs already interpolated */ 1039 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration); 1040 break; 1041 1042 case TGSI_TOKEN_TYPE_INSTRUCTION: 1043 /* save expanded instruction */ 1044 lp_bld_tgsi_add_instruction(&bld.bld_base, 1045 &parse.FullToken.FullInstruction); 1046 break; 1047 1048 case TGSI_TOKEN_TYPE_IMMEDIATE: 1049 /* simply copy the immediate values into the next immediates[] slot */ 1050 { 1051 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1052 float imm[4]; 1053 assert(size <= 4); 1054 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 1055 for (chan = 0; chan < 4; ++chan) { 1056 imm[chan] = 0.0f; 1057 } 1058 for (chan = 0; chan < size; ++chan) { 1059 unsigned swizzle = bld.swizzles[chan]; 1060 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float; 1061 } 1062 bld.immediates[num_immediates] = 1063 lp_build_const_aos(gallivm, type, 1064 imm[0], imm[1], imm[2], imm[3], 1065 NULL); 1066 num_immediates++; 1067 } 1068 break; 1069 1070 case TGSI_TOKEN_TYPE_PROPERTY: 1071 break; 1072 1073 default: 1074 assert(0); 1075 } 1076 } 1077 1078 while (pc != -1) { 1079 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc; 1080 const struct tgsi_opcode_info *opcode_info = 1081 tgsi_get_opcode_info(instr->Instruction.Opcode); 1082 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc)) 1083 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 1084 opcode_info->mnemonic); 1085 } 1086 1087 if (0) { 1088 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); 1089 LLVMValueRef function = LLVMGetBasicBlockParent(block); 1090 debug_printf("11111111111111111111111111111 \n"); 1091 tgsi_dump(tokens, 0); 1092 lp_debug_dump_value(function); 1093 debug_printf("2222222222222222222222222222 \n"); 1094 } 1095 tgsi_parse_free(&parse); 1096 FREE(bld.bld_base.instructions); 1097 1098 if (0) { 1099 LLVMModuleRef module = LLVMGetGlobalParent( 1100 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 1101 LLVMDumpModule(module); 1102 } 1103 1104} 1105 1106