/* lp_bld_tgsi_aos.c revision c23fd547c060c4137eab0f878a1028c5903384eb */
1/************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * TGSI to LLVM IR translation -- AoS. 31 * 32 * FIXME: 33 * - No control flow support: the existing control flow code should be factored 34 * out into from the SoA code into a common module and shared. 35 * - No derivatives. Derivate logic should be pluggable, just like the samplers. 
36 * 37 * @author Jose Fonseca <jfonseca@vmware.com> 38 */ 39 40#include "pipe/p_config.h" 41#include "pipe/p_shader_tokens.h" 42#include "util/u_debug.h" 43#include "util/u_math.h" 44#include "util/u_memory.h" 45#include "tgsi/tgsi_dump.h" 46#include "tgsi/tgsi_info.h" 47#include "tgsi/tgsi_parse.h" 48#include "tgsi/tgsi_util.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_type.h" 51#include "lp_bld_const.h" 52#include "lp_bld_arit.h" 53#include "lp_bld_logic.h" 54#include "lp_bld_swizzle.h" 55#include "lp_bld_flow.h" 56#include "lp_bld_quad.h" 57#include "lp_bld_tgsi.h" 58#include "lp_bld_debug.h" 59 60 61/** 62 * Wrapper around lp_build_swizzle_aos which translates swizzles to another 63 * ordering. 64 */ 65static LLVMValueRef 66swizzle_aos(struct lp_build_tgsi_context *bld_base, 67 LLVMValueRef a, 68 unsigned swizzle_x, 69 unsigned swizzle_y, 70 unsigned swizzle_z, 71 unsigned swizzle_w) 72{ 73 unsigned char swizzles[4]; 74 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base); 75 76 assert(swizzle_x < 4); 77 assert(swizzle_y < 4); 78 assert(swizzle_z < 4); 79 assert(swizzle_w < 4); 80 81 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x]; 82 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y]; 83 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z]; 84 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w]; 85 86 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles); 87} 88 89 90static LLVMValueRef 91swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld, 92 LLVMValueRef a, 93 unsigned chan) 94{ 95 chan = bld->swizzles[chan]; 96 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan); 97} 98 99 100static LLVMValueRef 101emit_fetch_constant( 102 struct lp_build_tgsi_context * bld_base, 103 const struct tgsi_full_src_register * reg, 104 enum tgsi_opcode_type stype, 105 unsigned swizzle) 106{ 107 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 108 LLVMBuilderRef builder = 
bld_base->base.gallivm->builder; 109 struct lp_type type = bld_base->base.type; 110 LLVMValueRef res; 111 unsigned chan; 112 113 assert(!reg->Register.Indirect); 114 115 /* 116 * Get the constants components 117 */ 118 119 res = bld->bld_base.base.undef; 120 for (chan = 0; chan < 4; ++chan) { 121 LLVMValueRef index; 122 LLVMValueRef scalar_ptr; 123 LLVMValueRef scalar; 124 LLVMValueRef swizzle; 125 126 index = lp_build_const_int32(bld->bld_base.base.gallivm, 127 reg->Register.Index * 4 + chan); 128 129 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, ""); 130 131 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 132 133 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); 134 135 /* 136 * NOTE: constants array is always assumed to be RGBA 137 */ 138 139 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm, 140 bld->swizzles[chan]); 141 142 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, ""); 143 } 144 145 /* 146 * Broadcast the first quaternion to all others. 147 * 148 * XXX: could be factored into a reusable function. 
149 */ 150 151 if (type.length > 4) { 152 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 153 unsigned i; 154 155 for (chan = 0; chan < 4; ++chan) { 156 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan); 157 } 158 159 for (i = 4; i < type.length; ++i) { 160 shuffles[i] = shuffles[i % 4]; 161 } 162 163 res = LLVMBuildShuffleVector(builder, 164 res, bld->bld_base.base.undef, 165 LLVMConstVector(shuffles, type.length), 166 ""); 167 } 168 return res; 169} 170 171static LLVMValueRef 172emit_fetch_immediate( 173 struct lp_build_tgsi_context * bld_base, 174 const struct tgsi_full_src_register * reg, 175 enum tgsi_opcode_type stype, 176 unsigned swizzle) 177{ 178 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 179 LLVMValueRef res = bld->immediates[reg->Register.Index]; 180 assert(res); 181 return res; 182} 183 184static LLVMValueRef 185emit_fetch_input( 186 struct lp_build_tgsi_context * bld_base, 187 const struct tgsi_full_src_register * reg, 188 enum tgsi_opcode_type stype, 189 unsigned swizzle) 190{ 191 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 192 LLVMValueRef res = bld->inputs[reg->Register.Index]; 193 assert(!reg->Register.Indirect); 194 assert(res); 195 return res; 196} 197 198static LLVMValueRef 199emit_fetch_temporary( 200 struct lp_build_tgsi_context * bld_base, 201 const struct tgsi_full_src_register * reg, 202 enum tgsi_opcode_type stype, 203 unsigned swizzle) 204{ 205 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 206 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 207 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index]; 208 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, ""); 209 assert(!reg->Register.Indirect); 210 if (!res) 211 return bld->bld_base.base.undef; 212 213 return res; 214} 215 216/** 217 * Register store. 
218 */ 219void 220lp_emit_store_aos( 221 struct lp_build_tgsi_aos_context *bld, 222 const struct tgsi_full_instruction *inst, 223 unsigned index, 224 LLVMValueRef value) 225{ 226 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 227 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 228 LLVMValueRef mask = NULL; 229 LLVMValueRef ptr; 230 231 /* 232 * Saturate the value 233 */ 234 235 switch (inst->Instruction.Saturate) { 236 case TGSI_SAT_NONE: 237 break; 238 239 case TGSI_SAT_ZERO_ONE: 240 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero); 241 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 242 break; 243 244 case TGSI_SAT_MINUS_PLUS_ONE: 245 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0)); 246 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 247 break; 248 249 default: 250 assert(0); 251 } 252 253 /* 254 * Translate the register file 255 */ 256 257 assert(!reg->Register.Indirect); 258 259 switch (reg->Register.File) { 260 case TGSI_FILE_OUTPUT: 261 ptr = bld->outputs[reg->Register.Index]; 262 break; 263 264 case TGSI_FILE_TEMPORARY: 265 ptr = bld->temps[reg->Register.Index]; 266 break; 267 268 case TGSI_FILE_ADDRESS: 269 ptr = bld->addr[reg->Indirect.Index]; 270 break; 271 272 case TGSI_FILE_PREDICATE: 273 ptr = bld->preds[reg->Register.Index]; 274 break; 275 276 default: 277 assert(0); 278 return; 279 } 280 281 if (!ptr) 282 return; 283 /* 284 * Predicate 285 */ 286 287 if (inst->Instruction.Predicate) { 288 LLVMValueRef pred; 289 290 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS); 291 292 pred = LLVMBuildLoad(builder, 293 bld->preds[inst->Predicate.Index], ""); 294 295 /* 296 * Convert the value to an integer mask. 
297 */ 298 pred = lp_build_compare(bld->bld_base.base.gallivm, 299 bld->bld_base.base.type, 300 PIPE_FUNC_NOTEQUAL, 301 pred, 302 bld->bld_base.base.zero); 303 304 if (inst->Predicate.Negate) { 305 pred = LLVMBuildNot(builder, pred, ""); 306 } 307 308 pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred, 309 inst->Predicate.SwizzleX, 310 inst->Predicate.SwizzleY, 311 inst->Predicate.SwizzleZ, 312 inst->Predicate.SwizzleW); 313 314 if (mask) { 315 mask = LLVMBuildAnd(builder, mask, pred, ""); 316 } else { 317 mask = pred; 318 } 319 } 320 321 /* 322 * Writemask 323 */ 324 325 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { 326 LLVMValueRef writemask; 327 328 writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm, 329 bld->bld_base.base.type, 330 reg->Register.WriteMask, 331 bld->swizzles); 332 333 if (mask) { 334 mask = LLVMBuildAnd(builder, mask, writemask, ""); 335 } else { 336 mask = writemask; 337 } 338 } 339 340 if (mask) { 341 LLVMValueRef orig_value; 342 343 orig_value = LLVMBuildLoad(builder, ptr, ""); 344 value = lp_build_select(&bld->bld_base.base, 345 mask, value, orig_value); 346 } 347 348 LLVMBuildStore(builder, value, ptr); 349} 350 351 352/** 353 * High-level instruction translators. 
354 */ 355 356static LLVMValueRef 357emit_tex(struct lp_build_tgsi_aos_context *bld, 358 const struct tgsi_full_instruction *inst, 359 enum lp_build_tex_modifier modifier) 360{ 361 unsigned target; 362 unsigned unit; 363 LLVMValueRef coords; 364 LLVMValueRef ddx; 365 LLVMValueRef ddy; 366 367 if (!bld->sampler) { 368 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 369 return bld->bld_base.base.undef; 370 } 371 372 target = inst->Texture.Texture; 373 374 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL); 375 376 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 377 ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL); 378 ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL); 379 unit = inst->Src[3].Register.Index; 380 } else { 381#if 0 382 ddx = lp_build_ddx( &bld->bld_base.base, coords ); 383 ddy = lp_build_ddy( &bld->bld_base.base, coords ); 384#else 385 /* TODO */ 386 ddx = bld->bld_base.base.one; 387 ddy = bld->bld_base.base.one; 388#endif 389 unit = inst->Src[1].Register.Index; 390 } 391 392 return bld->sampler->emit_fetch_texel(bld->sampler, 393 &bld->bld_base.base, 394 target, unit, 395 coords, ddx, ddy, 396 modifier); 397} 398 399 400void 401lp_emit_declaration_aos( 402 struct lp_build_tgsi_aos_context *bld, 403 const struct tgsi_full_declaration *decl) 404{ 405 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 406 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type); 407 408 unsigned first = decl->Range.First; 409 unsigned last = decl->Range.Last; 410 unsigned idx; 411 412 for (idx = first; idx <= last; ++idx) { 413 switch (decl->Declaration.File) { 414 case TGSI_FILE_TEMPORARY: 415 assert(idx < LP_MAX_TGSI_TEMPS); 416 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 417 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1); 418 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm, 419 
vec_type, array_size, ""); 420 } else { 421 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, ""); 422 } 423 break; 424 425 case TGSI_FILE_OUTPUT: 426 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, ""); 427 break; 428 429 case TGSI_FILE_ADDRESS: 430 assert(idx < LP_MAX_TGSI_ADDRS); 431 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, ""); 432 break; 433 434 case TGSI_FILE_PREDICATE: 435 assert(idx < LP_MAX_TGSI_PREDS); 436 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, ""); 437 break; 438 439 default: 440 /* don't need to declare other vars */ 441 break; 442 } 443 } 444} 445 446 447/** 448 * Emit LLVM for one TGSI instruction. 449 * \param return TRUE for success, FALSE otherwise 450 */ 451boolean 452lp_emit_instruction_aos( 453 struct lp_build_tgsi_aos_context *bld, 454 const struct tgsi_full_instruction *inst, 455 const struct tgsi_opcode_info *info, 456 int *pc) 457{ 458 LLVMValueRef src0, src1, src2; 459 LLVMValueRef tmp0, tmp1; 460 LLVMValueRef dst0 = NULL; 461 462 /* 463 * Stores and write masks are handled in a general fashion after the long 464 * instruction opcode switch statement. 465 * 466 * Although not stricitly necessary, we avoid generating instructions for 467 * channels which won't be stored, in cases where's that easy. For some 468 * complex instructions, like texture sampling, it is more convenient to 469 * assume a full writemask and then let LLVM optimization passes eliminate 470 * redundant code. 
471 */ 472 473 (*pc)++; 474 475 assert(info->num_dst <= 1); 476 if (info->num_dst) { 477 dst0 = bld->bld_base.base.undef; 478 } 479 480 switch (inst->Instruction.Opcode) { 481 case TGSI_OPCODE_ARL: 482 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 483 dst0 = lp_build_floor(&bld->bld_base.base, src0); 484 break; 485 486 case TGSI_OPCODE_MOV: 487 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 488 break; 489 490 case TGSI_OPCODE_LIT: 491 return FALSE; 492 493 case TGSI_OPCODE_RCP: 494 /* TGSI_OPCODE_RECIP */ 495 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 496 dst0 = lp_build_rcp(&bld->bld_base.base, src0); 497 break; 498 499 case TGSI_OPCODE_RSQ: 500 /* TGSI_OPCODE_RECIPSQRT */ 501 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 502 tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0); 503 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0); 504 break; 505 506 case TGSI_OPCODE_EXP: 507 return FALSE; 508 509 case TGSI_OPCODE_LOG: 510 return FALSE; 511 512 case TGSI_OPCODE_MUL: 513 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 514 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 515 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1); 516 break; 517 518 case TGSI_OPCODE_ADD: 519 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 520 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 521 dst0 = lp_build_add(&bld->bld_base.base, src0, src1); 522 break; 523 524 case TGSI_OPCODE_DP3: 525 /* TGSI_OPCODE_DOT3 */ 526 return FALSE; 527 528 case TGSI_OPCODE_DP4: 529 /* TGSI_OPCODE_DOT4 */ 530 return FALSE; 531 532 case TGSI_OPCODE_DST: 533 return FALSE; 534 535 case TGSI_OPCODE_MIN: 536 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 537 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 538 dst0 = lp_build_max(&bld->bld_base.base, src0, src1); 539 break; 540 541 case TGSI_OPCODE_MAX: 
542 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 543 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 544 dst0 = lp_build_max(&bld->bld_base.base, src0, src1); 545 break; 546 547 case TGSI_OPCODE_SLT: 548 /* TGSI_OPCODE_SETLT */ 549 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 550 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 551 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1); 552 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 553 break; 554 555 case TGSI_OPCODE_SGE: 556 /* TGSI_OPCODE_SETGE */ 557 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 558 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 559 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1); 560 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 561 break; 562 563 case TGSI_OPCODE_MAD: 564 /* TGSI_OPCODE_MADD */ 565 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 566 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 567 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 568 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1); 569 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); 570 break; 571 572 case TGSI_OPCODE_SUB: 573 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 574 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 575 dst0 = lp_build_sub(&bld->bld_base.base, src0, src1); 576 break; 577 578 case TGSI_OPCODE_LRP: 579 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 580 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 581 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 582 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2); 583 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0); 584 dst0 = 
lp_build_add(&bld->bld_base.base, tmp0, src2); 585 break; 586 587 case TGSI_OPCODE_CND: 588 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 589 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 590 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 591 tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5); 592 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1); 593 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1); 594 break; 595 596 case TGSI_OPCODE_DP2A: 597 return FALSE; 598 599 case TGSI_OPCODE_FRC: 600 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 601 tmp0 = lp_build_floor(&bld->bld_base.base, src0); 602 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0); 603 break; 604 605 case TGSI_OPCODE_CLAMP: 606 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 607 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 608 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 609 tmp0 = lp_build_max(&bld->bld_base.base, src0, src1); 610 dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2); 611 break; 612 613 case TGSI_OPCODE_FLR: 614 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 615 dst0 = lp_build_floor(&bld->bld_base.base, src0); 616 break; 617 618 case TGSI_OPCODE_ROUND: 619 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 620 dst0 = lp_build_round(&bld->bld_base.base, src0); 621 break; 622 623 case TGSI_OPCODE_EX2: 624 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 625 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X); 626 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0); 627 break; 628 629 case TGSI_OPCODE_LG2: 630 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 631 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 632 dst0 = lp_build_log2(&bld->bld_base.base, tmp0); 633 break; 634 635 case 
TGSI_OPCODE_POW: 636 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 637 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 638 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 639 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X); 640 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1); 641 break; 642 643 case TGSI_OPCODE_XPD: 644 return FALSE; 645 646 case TGSI_OPCODE_RCC: 647 /* deprecated? */ 648 assert(0); 649 return FALSE; 650 651 case TGSI_OPCODE_DPH: 652 return FALSE; 653 654 case TGSI_OPCODE_COS: 655 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 656 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 657 dst0 = lp_build_cos(&bld->bld_base.base, tmp0); 658 break; 659 660 case TGSI_OPCODE_DDX: 661 return FALSE; 662 663 case TGSI_OPCODE_DDY: 664 return FALSE; 665 666 case TGSI_OPCODE_KILP: 667 /* predicated kill */ 668 return FALSE; 669 670 case TGSI_OPCODE_KIL: 671 /* conditional kill */ 672 return FALSE; 673 674 case TGSI_OPCODE_PK2H: 675 return FALSE; 676 break; 677 678 case TGSI_OPCODE_PK2US: 679 return FALSE; 680 break; 681 682 case TGSI_OPCODE_PK4B: 683 return FALSE; 684 break; 685 686 case TGSI_OPCODE_PK4UB: 687 return FALSE; 688 689 case TGSI_OPCODE_RFL: 690 return FALSE; 691 692 case TGSI_OPCODE_SEQ: 693 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 694 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 695 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1); 696 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 697 break; 698 699 case TGSI_OPCODE_SFL: 700 dst0 = bld->bld_base.base.zero; 701 break; 702 703 case TGSI_OPCODE_SGT: 704 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 705 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 706 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1); 707 dst0 = 
lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 708 break; 709 710 case TGSI_OPCODE_SIN: 711 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 712 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 713 dst0 = lp_build_sin(&bld->bld_base.base, tmp0); 714 break; 715 716 case TGSI_OPCODE_SLE: 717 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 718 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 719 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1); 720 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 721 break; 722 723 case TGSI_OPCODE_SNE: 724 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 725 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 726 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1); 727 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 728 break; 729 730 case TGSI_OPCODE_STR: 731 dst0 = bld->bld_base.base.one; 732 break; 733 734 case TGSI_OPCODE_TEX: 735 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); 736 break; 737 738 case TGSI_OPCODE_TXD: 739 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); 740 break; 741 742 case TGSI_OPCODE_UP2H: 743 /* deprecated */ 744 assert (0); 745 return FALSE; 746 break; 747 748 case TGSI_OPCODE_UP2US: 749 /* deprecated */ 750 assert(0); 751 return FALSE; 752 break; 753 754 case TGSI_OPCODE_UP4B: 755 /* deprecated */ 756 assert(0); 757 return FALSE; 758 break; 759 760 case TGSI_OPCODE_UP4UB: 761 /* deprecated */ 762 assert(0); 763 return FALSE; 764 break; 765 766 case TGSI_OPCODE_X2D: 767 /* deprecated? 
*/ 768 assert(0); 769 return FALSE; 770 break; 771 772 case TGSI_OPCODE_ARA: 773 /* deprecated */ 774 assert(0); 775 return FALSE; 776 break; 777 778 case TGSI_OPCODE_ARR: 779 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 780 dst0 = lp_build_round(&bld->bld_base.base, src0); 781 break; 782 783 case TGSI_OPCODE_BRA: 784 /* deprecated */ 785 assert(0); 786 return FALSE; 787 break; 788 789 case TGSI_OPCODE_CAL: 790 return FALSE; 791 792 case TGSI_OPCODE_RET: 793 return FALSE; 794 795 case TGSI_OPCODE_END: 796 *pc = -1; 797 break; 798 799 case TGSI_OPCODE_SSG: 800 /* TGSI_OPCODE_SGN */ 801 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 802 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0); 803 break; 804 805 case TGSI_OPCODE_CMP: 806 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 807 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 808 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 809 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero); 810 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2); 811 break; 812 813 case TGSI_OPCODE_SCS: 814 return FALSE; 815 816 case TGSI_OPCODE_TXB: 817 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); 818 break; 819 820 case TGSI_OPCODE_NRM: 821 /* fall-through */ 822 case TGSI_OPCODE_NRM4: 823 return FALSE; 824 825 case TGSI_OPCODE_DIV: 826 /* deprecated */ 827 assert(0); 828 return FALSE; 829 break; 830 831 case TGSI_OPCODE_DP2: 832 return FALSE; 833 834 case TGSI_OPCODE_TXL: 835 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); 836 break; 837 838 case TGSI_OPCODE_TXP: 839 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED); 840 break; 841 842 case TGSI_OPCODE_BRK: 843 return FALSE; 844 845 case TGSI_OPCODE_IF: 846 return FALSE; 847 848 case TGSI_OPCODE_BGNLOOP: 849 return FALSE; 850 851 case TGSI_OPCODE_BGNSUB: 852 return FALSE; 853 854 case TGSI_OPCODE_ELSE: 855 return 
FALSE; 856 857 case TGSI_OPCODE_ENDIF: 858 return FALSE; 859 860 case TGSI_OPCODE_ENDLOOP: 861 return FALSE; 862 863 case TGSI_OPCODE_ENDSUB: 864 return FALSE; 865 866 case TGSI_OPCODE_PUSHA: 867 /* deprecated? */ 868 assert(0); 869 return FALSE; 870 break; 871 872 case TGSI_OPCODE_POPA: 873 /* deprecated? */ 874 assert(0); 875 return FALSE; 876 break; 877 878 case TGSI_OPCODE_CEIL: 879 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 880 dst0 = lp_build_ceil(&bld->bld_base.base, src0); 881 break; 882 883 case TGSI_OPCODE_I2F: 884 /* deprecated? */ 885 assert(0); 886 return FALSE; 887 break; 888 889 case TGSI_OPCODE_NOT: 890 /* deprecated? */ 891 assert(0); 892 return FALSE; 893 break; 894 895 case TGSI_OPCODE_TRUNC: 896 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 897 dst0 = lp_build_trunc(&bld->bld_base.base, src0); 898 break; 899 900 case TGSI_OPCODE_SHL: 901 /* deprecated? */ 902 assert(0); 903 return FALSE; 904 break; 905 906 case TGSI_OPCODE_ISHR: 907 /* deprecated? */ 908 assert(0); 909 return FALSE; 910 break; 911 912 case TGSI_OPCODE_AND: 913 /* deprecated? */ 914 assert(0); 915 return FALSE; 916 break; 917 918 case TGSI_OPCODE_OR: 919 /* deprecated? */ 920 assert(0); 921 return FALSE; 922 break; 923 924 case TGSI_OPCODE_MOD: 925 /* deprecated? */ 926 assert(0); 927 return FALSE; 928 break; 929 930 case TGSI_OPCODE_XOR: 931 /* deprecated? */ 932 assert(0); 933 return FALSE; 934 break; 935 936 case TGSI_OPCODE_SAD: 937 /* deprecated? */ 938 assert(0); 939 return FALSE; 940 break; 941 942 case TGSI_OPCODE_TXF: 943 /* deprecated? */ 944 assert(0); 945 return FALSE; 946 break; 947 948 case TGSI_OPCODE_TXQ: 949 /* deprecated? 
*/ 950 assert(0); 951 return FALSE; 952 break; 953 954 case TGSI_OPCODE_CONT: 955 return FALSE; 956 957 case TGSI_OPCODE_EMIT: 958 return FALSE; 959 break; 960 961 case TGSI_OPCODE_ENDPRIM: 962 return FALSE; 963 break; 964 965 case TGSI_OPCODE_NOP: 966 break; 967 968 default: 969 return FALSE; 970 } 971 972 if (info->num_dst) { 973 lp_emit_store_aos(bld, inst, 0, dst0); 974 } 975 976 return TRUE; 977} 978 979 980void 981lp_build_tgsi_aos(struct gallivm_state *gallivm, 982 const struct tgsi_token *tokens, 983 struct lp_type type, 984 const unsigned char swizzles[4], 985 LLVMValueRef consts_ptr, 986 const LLVMValueRef *inputs, 987 LLVMValueRef *outputs, 988 struct lp_build_sampler_aos *sampler, 989 const struct tgsi_shader_info *info) 990{ 991 struct lp_build_tgsi_aos_context bld; 992 struct tgsi_parse_context parse; 993 uint num_immediates = 0; 994 unsigned chan; 995 int pc = 0; 996 997 /* Setup build context */ 998 memset(&bld, 0, sizeof bld); 999 lp_build_context_init(&bld.bld_base.base, gallivm, type); 1000 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); 1001 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); 1002 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type)); 1003 1004 for (chan = 0; chan < 4; ++chan) { 1005 bld.swizzles[chan] = swizzles[chan]; 1006 bld.inv_swizzles[swizzles[chan]] = chan; 1007 } 1008 1009 bld.inputs = inputs; 1010 bld.outputs = outputs; 1011 bld.consts_ptr = consts_ptr; 1012 bld.sampler = sampler; 1013 bld.indirect_files = info->indirect_files; 1014 bld.bld_base.emit_swizzle = swizzle_aos; 1015 bld.bld_base.info = info; 1016 1017 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; 1018 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; 1019 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; 1020 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; 1021 1022 /* Set opcode actions */ 1023 
lp_set_default_actions_cpu(&bld.bld_base); 1024 1025 if (!lp_bld_tgsi_list_init(&bld.bld_base)) { 1026 return; 1027 } 1028 1029 tgsi_parse_init(&parse, tokens); 1030 1031 while (!tgsi_parse_end_of_tokens(&parse)) { 1032 tgsi_parse_token(&parse); 1033 1034 switch(parse.FullToken.Token.Type) { 1035 case TGSI_TOKEN_TYPE_DECLARATION: 1036 /* Inputs already interpolated */ 1037 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration); 1038 break; 1039 1040 case TGSI_TOKEN_TYPE_INSTRUCTION: 1041 /* save expanded instruction */ 1042 lp_bld_tgsi_add_instruction(&bld.bld_base, 1043 &parse.FullToken.FullInstruction); 1044 break; 1045 1046 case TGSI_TOKEN_TYPE_IMMEDIATE: 1047 /* simply copy the immediate values into the next immediates[] slot */ 1048 { 1049 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1050 float imm[4]; 1051 assert(size <= 4); 1052 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 1053 for (chan = 0; chan < 4; ++chan) { 1054 imm[chan] = 0.0f; 1055 } 1056 for (chan = 0; chan < size; ++chan) { 1057 unsigned swizzle = bld.swizzles[chan]; 1058 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float; 1059 } 1060 bld.immediates[num_immediates] = 1061 lp_build_const_aos(gallivm, type, 1062 imm[0], imm[1], imm[2], imm[3], 1063 NULL); 1064 num_immediates++; 1065 } 1066 break; 1067 1068 case TGSI_TOKEN_TYPE_PROPERTY: 1069 break; 1070 1071 default: 1072 assert(0); 1073 } 1074 } 1075 1076 while (pc != -1) { 1077 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc; 1078 const struct tgsi_opcode_info *opcode_info = 1079 tgsi_get_opcode_info(instr->Instruction.Opcode); 1080 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc)) 1081 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 1082 opcode_info->mnemonic); 1083 } 1084 1085 if (0) { 1086 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); 1087 LLVMValueRef function = LLVMGetBasicBlockParent(block); 1088 
debug_printf("11111111111111111111111111111 \n"); 1089 tgsi_dump(tokens, 0); 1090 lp_debug_dump_value(function); 1091 debug_printf("2222222222222222222222222222 \n"); 1092 } 1093 tgsi_parse_free(&parse); 1094 FREE(bld.bld_base.instructions); 1095 1096 if (0) { 1097 LLVMModuleRef module = LLVMGetGlobalParent( 1098 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 1099 LLVMDumpModule(module); 1100 } 1101 1102} 1103 1104