lp_bld_tgsi_aos.c revision c426e63aa064debc23f9819c3862f357f1726bce
/**************************************************************************
 *
 * Copyright 2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- AoS.
 *
 * FIXME:
 * - No control flow support: the existing control flow code should be factored
 *   out of the SoA code into a common module and shared.
 * - No derivatives. Derivative logic should be pluggable, just like the samplers.
36 * 37 * @author Jose Fonseca <jfonseca@vmware.com> 38 */ 39 40#include "pipe/p_config.h" 41#include "pipe/p_shader_tokens.h" 42#include "util/u_debug.h" 43#include "util/u_math.h" 44#include "util/u_memory.h" 45#include "tgsi/tgsi_dump.h" 46#include "tgsi/tgsi_info.h" 47#include "tgsi/tgsi_parse.h" 48#include "tgsi/tgsi_util.h" 49#include "tgsi/tgsi_scan.h" 50#include "lp_bld_type.h" 51#include "lp_bld_const.h" 52#include "lp_bld_arit.h" 53#include "lp_bld_logic.h" 54#include "lp_bld_swizzle.h" 55#include "lp_bld_flow.h" 56#include "lp_bld_quad.h" 57#include "lp_bld_tgsi.h" 58#include "lp_bld_debug.h" 59 60 61/** 62 * Wrapper around lp_build_swizzle_aos which translates swizzles to another 63 * ordering. 64 */ 65static LLVMValueRef 66swizzle_aos(struct lp_build_tgsi_context *bld_base, 67 LLVMValueRef a, 68 unsigned swizzle_x, 69 unsigned swizzle_y, 70 unsigned swizzle_z, 71 unsigned swizzle_w) 72{ 73 unsigned char swizzles[4]; 74 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base); 75 76 assert(swizzle_x < 4); 77 assert(swizzle_y < 4); 78 assert(swizzle_z < 4); 79 assert(swizzle_w < 4); 80 81 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x]; 82 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y]; 83 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z]; 84 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w]; 85 86 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles); 87} 88 89 90static LLVMValueRef 91swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld, 92 LLVMValueRef a, 93 unsigned chan) 94{ 95 chan = bld->swizzles[chan]; 96 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan); 97} 98 99 100static LLVMValueRef 101emit_fetch_constant( 102 struct lp_build_tgsi_context * bld_base, 103 const struct tgsi_full_src_register * reg, 104 enum tgsi_opcode_type stype, 105 unsigned swizzle) 106{ 107 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 108 LLVMBuilderRef builder = 
bld_base->base.gallivm->builder; 109 struct lp_type type = bld_base->base.type; 110 LLVMValueRef res; 111 unsigned chan; 112 113 assert(!reg->Register.Indirect); 114 115 /* 116 * Get the constants components 117 */ 118 119 res = bld->bld_base.base.undef; 120 for (chan = 0; chan < 4; ++chan) { 121 LLVMValueRef index; 122 LLVMValueRef scalar_ptr; 123 LLVMValueRef scalar; 124 LLVMValueRef swizzle; 125 126 index = lp_build_const_int32(bld->bld_base.base.gallivm, 127 reg->Register.Index * 4 + chan); 128 129 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, ""); 130 131 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 132 133 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); 134 135 /* 136 * NOTE: constants array is always assumed to be RGBA 137 */ 138 139 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm, 140 bld->swizzles[chan]); 141 142 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, ""); 143 } 144 145 /* 146 * Broadcast the first quaternion to all others. 147 * 148 * XXX: could be factored into a reusable function. 
149 */ 150 151 if (type.length > 4) { 152 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 153 unsigned i; 154 155 for (chan = 0; chan < 4; ++chan) { 156 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan); 157 } 158 159 for (i = 4; i < type.length; ++i) { 160 shuffles[i] = shuffles[i % 4]; 161 } 162 163 res = LLVMBuildShuffleVector(builder, 164 res, bld->bld_base.base.undef, 165 LLVMConstVector(shuffles, type.length), 166 ""); 167 } 168 return res; 169} 170 171static LLVMValueRef 172emit_fetch_immediate( 173 struct lp_build_tgsi_context * bld_base, 174 const struct tgsi_full_src_register * reg, 175 enum tgsi_opcode_type stype, 176 unsigned swizzle) 177{ 178 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 179 LLVMValueRef res = bld->immediates[reg->Register.Index]; 180 assert(res); 181 return res; 182} 183 184static LLVMValueRef 185emit_fetch_input( 186 struct lp_build_tgsi_context * bld_base, 187 const struct tgsi_full_src_register * reg, 188 enum tgsi_opcode_type stype, 189 unsigned swizzle) 190{ 191 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 192 LLVMValueRef res = bld->inputs[reg->Register.Index]; 193 assert(!reg->Register.Indirect); 194 assert(res); 195 return res; 196} 197 198static LLVMValueRef 199emit_fetch_temporary( 200 struct lp_build_tgsi_context * bld_base, 201 const struct tgsi_full_src_register * reg, 202 enum tgsi_opcode_type stype, 203 unsigned swizzle) 204{ 205 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 206 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 207 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index]; 208 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, ""); 209 assert(!reg->Register.Indirect); 210 if (!res) 211 return bld->bld_base.base.undef; 212 213 return res; 214} 215 216/** 217 * Register store. 
218 */ 219void 220lp_emit_store_aos( 221 struct lp_build_tgsi_aos_context *bld, 222 const struct tgsi_full_instruction *inst, 223 unsigned index, 224 LLVMValueRef value) 225{ 226 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 227 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 228 LLVMValueRef mask = NULL; 229 LLVMValueRef ptr; 230 231 /* 232 * Saturate the value 233 */ 234 235 switch (inst->Instruction.Saturate) { 236 case TGSI_SAT_NONE: 237 break; 238 239 case TGSI_SAT_ZERO_ONE: 240 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero); 241 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 242 break; 243 244 case TGSI_SAT_MINUS_PLUS_ONE: 245 value = lp_build_max(&bld->bld_base.base, value, lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, -1.0)); 246 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 247 break; 248 249 default: 250 assert(0); 251 } 252 253 /* 254 * Translate the register file 255 */ 256 257 assert(!reg->Register.Indirect); 258 259 switch (reg->Register.File) { 260 case TGSI_FILE_OUTPUT: 261 ptr = bld->outputs[reg->Register.Index]; 262 break; 263 264 case TGSI_FILE_TEMPORARY: 265 ptr = bld->temps[reg->Register.Index]; 266 break; 267 268 case TGSI_FILE_ADDRESS: 269 ptr = bld->addr[reg->Indirect.Index]; 270 break; 271 272 case TGSI_FILE_PREDICATE: 273 ptr = bld->preds[reg->Register.Index]; 274 break; 275 276 default: 277 assert(0); 278 return; 279 } 280 281 if (!ptr) 282 return; 283 /* 284 * Predicate 285 */ 286 287 if (inst->Instruction.Predicate) { 288 LLVMValueRef pred; 289 290 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS); 291 292 pred = LLVMBuildLoad(builder, 293 bld->preds[inst->Predicate.Index], ""); 294 295 /* 296 * Convert the value to an integer mask. 
297 */ 298 pred = lp_build_compare(bld->bld_base.base.gallivm, 299 bld->bld_base.base.type, 300 PIPE_FUNC_NOTEQUAL, 301 pred, 302 bld->bld_base.base.zero); 303 304 if (inst->Predicate.Negate) { 305 pred = LLVMBuildNot(builder, pred, ""); 306 } 307 308 pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred, 309 inst->Predicate.SwizzleX, 310 inst->Predicate.SwizzleY, 311 inst->Predicate.SwizzleZ, 312 inst->Predicate.SwizzleW); 313 314 if (mask) { 315 mask = LLVMBuildAnd(builder, mask, pred, ""); 316 } else { 317 mask = pred; 318 } 319 } 320 321 /* 322 * Writemask 323 */ 324 325 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { 326 LLVMValueRef writemask; 327 328 writemask = lp_build_const_mask_aos(bld->bld_base.base.gallivm, bld->bld_base.base.type, 329 reg->Register.WriteMask); 330 331 if (mask) { 332 mask = LLVMBuildAnd(builder, mask, writemask, ""); 333 } else { 334 mask = writemask; 335 } 336 } 337 338 if (mask) { 339 LLVMValueRef orig_value; 340 341 orig_value = LLVMBuildLoad(builder, ptr, ""); 342 value = lp_build_select(&bld->bld_base.base, 343 mask, value, orig_value); 344 } 345 346 LLVMBuildStore(builder, value, ptr); 347} 348 349 350/** 351 * High-level instruction translators. 
352 */ 353 354static LLVMValueRef 355emit_tex(struct lp_build_tgsi_aos_context *bld, 356 const struct tgsi_full_instruction *inst, 357 enum lp_build_tex_modifier modifier) 358{ 359 unsigned target; 360 unsigned unit; 361 LLVMValueRef coords; 362 LLVMValueRef ddx; 363 LLVMValueRef ddy; 364 365 if (!bld->sampler) { 366 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 367 return bld->bld_base.base.undef; 368 } 369 370 target = inst->Texture.Texture; 371 372 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL); 373 374 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 375 ddx = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL); 376 ddy = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL); 377 unit = inst->Src[3].Register.Index; 378 } else { 379#if 0 380 ddx = lp_build_ddx( &bld->bld_base.base, coords ); 381 ddy = lp_build_ddy( &bld->bld_base.base, coords ); 382#else 383 /* TODO */ 384 ddx = bld->bld_base.base.one; 385 ddy = bld->bld_base.base.one; 386#endif 387 unit = inst->Src[1].Register.Index; 388 } 389 390 return bld->sampler->emit_fetch_texel(bld->sampler, 391 &bld->bld_base.base, 392 target, unit, 393 coords, ddx, ddy, 394 modifier); 395} 396 397 398void 399lp_emit_declaration_aos( 400 struct lp_build_tgsi_aos_context *bld, 401 const struct tgsi_full_declaration *decl) 402{ 403 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 404 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type); 405 406 unsigned first = decl->Range.First; 407 unsigned last = decl->Range.Last; 408 unsigned idx; 409 410 for (idx = first; idx <= last; ++idx) { 411 switch (decl->Declaration.File) { 412 case TGSI_FILE_TEMPORARY: 413 assert(idx < LP_MAX_TGSI_TEMPS); 414 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 415 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1); 416 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm, 417 
vec_type, array_size, ""); 418 } else { 419 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, ""); 420 } 421 break; 422 423 case TGSI_FILE_OUTPUT: 424 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, ""); 425 break; 426 427 case TGSI_FILE_ADDRESS: 428 assert(idx < LP_MAX_TGSI_ADDRS); 429 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, ""); 430 break; 431 432 case TGSI_FILE_PREDICATE: 433 assert(idx < LP_MAX_TGSI_PREDS); 434 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, ""); 435 break; 436 437 default: 438 /* don't need to declare other vars */ 439 break; 440 } 441 } 442} 443 444 445/** 446 * Emit LLVM for one TGSI instruction. 447 * \param return TRUE for success, FALSE otherwise 448 */ 449boolean 450lp_emit_instruction_aos( 451 struct lp_build_tgsi_aos_context *bld, 452 const struct tgsi_full_instruction *inst, 453 const struct tgsi_opcode_info *info, 454 int *pc) 455{ 456 LLVMValueRef src0, src1, src2; 457 LLVMValueRef tmp0, tmp1; 458 LLVMValueRef dst0 = NULL; 459 460 /* 461 * Stores and write masks are handled in a general fashion after the long 462 * instruction opcode switch statement. 463 * 464 * Although not stricitly necessary, we avoid generating instructions for 465 * channels which won't be stored, in cases where's that easy. For some 466 * complex instructions, like texture sampling, it is more convenient to 467 * assume a full writemask and then let LLVM optimization passes eliminate 468 * redundant code. 
469 */ 470 471 (*pc)++; 472 473 assert(info->num_dst <= 1); 474 if (info->num_dst) { 475 dst0 = bld->bld_base.base.undef; 476 } 477 478 switch (inst->Instruction.Opcode) { 479 case TGSI_OPCODE_ARL: 480 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 481 dst0 = lp_build_floor(&bld->bld_base.base, src0); 482 break; 483 484 case TGSI_OPCODE_MOV: 485 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 486 break; 487 488 case TGSI_OPCODE_LIT: 489 return FALSE; 490 491 case TGSI_OPCODE_RCP: 492 /* TGSI_OPCODE_RECIP */ 493 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 494 dst0 = lp_build_rcp(&bld->bld_base.base, src0); 495 break; 496 497 case TGSI_OPCODE_RSQ: 498 /* TGSI_OPCODE_RECIPSQRT */ 499 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 500 tmp0 = lp_build_emit_llvm_unary(&bld->bld_base, TGSI_OPCODE_ABS, src0); 501 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0); 502 break; 503 504 case TGSI_OPCODE_EXP: 505 return FALSE; 506 507 case TGSI_OPCODE_LOG: 508 return FALSE; 509 510 case TGSI_OPCODE_MUL: 511 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 512 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 513 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1); 514 break; 515 516 case TGSI_OPCODE_ADD: 517 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 518 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 519 dst0 = lp_build_add(&bld->bld_base.base, src0, src1); 520 break; 521 522 case TGSI_OPCODE_DP3: 523 /* TGSI_OPCODE_DOT3 */ 524 return FALSE; 525 526 case TGSI_OPCODE_DP4: 527 /* TGSI_OPCODE_DOT4 */ 528 return FALSE; 529 530 case TGSI_OPCODE_DST: 531 return FALSE; 532 533 case TGSI_OPCODE_MIN: 534 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 535 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 536 dst0 = lp_build_max(&bld->bld_base.base, src0, src1); 537 break; 538 539 case TGSI_OPCODE_MAX: 
540 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 541 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 542 dst0 = lp_build_max(&bld->bld_base.base, src0, src1); 543 break; 544 545 case TGSI_OPCODE_SLT: 546 /* TGSI_OPCODE_SETLT */ 547 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 548 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 549 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1); 550 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 551 break; 552 553 case TGSI_OPCODE_SGE: 554 /* TGSI_OPCODE_SETGE */ 555 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 556 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 557 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1); 558 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 559 break; 560 561 case TGSI_OPCODE_MAD: 562 /* TGSI_OPCODE_MADD */ 563 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 564 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 565 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 566 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1); 567 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); 568 break; 569 570 case TGSI_OPCODE_SUB: 571 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 572 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 573 dst0 = lp_build_sub(&bld->bld_base.base, src0, src1); 574 break; 575 576 case TGSI_OPCODE_LRP: 577 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 578 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 579 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 580 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2); 581 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0); 582 dst0 = 
lp_build_add(&bld->bld_base.base, tmp0, src2); 583 break; 584 585 case TGSI_OPCODE_CND: 586 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 587 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 588 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 589 tmp1 = lp_build_const_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, 0.5); 590 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src2, tmp1); 591 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src0, src1); 592 break; 593 594 case TGSI_OPCODE_DP2A: 595 return FALSE; 596 597 case TGSI_OPCODE_FRC: 598 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 599 tmp0 = lp_build_floor(&bld->bld_base.base, src0); 600 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0); 601 break; 602 603 case TGSI_OPCODE_CLAMP: 604 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 605 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 606 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 607 tmp0 = lp_build_max(&bld->bld_base.base, src0, src1); 608 dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2); 609 break; 610 611 case TGSI_OPCODE_FLR: 612 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 613 dst0 = lp_build_floor(&bld->bld_base.base, src0); 614 break; 615 616 case TGSI_OPCODE_ROUND: 617 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 618 dst0 = lp_build_round(&bld->bld_base.base, src0); 619 break; 620 621 case TGSI_OPCODE_EX2: 622 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 623 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X); 624 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0); 625 break; 626 627 case TGSI_OPCODE_LG2: 628 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 629 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 630 dst0 = lp_build_log2(&bld->bld_base.base, tmp0); 631 break; 632 633 case 
TGSI_OPCODE_POW: 634 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 635 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 636 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 637 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X); 638 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1); 639 break; 640 641 case TGSI_OPCODE_XPD: 642 return FALSE; 643 644 case TGSI_OPCODE_RCC: 645 /* deprecated? */ 646 assert(0); 647 return FALSE; 648 649 case TGSI_OPCODE_DPH: 650 return FALSE; 651 652 case TGSI_OPCODE_COS: 653 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 654 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 655 dst0 = lp_build_cos(&bld->bld_base.base, tmp0); 656 break; 657 658 case TGSI_OPCODE_DDX: 659 return FALSE; 660 661 case TGSI_OPCODE_DDY: 662 return FALSE; 663 664 case TGSI_OPCODE_KILP: 665 /* predicated kill */ 666 return FALSE; 667 668 case TGSI_OPCODE_KIL: 669 /* conditional kill */ 670 return FALSE; 671 672 case TGSI_OPCODE_PK2H: 673 return FALSE; 674 break; 675 676 case TGSI_OPCODE_PK2US: 677 return FALSE; 678 break; 679 680 case TGSI_OPCODE_PK4B: 681 return FALSE; 682 break; 683 684 case TGSI_OPCODE_PK4UB: 685 return FALSE; 686 687 case TGSI_OPCODE_RFL: 688 return FALSE; 689 690 case TGSI_OPCODE_SEQ: 691 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 692 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 693 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1); 694 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 695 break; 696 697 case TGSI_OPCODE_SFL: 698 dst0 = bld->bld_base.base.zero; 699 break; 700 701 case TGSI_OPCODE_SGT: 702 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 703 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 704 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1); 705 dst0 = 
lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 706 break; 707 708 case TGSI_OPCODE_SIN: 709 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 710 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 711 dst0 = lp_build_sin(&bld->bld_base.base, tmp0); 712 break; 713 714 case TGSI_OPCODE_SLE: 715 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 716 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 717 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1); 718 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 719 break; 720 721 case TGSI_OPCODE_SNE: 722 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 723 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 724 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1); 725 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 726 break; 727 728 case TGSI_OPCODE_STR: 729 dst0 = bld->bld_base.base.one; 730 break; 731 732 case TGSI_OPCODE_TEX: 733 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); 734 break; 735 736 case TGSI_OPCODE_TXD: 737 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); 738 break; 739 740 case TGSI_OPCODE_UP2H: 741 /* deprecated */ 742 assert (0); 743 return FALSE; 744 break; 745 746 case TGSI_OPCODE_UP2US: 747 /* deprecated */ 748 assert(0); 749 return FALSE; 750 break; 751 752 case TGSI_OPCODE_UP4B: 753 /* deprecated */ 754 assert(0); 755 return FALSE; 756 break; 757 758 case TGSI_OPCODE_UP4UB: 759 /* deprecated */ 760 assert(0); 761 return FALSE; 762 break; 763 764 case TGSI_OPCODE_X2D: 765 /* deprecated? 
*/ 766 assert(0); 767 return FALSE; 768 break; 769 770 case TGSI_OPCODE_ARA: 771 /* deprecated */ 772 assert(0); 773 return FALSE; 774 break; 775 776 case TGSI_OPCODE_ARR: 777 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 778 dst0 = lp_build_round(&bld->bld_base.base, src0); 779 break; 780 781 case TGSI_OPCODE_BRA: 782 /* deprecated */ 783 assert(0); 784 return FALSE; 785 break; 786 787 case TGSI_OPCODE_CAL: 788 return FALSE; 789 790 case TGSI_OPCODE_RET: 791 return FALSE; 792 793 case TGSI_OPCODE_END: 794 *pc = -1; 795 break; 796 797 case TGSI_OPCODE_SSG: 798 /* TGSI_OPCODE_SGN */ 799 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 800 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0); 801 break; 802 803 case TGSI_OPCODE_CMP: 804 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 805 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 806 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 807 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero); 808 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2); 809 break; 810 811 case TGSI_OPCODE_SCS: 812 return FALSE; 813 814 case TGSI_OPCODE_TXB: 815 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); 816 break; 817 818 case TGSI_OPCODE_NRM: 819 /* fall-through */ 820 case TGSI_OPCODE_NRM4: 821 return FALSE; 822 823 case TGSI_OPCODE_DIV: 824 /* deprecated */ 825 assert(0); 826 return FALSE; 827 break; 828 829 case TGSI_OPCODE_DP2: 830 return FALSE; 831 832 case TGSI_OPCODE_TXL: 833 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); 834 break; 835 836 case TGSI_OPCODE_TXP: 837 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED); 838 break; 839 840 case TGSI_OPCODE_BRK: 841 return FALSE; 842 843 case TGSI_OPCODE_IF: 844 return FALSE; 845 846 case TGSI_OPCODE_BGNLOOP: 847 return FALSE; 848 849 case TGSI_OPCODE_BGNSUB: 850 return FALSE; 851 852 case TGSI_OPCODE_ELSE: 853 return 
FALSE; 854 855 case TGSI_OPCODE_ENDIF: 856 return FALSE; 857 858 case TGSI_OPCODE_ENDLOOP: 859 return FALSE; 860 861 case TGSI_OPCODE_ENDSUB: 862 return FALSE; 863 864 case TGSI_OPCODE_PUSHA: 865 /* deprecated? */ 866 assert(0); 867 return FALSE; 868 break; 869 870 case TGSI_OPCODE_POPA: 871 /* deprecated? */ 872 assert(0); 873 return FALSE; 874 break; 875 876 case TGSI_OPCODE_CEIL: 877 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 878 dst0 = lp_build_ceil(&bld->bld_base.base, src0); 879 break; 880 881 case TGSI_OPCODE_I2F: 882 /* deprecated? */ 883 assert(0); 884 return FALSE; 885 break; 886 887 case TGSI_OPCODE_NOT: 888 /* deprecated? */ 889 assert(0); 890 return FALSE; 891 break; 892 893 case TGSI_OPCODE_TRUNC: 894 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 895 dst0 = lp_build_trunc(&bld->bld_base.base, src0); 896 break; 897 898 case TGSI_OPCODE_SHL: 899 /* deprecated? */ 900 assert(0); 901 return FALSE; 902 break; 903 904 case TGSI_OPCODE_ISHR: 905 /* deprecated? */ 906 assert(0); 907 return FALSE; 908 break; 909 910 case TGSI_OPCODE_AND: 911 /* deprecated? */ 912 assert(0); 913 return FALSE; 914 break; 915 916 case TGSI_OPCODE_OR: 917 /* deprecated? */ 918 assert(0); 919 return FALSE; 920 break; 921 922 case TGSI_OPCODE_MOD: 923 /* deprecated? */ 924 assert(0); 925 return FALSE; 926 break; 927 928 case TGSI_OPCODE_XOR: 929 /* deprecated? */ 930 assert(0); 931 return FALSE; 932 break; 933 934 case TGSI_OPCODE_SAD: 935 /* deprecated? */ 936 assert(0); 937 return FALSE; 938 break; 939 940 case TGSI_OPCODE_TXF: 941 /* deprecated? */ 942 assert(0); 943 return FALSE; 944 break; 945 946 case TGSI_OPCODE_TXQ: 947 /* deprecated? 
*/ 948 assert(0); 949 return FALSE; 950 break; 951 952 case TGSI_OPCODE_CONT: 953 return FALSE; 954 955 case TGSI_OPCODE_EMIT: 956 return FALSE; 957 break; 958 959 case TGSI_OPCODE_ENDPRIM: 960 return FALSE; 961 break; 962 963 case TGSI_OPCODE_NOP: 964 break; 965 966 default: 967 return FALSE; 968 } 969 970 if (info->num_dst) { 971 lp_emit_store_aos(bld, inst, 0, dst0); 972 } 973 974 return TRUE; 975} 976 977 978void 979lp_build_tgsi_aos(struct gallivm_state *gallivm, 980 const struct tgsi_token *tokens, 981 struct lp_type type, 982 const unsigned char swizzles[4], 983 LLVMValueRef consts_ptr, 984 const LLVMValueRef *inputs, 985 LLVMValueRef *outputs, 986 struct lp_build_sampler_aos *sampler, 987 const struct tgsi_shader_info *info) 988{ 989 struct lp_build_tgsi_aos_context bld; 990 struct tgsi_parse_context parse; 991 uint num_immediates = 0; 992 unsigned chan; 993 int pc = 0; 994 995 /* Setup build context */ 996 memset(&bld, 0, sizeof bld); 997 lp_build_context_init(&bld.bld_base.base, gallivm, type); 998 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); 999 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); 1000 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type)); 1001 1002 for (chan = 0; chan < 4; ++chan) { 1003 bld.swizzles[chan] = swizzles[chan]; 1004 bld.inv_swizzles[swizzles[chan]] = chan; 1005 } 1006 1007 bld.inputs = inputs; 1008 bld.outputs = outputs; 1009 bld.consts_ptr = consts_ptr; 1010 bld.sampler = sampler; 1011 bld.indirect_files = info->indirect_files; 1012 bld.bld_base.emit_swizzle = swizzle_aos; 1013 bld.bld_base.info = info; 1014 1015 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; 1016 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; 1017 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; 1018 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; 1019 1020 /* Set opcode actions */ 1021 
lp_set_default_actions_cpu(&bld.bld_base); 1022 1023 if (!lp_bld_tgsi_list_init(&bld.bld_base)) { 1024 return; 1025 } 1026 1027 tgsi_parse_init(&parse, tokens); 1028 1029 while (!tgsi_parse_end_of_tokens(&parse)) { 1030 tgsi_parse_token(&parse); 1031 1032 switch(parse.FullToken.Token.Type) { 1033 case TGSI_TOKEN_TYPE_DECLARATION: 1034 /* Inputs already interpolated */ 1035 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration); 1036 break; 1037 1038 case TGSI_TOKEN_TYPE_INSTRUCTION: 1039 /* save expanded instruction */ 1040 lp_bld_tgsi_add_instruction(&bld.bld_base, 1041 &parse.FullToken.FullInstruction); 1042 break; 1043 1044 case TGSI_TOKEN_TYPE_IMMEDIATE: 1045 /* simply copy the immediate values into the next immediates[] slot */ 1046 { 1047 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1048 float imm[4]; 1049 assert(size <= 4); 1050 assert(num_immediates < LP_MAX_TGSI_IMMEDIATES); 1051 for (chan = 0; chan < 4; ++chan) { 1052 imm[chan] = 0.0f; 1053 } 1054 for (chan = 0; chan < size; ++chan) { 1055 unsigned swizzle = bld.swizzles[chan]; 1056 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float; 1057 } 1058 bld.immediates[num_immediates] = 1059 lp_build_const_aos(gallivm, type, 1060 imm[0], imm[1], imm[2], imm[3], 1061 NULL); 1062 num_immediates++; 1063 } 1064 break; 1065 1066 case TGSI_TOKEN_TYPE_PROPERTY: 1067 break; 1068 1069 default: 1070 assert(0); 1071 } 1072 } 1073 1074 while (pc != -1) { 1075 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc; 1076 const struct tgsi_opcode_info *opcode_info = 1077 tgsi_get_opcode_info(instr->Instruction.Opcode); 1078 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc)) 1079 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 1080 opcode_info->mnemonic); 1081 } 1082 1083 if (0) { 1084 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); 1085 LLVMValueRef function = LLVMGetBasicBlockParent(block); 1086 
debug_printf("11111111111111111111111111111 \n"); 1087 tgsi_dump(tokens, 0); 1088 lp_debug_dump_value(function); 1089 debug_printf("2222222222222222222222222222 \n"); 1090 } 1091 tgsi_parse_free(&parse); 1092 FREE(bld.bld_base.instructions); 1093 1094 if (0) { 1095 LLVMModuleRef module = LLVMGetGlobalParent( 1096 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 1097 LLVMDumpModule(module); 1098 } 1099 1100} 1101 1102