/* ir3_compiler.c revision 80058c0f08ea94d3de96909027a792e397fa9262 */
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <stdarg.h>

#include "pipe/p_state.h"
#include "util/u_string.h"
#include "util/u_memory.h"
#include "util/u_inlines.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_scan.h"

#include "freedreno_lowering.h"
#include "freedreno_util.h"

#include "ir3_compiler.h"
#include "ir3_shader.h"

#include "instr-a3xx.h"
#include "ir3.h"

/* Per-shader-variant state carried through the TGSI -> ir3 translation. */
struct ir3_compile_context {
	const struct tgsi_token *tokens;
	/* true when 'tokens' was allocated by the lowering pass and must be
	 * freed in compile_free():
	 */
	bool free_tokens;
	struct ir3 *ir;
	struct ir3_shader_variant *so;

	/* current (innermost) block being compiled: */
	struct ir3_block *block;
	struct ir3_instruction *current_instr;

	/* we need to defer updates to block->outputs[] until the end
	 * of an instruction (so we don't see new value until *after*
	 * the src registers are processed)
	 */
	struct {
		struct ir3_instruction *instr, **instrp;
	} output_updates[16];
	unsigned num_output_updates;

	/* are we in a sequence of "atomic" instructions?  (see
	 * instr_atomic_start()/instr_atomic_end())
	 */
	bool atomic;

	/* For fragment shaders, from the hw perspective the only
	 * actual input is r0.xy position register passed to bary.f.
	 * But TGSI doesn't know that, it still declares things as
	 * IN[] registers.  So we do all the input tracking normally
	 * and fix things up after compile_instructions()
	 *
	 * NOTE that frag_pos is the hardware position (possibly it
	 * is actually an index or tag or some such.. it is *not*
	 * values that can be directly used for gl_FragCoord..)
	 */
	struct ir3_instruction *frag_pos, *frag_face, *frag_coord[4];

	struct tgsi_parse_context parser;
	unsigned type;              /* TGSI_PROCESSOR_{VERTEX,FRAGMENT,..} */

	struct tgsi_shader_info info;

	/* for calculating input/output positions/linkages: */
	unsigned next_inloc;

	/* scratch TEMPs allocated past the shader's own TEMP range: */
	unsigned num_internal_temps;
	struct tgsi_src_register internal_temps[6];

	/* idx/slot for last compiler generated immediate */
	unsigned immediate_idx;

	/* stack of branch instructions that mark (potentially nested)
	 * branch if/else/loop/etc
	 */
	struct {
		struct ir3_instruction *instr, *cond;
		bool inv;               /* true iff in else leg of branch */
	} branch[16];
	unsigned int branch_count;

	/* list of kill instructions: */
	struct ir3_instruction *kill[16];
	unsigned int kill_count;

	/* used when dst is same as one of the src, to avoid overwriting a
	 * src element before the remaining scalar instructions that make
	 * up the vector operation
	 */
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
};


static void vectorize(struct ir3_compile_context *ctx,
		struct ir3_instruction *instr, struct tgsi_dst_register *dst,
		int nsrcs, ...);
static void create_mov(struct ir3_compile_context *ctx,
		struct tgsi_dst_register *dst, struct tgsi_src_register *src);
static type_t get_ftype(struct ir3_compile_context *ctx);

/* Initialize compile state: run the TGSI lowering pass, reset all
 * per-compile bookkeeping, and set up the TGSI parser.  Returns
 * TGSI_PARSE_OK on success, or a TGSI_PARSE_* error code.
 */
static unsigned
compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
		const struct tgsi_token *tokens)
{
	unsigned ret;
	struct tgsi_shader_info *info = &ctx->info;
	const struct fd_lowering_config lconfig = {
			.color_two_side = so->key.color_two_side,
			.lower_DST = true,
			.lower_XPD = true,
			.lower_SCS = true,
			.lower_LRP = true,
			.lower_FRC = true,
			.lower_POW = true,
			.lower_LIT = true,
			.lower_EXP = true,
			.lower_LOG = true,
			.lower_DP4 = true,
			.lower_DP3 = true,
			.lower_DPH = true,
			.lower_DP2 = true,
			.lower_DP2A = true,
	};

	ctx->tokens = fd_transform_lowering(&lconfig, tokens, &ctx->info);
	/* lowering returns NULL when nothing needed lowering; only free
	 * tokens we actually allocated:
	 */
	ctx->free_tokens = !!ctx->tokens;
	if (!ctx->tokens) {
		/* no lowering */
		ctx->tokens = tokens;
	}
	ctx->ir = so->ir;
	ctx->so = so;
	ctx->next_inloc = 8;
	ctx->num_internal_temps = 0;
	ctx->branch_count = 0;
	ctx->kill_count = 0;
	ctx->block = NULL;
	ctx->current_instr = NULL;
	ctx->num_output_updates = 0;
	ctx->atomic = false;
	ctx->frag_pos = NULL;
	ctx->frag_face = NULL;

	memset(ctx->frag_coord, 0, sizeof(ctx->frag_coord));

#define FM(x) (1 << TGSI_FILE_##x)
	/* optimize can't deal with relative addressing: */
	if (info->indirect_files & (FM(TEMPORARY) | FM(INPUT) | FM(OUTPUT)))
		return TGSI_PARSE_ERROR;

	/* NOTE: if relative addressing is used, we set constlen in
	 * the compiler (to worst-case value) since we don't know in
	 * the assembler what the max addr reg value can be:
	 */
	if (info->indirect_files & FM(CONSTANT))
		so->constlen = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1);

	/* Immediates go after constants: */
	so->first_immediate = info->file_max[TGSI_FILE_CONSTANT] + 1;
	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);

	ret = tgsi_parse_init(&ctx->parser, ctx->tokens);
	if (ret != TGSI_PARSE_OK)
		return ret;

	ctx->type = ctx->parser.FullHeader.Processor.Processor;

	return ret;
}

/* Report a fatal compile error: print the message and dump the TGSI
 * being compiled, then assert (debug builds).
 */
static void
compile_error(struct ir3_compile_context *ctx, const char *format, ...)
{
	va_list ap;
	va_start(ap, format);
	_debug_vprintf(format, ap);
	va_end(ap);
	tgsi_dump(ctx->tokens, 0);
	debug_assert(0);
}

#define compile_assert(ctx, cond) do { \
		if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
	} while (0)

/* Release resources acquired in compile_init(). */
static void
compile_free(struct ir3_compile_context *ctx)
{
	if (ctx->free_tokens)
		free((void *)ctx->tokens);
	tgsi_parse_free(&ctx->parser);
}

/* Table entry mapping one TGSI opcode to its translation handler. */
struct instr_translater {
	void (*fxn)(const struct instr_translater *t,
			struct ir3_compile_context *ctx,
			struct tgsi_full_instruction *inst);
	unsigned tgsi_opc;
	opc_t opc;
	opc_t hopc;    /* opc to use for half_precision mode, if different */
	unsigned arg;
};

/* Flush deferred temporaries[]/outputs[] updates queued by ssa_dst(),
 * unless we are inside an atomic group (see ctx->atomic).
 */
static void
instr_finish(struct ir3_compile_context *ctx)
{
	unsigned i;

	if (ctx->atomic)
		return;

	for (i = 0; i < ctx->num_output_updates; i++)
		*(ctx->output_updates[i].instrp) = ctx->output_updates[i].instr;

	ctx->num_output_updates = 0;
}

/* For "atomic" groups of instructions, for example the four scalar
 * instructions to perform a vec4 operation.  Basically this just
 * blocks out handling of output_updates so the next scalar instruction
 * still sees the result from before the start of the atomic group.
 *
 * NOTE: when used properly, this could probably replace get/put_dst()
 * stuff.
252 */ 253static void 254instr_atomic_start(struct ir3_compile_context *ctx) 255{ 256 ctx->atomic = true; 257} 258 259static void 260instr_atomic_end(struct ir3_compile_context *ctx) 261{ 262 ctx->atomic = false; 263 instr_finish(ctx); 264} 265 266static struct ir3_instruction * 267instr_create(struct ir3_compile_context *ctx, int category, opc_t opc) 268{ 269 instr_finish(ctx); 270 return (ctx->current_instr = ir3_instr_create(ctx->block, category, opc)); 271} 272 273static struct ir3_instruction * 274instr_clone(struct ir3_compile_context *ctx, struct ir3_instruction *instr) 275{ 276 instr_finish(ctx); 277 return (ctx->current_instr = ir3_instr_clone(instr)); 278} 279 280static struct ir3_block * 281push_block(struct ir3_compile_context *ctx) 282{ 283 struct ir3_block *block; 284 unsigned ntmp, nin, nout; 285 286#define SCALAR_REGS(file) (4 * (ctx->info.file_max[TGSI_FILE_ ## file] + 1)) 287 288 /* hmm, give ourselves room to create 4 extra temporaries (vec4): 289 */ 290 ntmp = SCALAR_REGS(TEMPORARY); 291 ntmp += 4 * 4; 292 293 nout = SCALAR_REGS(OUTPUT); 294 nin = SCALAR_REGS(INPUT); 295 296 /* for outermost block, 'inputs' are the actual shader INPUT 297 * register file. Reads from INPUT registers always go back to 298 * top block. For nested blocks, 'inputs' is used to track any 299 * TEMPORARY file register from one of the enclosing blocks that 300 * is ready in this block. 
301 */ 302 if (!ctx->block) { 303 /* NOTE: fragment shaders actually have two inputs (r0.xy, the 304 * position) 305 */ 306 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 307 int n = 2; 308 if (ctx->info.reads_position) 309 n += 4; 310 if (ctx->info.uses_frontface) 311 n += 4; 312 nin = MAX2(n, nin); 313 nout += ARRAY_SIZE(ctx->kill); 314 } 315 } else { 316 nin = ntmp; 317 } 318 319 block = ir3_block_create(ctx->ir, ntmp, nin, nout); 320 321 if ((ctx->type == TGSI_PROCESSOR_FRAGMENT) && !ctx->block) 322 block->noutputs -= ARRAY_SIZE(ctx->kill); 323 324 block->parent = ctx->block; 325 ctx->block = block; 326 327 return block; 328} 329 330static void 331pop_block(struct ir3_compile_context *ctx) 332{ 333 ctx->block = ctx->block->parent; 334 compile_assert(ctx, ctx->block); 335} 336 337static struct ir3_instruction * 338create_output(struct ir3_block *block, struct ir3_instruction *instr, 339 unsigned n) 340{ 341 struct ir3_instruction *out; 342 343 out = ir3_instr_create(block, -1, OPC_META_OUTPUT); 344 out->inout.block = block; 345 ir3_reg_create(out, n, 0); 346 if (instr) 347 ir3_reg_create(out, 0, IR3_REG_SSA)->instr = instr; 348 349 return out; 350} 351 352static struct ir3_instruction * 353create_input(struct ir3_block *block, struct ir3_instruction *instr, 354 unsigned n) 355{ 356 struct ir3_instruction *in; 357 358 in = ir3_instr_create(block, -1, OPC_META_INPUT); 359 in->inout.block = block; 360 ir3_reg_create(in, n, 0); 361 if (instr) 362 ir3_reg_create(in, 0, IR3_REG_SSA)->instr = instr; 363 364 return in; 365} 366 367static struct ir3_instruction * 368block_input(struct ir3_block *block, unsigned n) 369{ 370 /* references to INPUT register file always go back up to 371 * top level: 372 */ 373 if (block->parent) 374 return block_input(block->parent, n); 375 return block->inputs[n]; 376} 377 378/* return temporary in scope, creating if needed meta-input node 379 * to track block inputs 380 */ 381static struct ir3_instruction * 382block_temporary(struct 
ir3_block *block, unsigned n) 383{ 384 /* references to TEMPORARY register file, find the nearest 385 * enclosing block which has already assigned this temporary, 386 * creating meta-input instructions along the way to keep 387 * track of block inputs 388 */ 389 if (block->parent && !block->temporaries[n]) { 390 /* if already have input for this block, reuse: */ 391 if (!block->inputs[n]) 392 block->inputs[n] = block_temporary(block->parent, n); 393 394 /* and create new input to return: */ 395 return create_input(block, block->inputs[n], n); 396 } 397 return block->temporaries[n]; 398} 399 400static struct ir3_instruction * 401create_immed(struct ir3_compile_context *ctx, float val) 402{ 403 /* NOTE: *don't* use instr_create() here! 404 */ 405 struct ir3_instruction *instr; 406 instr = ir3_instr_create(ctx->block, 1, 0); 407 instr->cat1.src_type = get_ftype(ctx); 408 instr->cat1.dst_type = get_ftype(ctx); 409 ir3_reg_create(instr, 0, 0); 410 ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = val; 411 return instr; 412} 413 414static void 415ssa_dst(struct ir3_compile_context *ctx, struct ir3_instruction *instr, 416 const struct tgsi_dst_register *dst, unsigned chan) 417{ 418 unsigned n = regid(dst->Index, chan); 419 unsigned idx = ctx->num_output_updates; 420 421 compile_assert(ctx, idx < ARRAY_SIZE(ctx->output_updates)); 422 423 /* NOTE: defer update of temporaries[idx] or output[idx] 424 * until instr_finish(), so that if the current instruction 425 * reads the same TEMP/OUT[] it gets the old value: 426 * 427 * bleh.. this might be a bit easier to just figure out 428 * in instr_finish(). But at that point we've already 429 * lost information about OUTPUT vs TEMPORARY register 430 * file.. 
431 */ 432 433 switch (dst->File) { 434 case TGSI_FILE_OUTPUT: 435 compile_assert(ctx, n < ctx->block->noutputs); 436 ctx->output_updates[idx].instrp = &ctx->block->outputs[n]; 437 ctx->output_updates[idx].instr = instr; 438 ctx->num_output_updates++; 439 break; 440 case TGSI_FILE_TEMPORARY: 441 compile_assert(ctx, n < ctx->block->ntemporaries); 442 ctx->output_updates[idx].instrp = &ctx->block->temporaries[n]; 443 ctx->output_updates[idx].instr = instr; 444 ctx->num_output_updates++; 445 break; 446 case TGSI_FILE_ADDRESS: 447 compile_assert(ctx, n < 1); 448 ctx->output_updates[idx].instrp = &ctx->block->address; 449 ctx->output_updates[idx].instr = instr; 450 ctx->num_output_updates++; 451 break; 452 } 453} 454 455static void 456ssa_src(struct ir3_compile_context *ctx, struct ir3_register *reg, 457 const struct tgsi_src_register *src, unsigned chan) 458{ 459 struct ir3_block *block = ctx->block; 460 unsigned n = regid(src->Index, chan); 461 462 switch (src->File) { 463 case TGSI_FILE_INPUT: 464 reg->flags |= IR3_REG_SSA; 465 reg->instr = block_input(ctx->block, n); 466 break; 467 case TGSI_FILE_OUTPUT: 468 /* really this should just happen in case of 'MOV_SAT OUT[n], ..', 469 * for the following clamp instructions: 470 */ 471 reg->flags |= IR3_REG_SSA; 472 reg->instr = block->outputs[n]; 473 /* we don't have to worry about read from an OUTPUT that was 474 * assigned outside of the current block, because the _SAT 475 * clamp instructions will always be in the same block as 476 * the original instruction which wrote the OUTPUT 477 */ 478 compile_assert(ctx, reg->instr); 479 break; 480 case TGSI_FILE_TEMPORARY: 481 reg->flags |= IR3_REG_SSA; 482 reg->instr = block_temporary(ctx->block, n); 483 break; 484 } 485 486 if ((reg->flags & IR3_REG_SSA) && !reg->instr) { 487 /* this can happen when registers (or components of a TGSI 488 * register) are used as src before they have been assigned 489 * (undefined contents). 
To avoid confusing the rest of the 490 * compiler, and to generally keep things peachy, substitute 491 * an instruction that sets the src to 0.0. Or to keep 492 * things undefined, I could plug in a random number? :-P 493 * 494 * NOTE: *don't* use instr_create() here! 495 */ 496 reg->instr = create_immed(ctx, 0.0); 497 } 498} 499 500static struct ir3_register * 501add_dst_reg_wrmask(struct ir3_compile_context *ctx, 502 struct ir3_instruction *instr, const struct tgsi_dst_register *dst, 503 unsigned chan, unsigned wrmask) 504{ 505 unsigned flags = 0, num = 0; 506 struct ir3_register *reg; 507 508 switch (dst->File) { 509 case TGSI_FILE_OUTPUT: 510 case TGSI_FILE_TEMPORARY: 511 /* uses SSA */ 512 break; 513 case TGSI_FILE_ADDRESS: 514 flags |= IR3_REG_ADDR; 515 /* uses SSA */ 516 break; 517 default: 518 compile_error(ctx, "unsupported dst register file: %s\n", 519 tgsi_file_name(dst->File)); 520 break; 521 } 522 523 if (dst->Indirect) 524 flags |= IR3_REG_RELATIV; 525 526 reg = ir3_reg_create(instr, regid(num, chan), flags); 527 528 /* NOTE: do not call ssa_dst() if atomic.. vectorize() 529 * itself will call ssa_dst(). This is to filter out 530 * the (initially bogus) .x component dst which is 531 * created (but not necessarily used, ie. 
if the net 532 * result of the vector operation does not write to 533 * the .x component) 534 */ 535 536 reg->wrmask = wrmask; 537 if (wrmask == 0x1) { 538 /* normal case */ 539 if (!ctx->atomic) 540 ssa_dst(ctx, instr, dst, chan); 541 } else if ((dst->File == TGSI_FILE_TEMPORARY) || 542 (dst->File == TGSI_FILE_OUTPUT) || 543 (dst->File == TGSI_FILE_ADDRESS)) { 544 unsigned i; 545 546 /* if instruction writes multiple, we need to create 547 * some place-holder collect the registers: 548 */ 549 for (i = 0; i < 4; i++) { 550 if (wrmask & (1 << i)) { 551 struct ir3_instruction *collect = 552 ir3_instr_create(ctx->block, -1, OPC_META_FO); 553 collect->fo.off = i; 554 /* unused dst reg: */ 555 ir3_reg_create(collect, 0, 0); 556 /* and src reg used to hold original instr */ 557 ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = instr; 558 if (!ctx->atomic) 559 ssa_dst(ctx, collect, dst, chan+i); 560 } 561 } 562 } 563 564 return reg; 565} 566 567static struct ir3_register * 568add_dst_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr, 569 const struct tgsi_dst_register *dst, unsigned chan) 570{ 571 return add_dst_reg_wrmask(ctx, instr, dst, chan, 0x1); 572} 573 574static struct ir3_register * 575add_src_reg_wrmask(struct ir3_compile_context *ctx, 576 struct ir3_instruction *instr, const struct tgsi_src_register *src, 577 unsigned chan, unsigned wrmask) 578{ 579 unsigned flags = 0, num = 0; 580 struct ir3_register *reg; 581 struct ir3_instruction *orig = NULL; 582 583 /* TODO we need to use a mov to temp for const >= 64.. or maybe 584 * we could use relative addressing.. 585 */ 586 compile_assert(ctx, src->Index < 64); 587 588 switch (src->File) { 589 case TGSI_FILE_IMMEDIATE: 590 /* TODO if possible, use actual immediate instead of const.. but 591 * TGSI has vec4 immediates, we can only embed scalar (of limited 592 * size, depending on instruction..) 
593 */ 594 flags |= IR3_REG_CONST; 595 num = src->Index + ctx->so->first_immediate; 596 break; 597 case TGSI_FILE_CONSTANT: 598 flags |= IR3_REG_CONST; 599 num = src->Index; 600 break; 601 case TGSI_FILE_OUTPUT: 602 /* NOTE: we should only end up w/ OUTPUT file for things like 603 * clamp()'ing saturated dst instructions 604 */ 605 case TGSI_FILE_INPUT: 606 case TGSI_FILE_TEMPORARY: 607 /* uses SSA */ 608 break; 609 default: 610 compile_error(ctx, "unsupported src register file: %s\n", 611 tgsi_file_name(src->File)); 612 break; 613 } 614 615 if (src->Absolute) 616 flags |= IR3_REG_ABS; 617 if (src->Negate) 618 flags |= IR3_REG_NEGATE; 619 620 if (src->Indirect) { 621 flags |= IR3_REG_RELATIV; 622 623 /* shouldn't happen, and we can't cope with it below: */ 624 compile_assert(ctx, wrmask == 0x1); 625 626 /* wrap in a meta-deref to track both the src and address: */ 627 orig = instr; 628 629 instr = ir3_instr_create(ctx->block, -1, OPC_META_DEREF); 630 ir3_reg_create(instr, 0, 0); 631 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->block->address; 632 } 633 634 reg = ir3_reg_create(instr, regid(num, chan), flags); 635 636 reg->wrmask = wrmask; 637 if (wrmask == 0x1) { 638 /* normal case */ 639 ssa_src(ctx, reg, src, chan); 640 } else if ((src->File == TGSI_FILE_TEMPORARY) || 641 (src->File == TGSI_FILE_OUTPUT) || 642 (src->File == TGSI_FILE_INPUT)) { 643 struct ir3_instruction *collect; 644 unsigned i; 645 646 compile_assert(ctx, !src->Indirect); 647 648 /* if instruction reads multiple, we need to create 649 * some place-holder collect the registers: 650 */ 651 collect = ir3_instr_create(ctx->block, -1, OPC_META_FI); 652 ir3_reg_create(collect, 0, 0); /* unused dst reg */ 653 654 for (i = 0; i < 4; i++) { 655 if (wrmask & (1 << i)) { 656 /* and src reg used point to the original instr */ 657 ssa_src(ctx, ir3_reg_create(collect, 0, IR3_REG_SSA), 658 src, chan + i); 659 } else if (wrmask & ~((i << i) - 1)) { 660 /* if any remaining components, then dummy 661 * 
placeholder src reg to fill in the blanks: 662 */ 663 ir3_reg_create(collect, 0, 0); 664 } 665 } 666 667 reg->flags |= IR3_REG_SSA; 668 reg->instr = collect; 669 } 670 671 if (src->Indirect) { 672 reg = ir3_reg_create(orig, 0, flags | IR3_REG_SSA); 673 reg->instr = instr; 674 } 675 return reg; 676} 677 678static struct ir3_register * 679add_src_reg(struct ir3_compile_context *ctx, struct ir3_instruction *instr, 680 const struct tgsi_src_register *src, unsigned chan) 681{ 682 return add_src_reg_wrmask(ctx, instr, src, chan, 0x1); 683} 684 685static void 686src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) 687{ 688 src->File = dst->File; 689 src->Indirect = dst->Indirect; 690 src->Dimension = dst->Dimension; 691 src->Index = dst->Index; 692 src->Absolute = 0; 693 src->Negate = 0; 694 src->SwizzleX = TGSI_SWIZZLE_X; 695 src->SwizzleY = TGSI_SWIZZLE_Y; 696 src->SwizzleZ = TGSI_SWIZZLE_Z; 697 src->SwizzleW = TGSI_SWIZZLE_W; 698} 699 700/* Get internal-temp src/dst to use for a sequence of instructions 701 * generated by a single TGSI op. 
702 */ 703static struct tgsi_src_register * 704get_internal_temp(struct ir3_compile_context *ctx, 705 struct tgsi_dst_register *tmp_dst) 706{ 707 struct tgsi_src_register *tmp_src; 708 int n; 709 710 tmp_dst->File = TGSI_FILE_TEMPORARY; 711 tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; 712 tmp_dst->Indirect = 0; 713 tmp_dst->Dimension = 0; 714 715 /* assign next temporary: */ 716 n = ctx->num_internal_temps++; 717 compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps)); 718 tmp_src = &ctx->internal_temps[n]; 719 720 tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1; 721 722 src_from_dst(tmp_src, tmp_dst); 723 724 return tmp_src; 725} 726 727static inline bool 728is_const(struct tgsi_src_register *src) 729{ 730 return (src->File == TGSI_FILE_CONSTANT) || 731 (src->File == TGSI_FILE_IMMEDIATE); 732} 733 734static inline bool 735is_relative(struct tgsi_src_register *src) 736{ 737 return src->Indirect; 738} 739 740static inline bool 741is_rel_or_const(struct tgsi_src_register *src) 742{ 743 return is_relative(src) || is_const(src); 744} 745 746static type_t 747get_ftype(struct ir3_compile_context *ctx) 748{ 749 return TYPE_F32; 750} 751 752static type_t 753get_utype(struct ir3_compile_context *ctx) 754{ 755 return TYPE_U32; 756} 757 758static unsigned 759src_swiz(struct tgsi_src_register *src, int chan) 760{ 761 switch (chan) { 762 case 0: return src->SwizzleX; 763 case 1: return src->SwizzleY; 764 case 2: return src->SwizzleZ; 765 case 3: return src->SwizzleW; 766 } 767 assert(0); 768 return 0; 769} 770 771/* for instructions that cannot take a const register as src, if needed 772 * generate a move to temporary gpr: 773 */ 774static struct tgsi_src_register * 775get_unconst(struct ir3_compile_context *ctx, struct tgsi_src_register *src) 776{ 777 struct tgsi_dst_register tmp_dst; 778 struct tgsi_src_register *tmp_src; 779 780 compile_assert(ctx, is_rel_or_const(src)); 781 782 tmp_src = get_internal_temp(ctx, &tmp_dst); 783 784 create_mov(ctx, &tmp_dst, 
src); 785 786 return tmp_src; 787} 788 789static void 790get_immediate(struct ir3_compile_context *ctx, 791 struct tgsi_src_register *reg, uint32_t val) 792{ 793 unsigned neg, swiz, idx, i; 794 /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ 795 static const unsigned swiz2tgsi[] = { 796 TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, 797 }; 798 799 for (i = 0; i < ctx->immediate_idx; i++) { 800 swiz = i % 4; 801 idx = i / 4; 802 803 if (ctx->so->immediates[idx].val[swiz] == val) { 804 neg = 0; 805 break; 806 } 807 808 if (ctx->so->immediates[idx].val[swiz] == -val) { 809 neg = 1; 810 break; 811 } 812 } 813 814 if (i == ctx->immediate_idx) { 815 /* need to generate a new immediate: */ 816 swiz = i % 4; 817 idx = i / 4; 818 neg = 0; 819 ctx->so->immediates[idx].val[swiz] = val; 820 ctx->so->immediates_count = idx + 1; 821 ctx->immediate_idx++; 822 } 823 824 reg->File = TGSI_FILE_IMMEDIATE; 825 reg->Indirect = 0; 826 reg->Dimension = 0; 827 reg->Index = idx; 828 reg->Absolute = 0; 829 reg->Negate = neg; 830 reg->SwizzleX = swiz2tgsi[swiz]; 831 reg->SwizzleY = swiz2tgsi[swiz]; 832 reg->SwizzleZ = swiz2tgsi[swiz]; 833 reg->SwizzleW = swiz2tgsi[swiz]; 834} 835 836static void 837create_mov(struct ir3_compile_context *ctx, struct tgsi_dst_register *dst, 838 struct tgsi_src_register *src) 839{ 840 type_t type_mov = get_ftype(ctx); 841 unsigned i; 842 843 for (i = 0; i < 4; i++) { 844 /* move to destination: */ 845 if (dst->WriteMask & (1 << i)) { 846 struct ir3_instruction *instr; 847 848 if (src->Absolute || src->Negate) { 849 /* can't have abs or neg on a mov instr, so use 850 * absneg.f instead to handle these cases: 851 */ 852 instr = instr_create(ctx, 2, OPC_ABSNEG_F); 853 } else { 854 instr = instr_create(ctx, 1, 0); 855 instr->cat1.src_type = type_mov; 856 instr->cat1.dst_type = type_mov; 857 } 858 859 add_dst_reg(ctx, instr, dst, i); 860 add_src_reg(ctx, instr, src, src_swiz(src, i)); 861 } 862 } 863} 864 865static void 
866create_clamp(struct ir3_compile_context *ctx, 867 struct tgsi_dst_register *dst, struct tgsi_src_register *val, 868 struct tgsi_src_register *minval, struct tgsi_src_register *maxval) 869{ 870 struct ir3_instruction *instr; 871 872 instr = instr_create(ctx, 2, OPC_MAX_F); 873 vectorize(ctx, instr, dst, 2, val, 0, minval, 0); 874 875 instr = instr_create(ctx, 2, OPC_MIN_F); 876 vectorize(ctx, instr, dst, 2, val, 0, maxval, 0); 877} 878 879static void 880create_clamp_imm(struct ir3_compile_context *ctx, 881 struct tgsi_dst_register *dst, 882 uint32_t minval, uint32_t maxval) 883{ 884 struct tgsi_src_register minconst, maxconst; 885 struct tgsi_src_register src; 886 887 src_from_dst(&src, dst); 888 889 get_immediate(ctx, &minconst, minval); 890 get_immediate(ctx, &maxconst, maxval); 891 892 create_clamp(ctx, dst, &src, &minconst, &maxconst); 893} 894 895static struct tgsi_dst_register * 896get_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst) 897{ 898 struct tgsi_dst_register *dst = &inst->Dst[0].Register; 899 unsigned i; 900 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 901 struct tgsi_src_register *src = &inst->Src[i].Register; 902 if ((src->File == dst->File) && (src->Index == dst->Index)) { 903 if ((dst->WriteMask == TGSI_WRITEMASK_XYZW) && 904 (src->SwizzleX == TGSI_SWIZZLE_X) && 905 (src->SwizzleY == TGSI_SWIZZLE_Y) && 906 (src->SwizzleZ == TGSI_SWIZZLE_Z) && 907 (src->SwizzleW == TGSI_SWIZZLE_W)) 908 continue; 909 ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst); 910 ctx->tmp_dst.WriteMask = dst->WriteMask; 911 dst = &ctx->tmp_dst; 912 break; 913 } 914 } 915 return dst; 916} 917 918static void 919put_dst(struct ir3_compile_context *ctx, struct tgsi_full_instruction *inst, 920 struct tgsi_dst_register *dst) 921{ 922 /* if necessary, add mov back into original dst: */ 923 if (dst != &inst->Dst[0].Register) { 924 create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src); 925 } 926} 927 928/* helper to generate the necessary repeat 
 * and/or additional instructions
 * to turn a scalar instruction into a vector operation:
 */
static void
vectorize(struct ir3_compile_context *ctx, struct ir3_instruction *instr,
		struct tgsi_dst_register *dst, int nsrcs, ...)
{
	va_list ap;
	int i, j, n = 0;

	/* the whole group is "atomic" so intermediate scalar writes do not
	 * become visible to their sibling components:
	 */
	instr_atomic_start(ctx);

	add_dst_reg(ctx, instr, dst, TGSI_SWIZZLE_X);

	/* varargs come in (src, flags) pairs; IR3_REG_IMMED flag means the
	 * 'src' pointer slot actually carries an immediate integer value:
	 */
	va_start(ap, nsrcs);
	for (j = 0; j < nsrcs; j++) {
		struct tgsi_src_register *src =
				va_arg(ap, struct tgsi_src_register *);
		unsigned flags = va_arg(ap, unsigned);
		struct ir3_register *reg;
		if (flags & IR3_REG_IMMED) {
			reg = ir3_reg_create(instr, 0, IR3_REG_IMMED);
			/* this is an ugly cast.. should have put flags first! */
			reg->iim_val = *(int *)&src;
		} else {
			reg = add_src_reg(ctx, instr, src, TGSI_SWIZZLE_X);
		}
		reg->flags |= flags & ~IR3_REG_NEGATE;
		/* NEGATE toggles (xor) rather than sets, so a negated-src TGSI
		 * operand combined with a NEGATE flag cancels out:
		 */
		if (flags & IR3_REG_NEGATE)
			reg->flags ^= IR3_REG_NEGATE;
	}
	va_end(ap);

	/* replicate the scalar op for each written component, fixing up the
	 * dst/src register components per clone:
	 */
	for (i = 0; i < 4; i++) {
		if (dst->WriteMask & (1 << i)) {
			struct ir3_instruction *cur;

			if (n++ == 0) {
				cur = instr;   /* first component reuses the original */
			} else {
				cur = instr_clone(ctx, instr);
			}

			ssa_dst(ctx, cur, dst, i);

			/* fix-up dst register component: */
			cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i);

			/* fix-up src register component: */
			va_start(ap, nsrcs);
			for (j = 0; j < nsrcs; j++) {
				struct ir3_register *reg = cur->regs[j+1];
				struct tgsi_src_register *src =
						va_arg(ap, struct tgsi_src_register *);
				unsigned flags = va_arg(ap, unsigned);
				if (reg->flags & IR3_REG_SSA) {
					ssa_src(ctx, reg, src, src_swiz(src, i));
				} else if (!(flags & IR3_REG_IMMED)) {
					reg->num = regid(reg->num >> 2, src_swiz(src, i));
				}
			}
			va_end(ap);
		}
	}

	instr_atomic_end(ctx);
}

/*
 * Handlers for TGSI instructions which do not have a 1:1 mapping to
 * native instructions:
 */

/* CLAMP(dst, val, min, max) -> max.f + min.f pair via create_clamp(): */
static void
trans_clamp(const struct instr_translater *t,
		struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
	struct tgsi_src_register *src2 = &inst->Src[2].Register;

	create_clamp(ctx, dst, src0, src1, src2);

	put_dst(ctx, inst, dst);
}

/* ARL(x) = x, but mova from hrN.x to a0.. */
static void
trans_arl(const struct instr_translater *t,
		struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	struct ir3_instruction *instr;
	struct tgsi_dst_register tmp_dst;
	struct tgsi_src_register *tmp_src;
	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
	struct tgsi_src_register *src = &inst->Src[0].Register;
	unsigned chan = src->SwizzleX;

	compile_assert(ctx, dst->File == TGSI_FILE_ADDRESS);

	/* NOTE: we allocate a temporary from a flat register
	 * namespace (ignoring half vs full).  It turns out
	 * not to really matter since registers get reassigned
	 * later in ir3_ra which (hopefully!) can deal a bit
	 * better with mixed half and full precision.
	 */
	tmp_src = get_internal_temp(ctx, &tmp_dst);

	/* cov.f{32,16}s16 Rtmp, Rsrc -- convert float to s16: */
	instr = instr_create(ctx, 1, 0);
	instr->cat1.src_type = get_ftype(ctx);
	instr->cat1.dst_type = TYPE_S16;
	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, src, chan);

	/* shl.b Rtmp, Rtmp, 2 -- scale to vec4 scalar-register units: */
	instr = instr_create(ctx, 2, OPC_SHL_B);
	add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
	ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;

	/* mova a0, Rtmp */
	instr = instr_create(ctx, 1, 0);
	instr->cat1.src_type = TYPE_S16;
	instr->cat1.dst_type = TYPE_S16;
	add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
	add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
}

/*
 * texture fetch/sample instructions:
 */

/* describes the coord-component ordering/mask/flags for one
 * texture-target + opcode combination:
 */
struct tex_info {
	int8_t order[4];                /* coord swizzle order, -1 = unused */
	unsigned src_wrmask, flags;
};

/* Look up the coord layout for the instruction's texture target.
 * Errors out (compile_error) on unsupported targets.
 */
static const struct tex_info *
get_tex_info(struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	static const struct tex_info tex1d = {
		.order = { 0, -1, -1, -1 },  /* coord.x */
		.src_wrmask = TGSI_WRITEMASK_XY,
		.flags = 0,
	};
	static const struct tex_info tex1ds = {
		.order = { 0, -1, 2, -1 },  /* coord.xz */
		.src_wrmask = TGSI_WRITEMASK_XYZ,
		.flags = IR3_INSTR_S,
	};
	static const struct tex_info tex2d = {
		.order = { 0, 1, -1, -1 },  /* coord.xy */
		.src_wrmask = TGSI_WRITEMASK_XY,
		.flags = 0,
	};
	static const struct tex_info tex2ds = {
		.order = { 0, 1, 2, -1 },  /* coord.xyz */
		.src_wrmask = TGSI_WRITEMASK_XYZ,
		.flags = IR3_INSTR_S,
	};
	static const struct tex_info tex3d = {
		.order = { 0, 1, 2, -1 },  /* coord.xyz */
		.src_wrmask = TGSI_WRITEMASK_XYZ,
		.flags = IR3_INSTR_3D,
	};
	static const struct tex_info tex3ds = {
		.order = { 0, 1, 2, 3 },  /* coord.xyzw */
		.src_wrmask = TGSI_WRITEMASK_XYZW,
		.flags = IR3_INSTR_S | IR3_INSTR_3D,
	};
	static const struct tex_info txp1d = {
		.order = { 0, -1, 3, -1 },  /* coord.xw */
		.src_wrmask = TGSI_WRITEMASK_XYZ,
		.flags = IR3_INSTR_P,
	};
	static const struct tex_info txp1ds = {
		.order = { 0, -1, 2, 3 },  /* coord.xzw */
		.src_wrmask = TGSI_WRITEMASK_XYZW,
		.flags = IR3_INSTR_P | IR3_INSTR_S,
	};
	static const struct tex_info txp2d = {
		.order = { 0, 1, 3, -1 },  /* coord.xyw */
		.src_wrmask = TGSI_WRITEMASK_XYZ,
		.flags = IR3_INSTR_P,
	};
	static const struct tex_info txp2ds = {
		.order = { 0, 1, 2, 3 },  /* coord.xyzw */
		.src_wrmask = TGSI_WRITEMASK_XYZW,
		.flags = IR3_INSTR_P | IR3_INSTR_S,
	};
	static const struct tex_info txp3d = {
		.order = { 0, 1, 2, 3 },  /* coord.xyzw */
		.src_wrmask = TGSI_WRITEMASK_XYZW,
		.flags = IR3_INSTR_P | IR3_INSTR_3D,
	};

	unsigned tex = inst->Texture.Texture;

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_TEX:
	case TGSI_OPCODE_TXB:
		switch (tex) {
		case TGSI_TEXTURE_1D:
			return &tex1d;
		case TGSI_TEXTURE_SHADOW1D:
			return &tex1ds;
		case TGSI_TEXTURE_2D:
		case TGSI_TEXTURE_RECT:
			return &tex2d;
		case TGSI_TEXTURE_SHADOW2D:
		case TGSI_TEXTURE_SHADOWRECT:
			return &tex2ds;
		case TGSI_TEXTURE_3D:
		case TGSI_TEXTURE_CUBE:
			return &tex3d;
		case TGSI_TEXTURE_SHADOWCUBE:
			return &tex3ds;
		default:
			compile_error(ctx, "unknown texture type: %s\n",
					tgsi_texture_names[tex]);
			return NULL;
		}
		break;
	case TGSI_OPCODE_TXP:
		switch (tex) {
		case TGSI_TEXTURE_1D:
			return &txp1d;
		case TGSI_TEXTURE_SHADOW1D:
			return &txp1ds;
		case TGSI_TEXTURE_2D:
		case TGSI_TEXTURE_RECT:
			return &txp2d;
		case TGSI_TEXTURE_SHADOW2D:
		case TGSI_TEXTURE_SHADOWRECT:
			return &txp2ds;
		case TGSI_TEXTURE_3D:
		case TGSI_TEXTURE_CUBE:
			return &txp3d;
		default:
			compile_error(ctx, "unknown texture type: %s\n",
					tgsi_texture_names[tex]);
			break;
		}
		break;
	}
	compile_assert(ctx, 0);
	return NULL;
}

/* check that the remaining active coord components are consecutive
 * (relative to component 0), as required by the sample instructions:
 */
static bool check_swiz(struct tgsi_src_register *src, const int8_t order[4])
{
	unsigned i;
	for (i = 1; (i < 4) && order[i] >= 0; i++)
		if (src_swiz(src, i) != (src_swiz(src, 0) + order[i]))
			return false;
	return true;
}

static struct tgsi_src_register *
get_tex_coord(struct ir3_compile_context *ctx,
		struct tgsi_full_instruction *inst,
		const struct tex_info *tinf)
{
	struct tgsi_src_register *coord = &inst->Src[0].Register;
	struct ir3_instruction *instr;
	unsigned tex = inst->Texture.Texture;
	bool needs_mov = false;

	/* cat5 instruction cannot seem to handle const or relative: */
	if (is_rel_or_const(coord))
		needs_mov = true;

	/* 1D textures we fix up w/ 0.0 as 2nd coord: */
	if ((tex == TGSI_TEXTURE_1D) || (tex == TGSI_TEXTURE_SHADOW1D))
		needs_mov = true;

	/* The texture sample instructions need to coord in successive
	 * registers/components (ie. src.xy but not src.yx).  And TXP
	 * needs the .w component in .z for 2D..
so in some cases we 1212 * might need to emit some mov instructions to shuffle things 1213 * around: 1214 */ 1215 if (!needs_mov) 1216 needs_mov = !check_swiz(coord, tinf->order); 1217 1218 if (needs_mov) { 1219 struct tgsi_dst_register tmp_dst; 1220 struct tgsi_src_register *tmp_src; 1221 unsigned j; 1222 1223 type_t type_mov = get_ftype(ctx); 1224 1225 /* need to move things around: */ 1226 tmp_src = get_internal_temp(ctx, &tmp_dst); 1227 1228 for (j = 0; j < 4; j++) { 1229 if (tinf->order[j] < 0) 1230 continue; 1231 instr = instr_create(ctx, 1, 0); /* mov */ 1232 instr->cat1.src_type = type_mov; 1233 instr->cat1.dst_type = type_mov; 1234 add_dst_reg(ctx, instr, &tmp_dst, j); 1235 add_src_reg(ctx, instr, coord, 1236 src_swiz(coord, tinf->order[j])); 1237 } 1238 1239 /* fix up .y coord: */ 1240 if ((tex == TGSI_TEXTURE_1D) || 1241 (tex == TGSI_TEXTURE_SHADOW1D)) { 1242 instr = instr_create(ctx, 1, 0); /* mov */ 1243 instr->cat1.src_type = type_mov; 1244 instr->cat1.dst_type = type_mov; 1245 add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */ 1246 ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = 0.5; 1247 } 1248 1249 coord = tmp_src; 1250 } 1251 1252 return coord; 1253} 1254 1255static void 1256trans_samp(const struct instr_translater *t, 1257 struct ir3_compile_context *ctx, 1258 struct tgsi_full_instruction *inst) 1259{ 1260 struct ir3_instruction *instr; 1261 struct tgsi_dst_register *dst = &inst->Dst[0].Register; 1262 struct tgsi_src_register *coord; 1263 struct tgsi_src_register *samp = &inst->Src[1].Register; 1264 const struct tex_info *tinf; 1265 1266 tinf = get_tex_info(ctx, inst); 1267 coord = get_tex_coord(ctx, inst, tinf); 1268 1269 instr = instr_create(ctx, 5, t->opc); 1270 instr->cat5.type = get_ftype(ctx); 1271 instr->cat5.samp = samp->Index; 1272 instr->cat5.tex = samp->Index; 1273 instr->flags |= tinf->flags; 1274 1275 add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask); 1276 add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, tinf->src_wrmask); 
1277 1278 if (t->tgsi_opc == TGSI_OPCODE_TXB) 1279 add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleW, 0x1); 1280} 1281 1282/* DDX/DDY */ 1283static void 1284trans_deriv(const struct instr_translater *t, 1285 struct ir3_compile_context *ctx, 1286 struct tgsi_full_instruction *inst) 1287{ 1288 struct ir3_instruction *instr; 1289 struct tgsi_dst_register *dst = &inst->Dst[0].Register; 1290 struct tgsi_src_register *src = &inst->Src[0].Register; 1291 static const int8_t order[4] = {0, 1, 2, 3}; 1292 1293 if (!check_swiz(src, order)) { 1294 struct tgsi_dst_register tmp_dst; 1295 struct tgsi_src_register *tmp_src; 1296 1297 tmp_src = get_internal_temp(ctx, &tmp_dst); 1298 create_mov(ctx, &tmp_dst, src); 1299 1300 src = tmp_src; 1301 } 1302 1303 /* This might be a workaround for hw bug? Blob compiler always 1304 * seems to work two components at a time for dsy/dsx. It does 1305 * actually seem to work in some cases (or at least some piglit 1306 * tests) for four components at a time. But seems more reliable 1307 * to split this into two instructions like the blob compiler 1308 * does: 1309 */ 1310 1311 instr = instr_create(ctx, 5, t->opc); 1312 instr->cat5.type = get_ftype(ctx); 1313 add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask & 0x3); 1314 add_src_reg_wrmask(ctx, instr, src, 0, dst->WriteMask & 0x3); 1315 1316 instr = instr_create(ctx, 5, t->opc); 1317 instr->cat5.type = get_ftype(ctx); 1318 add_dst_reg_wrmask(ctx, instr, dst, 2, (dst->WriteMask >> 2) & 0x3); 1319 add_src_reg_wrmask(ctx, instr, src, 2, (dst->WriteMask >> 2) & 0x3); 1320} 1321 1322/* 1323 * SEQ(a,b) = (a == b) ? 1.0 : 0.0 1324 * cmps.f.eq tmp0, a, b 1325 * cov.u16f16 dst, tmp0 1326 * 1327 * SNE(a,b) = (a != b) ? 1.0 : 0.0 1328 * cmps.f.ne tmp0, a, b 1329 * cov.u16f16 dst, tmp0 1330 * 1331 * SGE(a,b) = (a >= b) ? 1.0 : 0.0 1332 * cmps.f.ge tmp0, a, b 1333 * cov.u16f16 dst, tmp0 1334 * 1335 * SLE(a,b) = (a <= b) ? 
1.0 : 0.0 1336 * cmps.f.le tmp0, a, b 1337 * cov.u16f16 dst, tmp0 1338 * 1339 * SGT(a,b) = (a > b) ? 1.0 : 0.0 1340 * cmps.f.gt tmp0, a, b 1341 * cov.u16f16 dst, tmp0 1342 * 1343 * SLT(a,b) = (a < b) ? 1.0 : 0.0 1344 * cmps.f.lt tmp0, a, b 1345 * cov.u16f16 dst, tmp0 1346 * 1347 * CMP(a,b,c) = (a < 0.0) ? b : c 1348 * cmps.f.lt tmp0, a, {0.0} 1349 * sel.b16 dst, b, tmp0, c 1350 */ 1351static void 1352trans_cmp(const struct instr_translater *t, 1353 struct ir3_compile_context *ctx, 1354 struct tgsi_full_instruction *inst) 1355{ 1356 struct ir3_instruction *instr; 1357 struct tgsi_dst_register tmp_dst; 1358 struct tgsi_src_register *tmp_src; 1359 struct tgsi_src_register constval0; 1360 /* final instruction for CMP() uses orig src1 and src2: */ 1361 struct tgsi_dst_register *dst = get_dst(ctx, inst); 1362 struct tgsi_src_register *a0, *a1, *a2; 1363 unsigned condition; 1364 1365 tmp_src = get_internal_temp(ctx, &tmp_dst); 1366 1367 a0 = &inst->Src[0].Register; /* a */ 1368 a1 = &inst->Src[1].Register; /* b */ 1369 1370 switch (t->tgsi_opc) { 1371 case TGSI_OPCODE_SEQ: 1372 case TGSI_OPCODE_FSEQ: 1373 condition = IR3_COND_EQ; 1374 break; 1375 case TGSI_OPCODE_SNE: 1376 case TGSI_OPCODE_FSNE: 1377 condition = IR3_COND_NE; 1378 break; 1379 case TGSI_OPCODE_SGE: 1380 case TGSI_OPCODE_FSGE: 1381 condition = IR3_COND_GE; 1382 break; 1383 case TGSI_OPCODE_SLT: 1384 case TGSI_OPCODE_FSLT: 1385 condition = IR3_COND_LT; 1386 break; 1387 case TGSI_OPCODE_SLE: 1388 condition = IR3_COND_LE; 1389 break; 1390 case TGSI_OPCODE_SGT: 1391 condition = IR3_COND_GT; 1392 break; 1393 case TGSI_OPCODE_CMP: 1394 get_immediate(ctx, &constval0, fui(0.0)); 1395 a0 = &inst->Src[0].Register; /* a */ 1396 a1 = &constval0; /* {0.0} */ 1397 condition = IR3_COND_LT; 1398 break; 1399 default: 1400 compile_assert(ctx, 0); 1401 return; 1402 } 1403 1404 if (is_const(a0) && is_const(a1)) 1405 a0 = get_unconst(ctx, a0); 1406 1407 /* cmps.f.<cond> tmp, a0, a1 */ 1408 instr = instr_create(ctx, 2, 
OPC_CMPS_F); 1409 instr->cat2.condition = condition; 1410 vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); 1411 1412 switch (t->tgsi_opc) { 1413 case TGSI_OPCODE_SEQ: 1414 case TGSI_OPCODE_FSEQ: 1415 case TGSI_OPCODE_SGE: 1416 case TGSI_OPCODE_FSGE: 1417 case TGSI_OPCODE_SLE: 1418 case TGSI_OPCODE_SNE: 1419 case TGSI_OPCODE_FSNE: 1420 case TGSI_OPCODE_SGT: 1421 case TGSI_OPCODE_SLT: 1422 case TGSI_OPCODE_FSLT: 1423 /* cov.u16f16 dst, tmp0 */ 1424 instr = instr_create(ctx, 1, 0); 1425 instr->cat1.src_type = get_utype(ctx); 1426 instr->cat1.dst_type = get_ftype(ctx); 1427 vectorize(ctx, instr, dst, 1, tmp_src, 0); 1428 break; 1429 case TGSI_OPCODE_CMP: 1430 a1 = &inst->Src[1].Register; 1431 a2 = &inst->Src[2].Register; 1432 /* sel.{b32,b16} dst, src2, tmp, src1 */ 1433 instr = instr_create(ctx, 3, OPC_SEL_B32); 1434 vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0); 1435 1436 break; 1437 } 1438 1439 put_dst(ctx, inst, dst); 1440} 1441 1442/* 1443 * USNE(a,b) = (a != b) ? 1 : 0 1444 * cmps.u32.ne dst, a, b 1445 * 1446 * USEQ(a,b) = (a == b) ? 1 : 0 1447 * cmps.u32.eq dst, a, b 1448 * 1449 * ISGE(a,b) = (a > b) ? 1 : 0 1450 * cmps.s32.ge dst, a, b 1451 * 1452 * USGE(a,b) = (a > b) ? 1 : 0 1453 * cmps.u32.ge dst, a, b 1454 * 1455 * ISLT(a,b) = (a < b) ? 1 : 0 1456 * cmps.s32.lt dst, a, b 1457 * 1458 * USLT(a,b) = (a < b) ? 1 : 0 1459 * cmps.u32.lt dst, a, b 1460 * 1461 * UCMP(a,b,c) = (a < 0) ? 
b : c 1462 * cmps.u32.lt tmp0, a, {0} 1463 * sel.b16 dst, b, tmp0, c 1464 */ 1465static void 1466trans_icmp(const struct instr_translater *t, 1467 struct ir3_compile_context *ctx, 1468 struct tgsi_full_instruction *inst) 1469{ 1470 struct ir3_instruction *instr; 1471 struct tgsi_dst_register *dst = get_dst(ctx, inst); 1472 struct tgsi_src_register constval0; 1473 struct tgsi_src_register *a0, *a1, *a2; 1474 unsigned condition; 1475 1476 a0 = &inst->Src[0].Register; /* a */ 1477 a1 = &inst->Src[1].Register; /* b */ 1478 1479 switch (t->tgsi_opc) { 1480 case TGSI_OPCODE_USNE: 1481 condition = IR3_COND_NE; 1482 break; 1483 case TGSI_OPCODE_USEQ: 1484 condition = IR3_COND_EQ; 1485 break; 1486 case TGSI_OPCODE_ISGE: 1487 case TGSI_OPCODE_USGE: 1488 condition = IR3_COND_GE; 1489 break; 1490 case TGSI_OPCODE_ISLT: 1491 case TGSI_OPCODE_USLT: 1492 condition = IR3_COND_LT; 1493 break; 1494 case TGSI_OPCODE_UCMP: 1495 get_immediate(ctx, &constval0, 0); 1496 a0 = &inst->Src[0].Register; /* a */ 1497 a1 = &constval0; /* {0} */ 1498 condition = IR3_COND_LT; 1499 break; 1500 1501 default: 1502 compile_assert(ctx, 0); 1503 return; 1504 } 1505 1506 if (is_const(a0) && is_const(a1)) 1507 a0 = get_unconst(ctx, a0); 1508 1509 if (t->tgsi_opc == TGSI_OPCODE_UCMP) { 1510 struct tgsi_dst_register tmp_dst; 1511 struct tgsi_src_register *tmp_src; 1512 tmp_src = get_internal_temp(ctx, &tmp_dst); 1513 /* cmps.u32.lt tmp, a0, a1 */ 1514 instr = instr_create(ctx, 2, t->opc); 1515 instr->cat2.condition = condition; 1516 vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0); 1517 1518 a1 = &inst->Src[1].Register; 1519 a2 = &inst->Src[2].Register; 1520 /* sel.{b32,b16} dst, src2, tmp, src1 */ 1521 instr = instr_create(ctx, 3, OPC_SEL_B32); 1522 vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0); 1523 } else { 1524 /* cmps.{u32,s32}.<cond> dst, a0, a1 */ 1525 instr = instr_create(ctx, 2, t->opc); 1526 instr->cat2.condition = condition; 1527 vectorize(ctx, instr, dst, 2, a0, 0, a1, 0); 1528 } 1529 
put_dst(ctx, inst, dst); 1530} 1531 1532/* 1533 * Conditional / Flow control 1534 */ 1535 1536static void 1537push_branch(struct ir3_compile_context *ctx, bool inv, 1538 struct ir3_instruction *instr, struct ir3_instruction *cond) 1539{ 1540 unsigned int idx = ctx->branch_count++; 1541 compile_assert(ctx, idx < ARRAY_SIZE(ctx->branch)); 1542 ctx->branch[idx].instr = instr; 1543 ctx->branch[idx].inv = inv; 1544 /* else side of branch has same condition: */ 1545 if (!inv) 1546 ctx->branch[idx].cond = cond; 1547} 1548 1549static struct ir3_instruction * 1550pop_branch(struct ir3_compile_context *ctx) 1551{ 1552 unsigned int idx = --ctx->branch_count; 1553 return ctx->branch[idx].instr; 1554} 1555 1556static void 1557trans_if(const struct instr_translater *t, 1558 struct ir3_compile_context *ctx, 1559 struct tgsi_full_instruction *inst) 1560{ 1561 struct ir3_instruction *instr, *cond; 1562 struct tgsi_src_register *src = &inst->Src[0].Register; 1563 struct tgsi_dst_register tmp_dst; 1564 struct tgsi_src_register *tmp_src; 1565 struct tgsi_src_register constval; 1566 1567 get_immediate(ctx, &constval, fui(0.0)); 1568 tmp_src = get_internal_temp(ctx, &tmp_dst); 1569 1570 if (is_const(src)) 1571 src = get_unconst(ctx, src); 1572 1573 /* cmps.f.ne tmp0, b, {0.0} */ 1574 instr = instr_create(ctx, 2, OPC_CMPS_F); 1575 add_dst_reg(ctx, instr, &tmp_dst, 0); 1576 add_src_reg(ctx, instr, src, src->SwizzleX); 1577 add_src_reg(ctx, instr, &constval, constval.SwizzleX); 1578 instr->cat2.condition = IR3_COND_NE; 1579 1580 compile_assert(ctx, instr->regs[1]->flags & IR3_REG_SSA); /* because get_unconst() */ 1581 cond = instr->regs[1]->instr; 1582 1583 /* meta:flow tmp0 */ 1584 instr = instr_create(ctx, -1, OPC_META_FLOW); 1585 ir3_reg_create(instr, 0, 0); /* dummy dst */ 1586 add_src_reg(ctx, instr, tmp_src, TGSI_SWIZZLE_X); 1587 1588 push_branch(ctx, false, instr, cond); 1589 instr->flow.if_block = push_block(ctx); 1590} 1591 1592static void 1593trans_else(const struct 
instr_translater *t, 1594 struct ir3_compile_context *ctx, 1595 struct tgsi_full_instruction *inst) 1596{ 1597 struct ir3_instruction *instr; 1598 1599 pop_block(ctx); 1600 1601 instr = pop_branch(ctx); 1602 1603 compile_assert(ctx, (instr->category == -1) && 1604 (instr->opc == OPC_META_FLOW)); 1605 1606 push_branch(ctx, true, instr, NULL); 1607 instr->flow.else_block = push_block(ctx); 1608} 1609 1610static struct ir3_instruction * 1611find_temporary(struct ir3_block *block, unsigned n) 1612{ 1613 if (block->parent && !block->temporaries[n]) 1614 return find_temporary(block->parent, n); 1615 return block->temporaries[n]; 1616} 1617 1618static struct ir3_instruction * 1619find_output(struct ir3_block *block, unsigned n) 1620{ 1621 if (block->parent && !block->outputs[n]) 1622 return find_output(block->parent, n); 1623 return block->outputs[n]; 1624} 1625 1626static struct ir3_instruction * 1627create_phi(struct ir3_compile_context *ctx, struct ir3_instruction *cond, 1628 struct ir3_instruction *a, struct ir3_instruction *b) 1629{ 1630 struct ir3_instruction *phi; 1631 1632 compile_assert(ctx, cond); 1633 1634 /* Either side of the condition could be null.. which 1635 * indicates a variable written on only one side of the 1636 * branch. Normally this should only be variables not 1637 * used outside of that side of the branch. So we could 1638 * just 'return a ? a : b;' in that case. But for better 1639 * defined undefined behavior we just stick in imm{0.0}. 
1640 * In the common case of a value only used within the 1641 * one side of the branch, the PHI instruction will not 1642 * get scheduled 1643 */ 1644 if (!a) 1645 a = create_immed(ctx, 0.0); 1646 if (!b) 1647 b = create_immed(ctx, 0.0); 1648 1649 phi = instr_create(ctx, -1, OPC_META_PHI); 1650 ir3_reg_create(phi, 0, 0); /* dummy dst */ 1651 ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = cond; 1652 ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = a; 1653 ir3_reg_create(phi, 0, IR3_REG_SSA)->instr = b; 1654 1655 return phi; 1656} 1657 1658static void 1659trans_endif(const struct instr_translater *t, 1660 struct ir3_compile_context *ctx, 1661 struct tgsi_full_instruction *inst) 1662{ 1663 struct ir3_instruction *instr; 1664 struct ir3_block *ifb, *elseb; 1665 struct ir3_instruction **ifout, **elseout; 1666 unsigned i, ifnout = 0, elsenout = 0; 1667 1668 pop_block(ctx); 1669 1670 instr = pop_branch(ctx); 1671 1672 compile_assert(ctx, (instr->category == -1) && 1673 (instr->opc == OPC_META_FLOW)); 1674 1675 ifb = instr->flow.if_block; 1676 elseb = instr->flow.else_block; 1677 /* if there is no else block, the parent block is used for the 1678 * branch-not-taken src of the PHI instructions: 1679 */ 1680 if (!elseb) 1681 elseb = ifb->parent; 1682 1683 /* worst case sizes: */ 1684 ifnout = ifb->ntemporaries + ifb->noutputs; 1685 elsenout = elseb->ntemporaries + elseb->noutputs; 1686 1687 ifout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * ifnout); 1688 if (elseb != ifb->parent) 1689 elseout = ir3_alloc(ctx->ir, sizeof(ifb->outputs[0]) * elsenout); 1690 1691 ifnout = 0; 1692 elsenout = 0; 1693 1694 /* generate PHI instructions for any temporaries written: */ 1695 for (i = 0; i < ifb->ntemporaries; i++) { 1696 struct ir3_instruction *a = ifb->temporaries[i]; 1697 struct ir3_instruction *b = elseb->temporaries[i]; 1698 1699 /* if temporary written in if-block, or if else block 1700 * is present and temporary written in else-block: 1701 */ 1702 if (a || ((elseb != ifb->parent) && 
b)) { 1703 struct ir3_instruction *phi; 1704 1705 /* if only written on one side, find the closest 1706 * enclosing update on other side: 1707 */ 1708 if (!a) 1709 a = find_temporary(ifb, i); 1710 if (!b) 1711 b = find_temporary(elseb, i); 1712 1713 ifout[ifnout] = a; 1714 a = create_output(ifb, a, ifnout++); 1715 1716 if (elseb != ifb->parent) { 1717 elseout[elsenout] = b; 1718 b = create_output(elseb, b, elsenout++); 1719 } 1720 1721 phi = create_phi(ctx, instr, a, b); 1722 ctx->block->temporaries[i] = phi; 1723 } 1724 } 1725 1726 compile_assert(ctx, ifb->noutputs == elseb->noutputs); 1727 1728 /* .. and any outputs written: */ 1729 for (i = 0; i < ifb->noutputs; i++) { 1730 struct ir3_instruction *a = ifb->outputs[i]; 1731 struct ir3_instruction *b = elseb->outputs[i]; 1732 1733 /* if output written in if-block, or if else block 1734 * is present and output written in else-block: 1735 */ 1736 if (a || ((elseb != ifb->parent) && b)) { 1737 struct ir3_instruction *phi; 1738 1739 /* if only written on one side, find the closest 1740 * enclosing update on other side: 1741 */ 1742 if (!a) 1743 a = find_output(ifb, i); 1744 if (!b) 1745 b = find_output(elseb, i); 1746 1747 ifout[ifnout] = a; 1748 a = create_output(ifb, a, ifnout++); 1749 1750 if (elseb != ifb->parent) { 1751 elseout[elsenout] = b; 1752 b = create_output(elseb, b, elsenout++); 1753 } 1754 1755 phi = create_phi(ctx, instr, a, b); 1756 ctx->block->outputs[i] = phi; 1757 } 1758 } 1759 1760 ifb->noutputs = ifnout; 1761 ifb->outputs = ifout; 1762 1763 if (elseb != ifb->parent) { 1764 elseb->noutputs = elsenout; 1765 elseb->outputs = elseout; 1766 } 1767 1768 // TODO maybe we want to compact block->inputs? 
1769} 1770 1771/* 1772 * Kill 1773 */ 1774 1775static void 1776trans_kill(const struct instr_translater *t, 1777 struct ir3_compile_context *ctx, 1778 struct tgsi_full_instruction *inst) 1779{ 1780 struct ir3_instruction *instr, *immed, *cond = NULL; 1781 bool inv = false; 1782 1783 switch (t->tgsi_opc) { 1784 case TGSI_OPCODE_KILL: 1785 /* unconditional kill, use enclosing if condition: */ 1786 if (ctx->branch_count > 0) { 1787 unsigned int idx = ctx->branch_count - 1; 1788 cond = ctx->branch[idx].cond; 1789 inv = ctx->branch[idx].inv; 1790 } else { 1791 cond = create_immed(ctx, 1.0); 1792 } 1793 1794 break; 1795 } 1796 1797 compile_assert(ctx, cond); 1798 1799 immed = create_immed(ctx, 0.0); 1800 1801 /* cmps.f.ne p0.x, cond, {0.0} */ 1802 instr = instr_create(ctx, 2, OPC_CMPS_F); 1803 instr->cat2.condition = IR3_COND_NE; 1804 ir3_reg_create(instr, regid(REG_P0, 0), 0); 1805 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; 1806 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed; 1807 cond = instr; 1808 1809 /* kill p0.x */ 1810 instr = instr_create(ctx, 0, OPC_KILL); 1811 instr->cat0.inv = inv; 1812 ir3_reg_create(instr, 0, 0); /* dummy dst */ 1813 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; 1814 1815 ctx->kill[ctx->kill_count++] = instr; 1816} 1817 1818/* 1819 * Kill-If 1820 */ 1821 1822static void 1823trans_killif(const struct instr_translater *t, 1824 struct ir3_compile_context *ctx, 1825 struct tgsi_full_instruction *inst) 1826{ 1827 struct tgsi_src_register *src = &inst->Src[0].Register; 1828 struct ir3_instruction *instr, *immed, *cond = NULL; 1829 bool inv = false; 1830 1831 immed = create_immed(ctx, 0.0); 1832 1833 /* cmps.f.ne p0.x, cond, {0.0} */ 1834 instr = instr_create(ctx, 2, OPC_CMPS_F); 1835 instr->cat2.condition = IR3_COND_NE; 1836 ir3_reg_create(instr, regid(REG_P0, 0), 0); 1837 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed; 1838 add_src_reg(ctx, instr, src, src->SwizzleX); 1839 1840 cond = instr; 1841 1842 /* kill p0.x */ 
1843 instr = instr_create(ctx, 0, OPC_KILL); 1844 instr->cat0.inv = inv; 1845 ir3_reg_create(instr, 0, 0); /* dummy dst */ 1846 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond; 1847 1848 ctx->kill[ctx->kill_count++] = instr; 1849 1850} 1851/* 1852 * I2F / U2F / F2I / F2U 1853 */ 1854 1855static void 1856trans_cov(const struct instr_translater *t, 1857 struct ir3_compile_context *ctx, 1858 struct tgsi_full_instruction *inst) 1859{ 1860 struct ir3_instruction *instr; 1861 struct tgsi_dst_register *dst = get_dst(ctx, inst); 1862 struct tgsi_src_register *src = &inst->Src[0].Register; 1863 1864 // cov.f32s32 dst, tmp0 / 1865 instr = instr_create(ctx, 1, 0); 1866 switch (t->tgsi_opc) { 1867 case TGSI_OPCODE_U2F: 1868 instr->cat1.src_type = TYPE_U32; 1869 instr->cat1.dst_type = TYPE_F32; 1870 break; 1871 case TGSI_OPCODE_I2F: 1872 instr->cat1.src_type = TYPE_S32; 1873 instr->cat1.dst_type = TYPE_F32; 1874 break; 1875 case TGSI_OPCODE_F2U: 1876 instr->cat1.src_type = TYPE_F32; 1877 instr->cat1.dst_type = TYPE_U32; 1878 break; 1879 case TGSI_OPCODE_F2I: 1880 instr->cat1.src_type = TYPE_F32; 1881 instr->cat1.dst_type = TYPE_S32; 1882 break; 1883 1884 } 1885 vectorize(ctx, instr, dst, 1, src, 0); 1886} 1887 1888/* 1889 * Handlers for TGSI instructions which do have 1:1 mapping to native 1890 * instructions: 1891 */ 1892 1893static void 1894instr_cat0(const struct instr_translater *t, 1895 struct ir3_compile_context *ctx, 1896 struct tgsi_full_instruction *inst) 1897{ 1898 instr_create(ctx, 0, t->opc); 1899} 1900 1901static void 1902instr_cat1(const struct instr_translater *t, 1903 struct ir3_compile_context *ctx, 1904 struct tgsi_full_instruction *inst) 1905{ 1906 struct tgsi_dst_register *dst = get_dst(ctx, inst); 1907 struct tgsi_src_register *src = &inst->Src[0].Register; 1908 create_mov(ctx, dst, src); 1909 put_dst(ctx, inst, dst); 1910} 1911 1912static void 1913instr_cat2(const struct instr_translater *t, 1914 struct ir3_compile_context *ctx, 1915 struct 
tgsi_full_instruction *inst) 1916{ 1917 struct tgsi_dst_register *dst = get_dst(ctx, inst); 1918 struct tgsi_src_register *src0 = &inst->Src[0].Register; 1919 struct tgsi_src_register *src1 = &inst->Src[1].Register; 1920 struct ir3_instruction *instr; 1921 unsigned src0_flags = 0, src1_flags = 0; 1922 1923 switch (t->tgsi_opc) { 1924 case TGSI_OPCODE_ABS: 1925 case TGSI_OPCODE_IABS: 1926 src0_flags = IR3_REG_ABS; 1927 break; 1928 case TGSI_OPCODE_SUB: 1929 case TGSI_OPCODE_INEG: 1930 src1_flags = IR3_REG_NEGATE; 1931 break; 1932 } 1933 1934 switch (t->opc) { 1935 case OPC_ABSNEG_F: 1936 case OPC_ABSNEG_S: 1937 case OPC_CLZ_B: 1938 case OPC_CLZ_S: 1939 case OPC_SIGN_F: 1940 case OPC_FLOOR_F: 1941 case OPC_CEIL_F: 1942 case OPC_RNDNE_F: 1943 case OPC_RNDAZ_F: 1944 case OPC_TRUNC_F: 1945 case OPC_NOT_B: 1946 case OPC_BFREV_B: 1947 case OPC_SETRM: 1948 case OPC_CBITS_B: 1949 /* these only have one src reg */ 1950 instr = instr_create(ctx, 2, t->opc); 1951 vectorize(ctx, instr, dst, 1, src0, src0_flags); 1952 break; 1953 default: 1954 if (is_const(src0) && is_const(src1)) 1955 src0 = get_unconst(ctx, src0); 1956 1957 instr = instr_create(ctx, 2, t->opc); 1958 vectorize(ctx, instr, dst, 2, src0, src0_flags, 1959 src1, src1_flags); 1960 break; 1961 } 1962 1963 put_dst(ctx, inst, dst); 1964} 1965 1966static void 1967instr_cat3(const struct instr_translater *t, 1968 struct ir3_compile_context *ctx, 1969 struct tgsi_full_instruction *inst) 1970{ 1971 struct tgsi_dst_register *dst = get_dst(ctx, inst); 1972 struct tgsi_src_register *src0 = &inst->Src[0].Register; 1973 struct tgsi_src_register *src1 = &inst->Src[1].Register; 1974 struct ir3_instruction *instr; 1975 1976 /* in particular, can't handle const for src1 for cat3.. 
1977 * for mad, we can swap first two src's if needed: 1978 */ 1979 if (is_rel_or_const(src1)) { 1980 if (is_mad(t->opc) && !is_rel_or_const(src0)) { 1981 struct tgsi_src_register *tmp; 1982 tmp = src0; 1983 src0 = src1; 1984 src1 = tmp; 1985 } else { 1986 src1 = get_unconst(ctx, src1); 1987 } 1988 } 1989 1990 instr = instr_create(ctx, 3, t->opc); 1991 vectorize(ctx, instr, dst, 3, src0, 0, src1, 0, 1992 &inst->Src[2].Register, 0); 1993 put_dst(ctx, inst, dst); 1994} 1995 1996static void 1997instr_cat4(const struct instr_translater *t, 1998 struct ir3_compile_context *ctx, 1999 struct tgsi_full_instruction *inst) 2000{ 2001 struct tgsi_dst_register *dst = get_dst(ctx, inst); 2002 struct tgsi_src_register *src = &inst->Src[0].Register; 2003 struct ir3_instruction *instr; 2004 unsigned i; 2005 2006 /* seems like blob compiler avoids const as src.. */ 2007 if (is_const(src)) 2008 src = get_unconst(ctx, src); 2009 2010 /* we need to replicate into each component: */ 2011 for (i = 0; i < 4; i++) { 2012 if (dst->WriteMask & (1 << i)) { 2013 instr = instr_create(ctx, 4, t->opc); 2014 add_dst_reg(ctx, instr, dst, i); 2015 add_src_reg(ctx, instr, src, src->SwizzleX); 2016 } 2017 } 2018 2019 put_dst(ctx, inst, dst); 2020} 2021 2022static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { 2023#define INSTR(n, f, ...) 
\ 2024 [TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ } 2025 2026 INSTR(MOV, instr_cat1), 2027 INSTR(RCP, instr_cat4, .opc = OPC_RCP), 2028 INSTR(RSQ, instr_cat4, .opc = OPC_RSQ), 2029 INSTR(SQRT, instr_cat4, .opc = OPC_SQRT), 2030 INSTR(MUL, instr_cat2, .opc = OPC_MUL_F), 2031 INSTR(ADD, instr_cat2, .opc = OPC_ADD_F), 2032 INSTR(SUB, instr_cat2, .opc = OPC_ADD_F), 2033 INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), 2034 INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), 2035 INSTR(UADD, instr_cat2, .opc = OPC_ADD_U), 2036 INSTR(IMIN, instr_cat2, .opc = OPC_MIN_S), 2037 INSTR(UMIN, instr_cat2, .opc = OPC_MIN_U), 2038 INSTR(IMAX, instr_cat2, .opc = OPC_MAX_S), 2039 INSTR(UMAX, instr_cat2, .opc = OPC_MAX_U), 2040 INSTR(AND, instr_cat2, .opc = OPC_AND_B), 2041 INSTR(OR, instr_cat2, .opc = OPC_OR_B), 2042 INSTR(NOT, instr_cat2, .opc = OPC_NOT_B), 2043 INSTR(XOR, instr_cat2, .opc = OPC_XOR_B), 2044 INSTR(UMUL, instr_cat2, .opc = OPC_MUL_U), 2045 INSTR(SHL, instr_cat2, .opc = OPC_SHL_B), 2046 INSTR(USHR, instr_cat2, .opc = OPC_SHR_B), 2047 INSTR(ISHR, instr_cat2, .opc = OPC_ASHR_B), 2048 INSTR(IABS, instr_cat2, .opc = OPC_ABSNEG_S), 2049 INSTR(INEG, instr_cat2, .opc = OPC_ABSNEG_S), 2050 INSTR(AND, instr_cat2, .opc = OPC_AND_B), 2051 INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), 2052 INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F), 2053 INSTR(CLAMP, trans_clamp), 2054 INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), 2055 INSTR(ROUND, instr_cat2, .opc = OPC_RNDNE_F), 2056 INSTR(SSG, instr_cat2, .opc = OPC_SIGN_F), 2057 INSTR(CEIL, instr_cat2, .opc = OPC_CEIL_F), 2058 INSTR(ARL, trans_arl), 2059 INSTR(EX2, instr_cat4, .opc = OPC_EXP2), 2060 INSTR(LG2, instr_cat4, .opc = OPC_LOG2), 2061 INSTR(ABS, instr_cat2, .opc = OPC_ABSNEG_F), 2062 INSTR(COS, instr_cat4, .opc = OPC_COS), 2063 INSTR(SIN, instr_cat4, .opc = OPC_SIN), 2064 INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX), 2065 INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = 
TGSI_OPCODE_TXP), 2066 INSTR(TXB, trans_samp, .opc = OPC_SAMB, .arg = TGSI_OPCODE_TXB), 2067 INSTR(DDX, trans_deriv, .opc = OPC_DSX), 2068 INSTR(DDY, trans_deriv, .opc = OPC_DSY), 2069 INSTR(SGT, trans_cmp), 2070 INSTR(SLT, trans_cmp), 2071 INSTR(FSLT, trans_cmp), 2072 INSTR(SGE, trans_cmp), 2073 INSTR(FSGE, trans_cmp), 2074 INSTR(SLE, trans_cmp), 2075 INSTR(SNE, trans_cmp), 2076 INSTR(FSNE, trans_cmp), 2077 INSTR(SEQ, trans_cmp), 2078 INSTR(FSEQ, trans_cmp), 2079 INSTR(CMP, trans_cmp), 2080 INSTR(USNE, trans_icmp, .opc = OPC_CMPS_U), 2081 INSTR(USEQ, trans_icmp, .opc = OPC_CMPS_U), 2082 INSTR(ISGE, trans_icmp, .opc = OPC_CMPS_S), 2083 INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U), 2084 INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S), 2085 INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U), 2086 INSTR(UCMP, trans_icmp, .opc = OPC_CMPS_U), 2087 INSTR(IF, trans_if), 2088 INSTR(UIF, trans_if), 2089 INSTR(ELSE, trans_else), 2090 INSTR(ENDIF, trans_endif), 2091 INSTR(END, instr_cat0, .opc = OPC_END), 2092 INSTR(KILL, trans_kill, .opc = OPC_KILL), 2093 INSTR(KILL_IF, trans_killif, .opc = OPC_KILL), 2094 INSTR(I2F, trans_cov), 2095 INSTR(U2F, trans_cov), 2096 INSTR(F2I, trans_cov), 2097 INSTR(F2U, trans_cov), 2098}; 2099 2100static ir3_semantic 2101decl_semantic(const struct tgsi_declaration_semantic *sem) 2102{ 2103 return ir3_semantic_name(sem->Name, sem->Index); 2104} 2105 2106static struct ir3_instruction * 2107decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid, 2108 unsigned j, unsigned inloc) 2109{ 2110 struct ir3_instruction *instr; 2111 struct ir3_register *src; 2112 2113 /* bary.f dst, #inloc, r0.x */ 2114 instr = instr_create(ctx, 2, OPC_BARY_F); 2115 ir3_reg_create(instr, regid, 0); /* dummy dst */ 2116 ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc; 2117 src = ir3_reg_create(instr, 0, IR3_REG_SSA); 2118 src->wrmask = 0x3; 2119 src->instr = ctx->frag_pos; 2120 2121 return instr; 2122} 2123 2124/* TGSI_SEMANTIC_POSITION 2125 * """""""""""""""""""""" 
 *
 * For fragment shaders, TGSI_SEMANTIC_POSITION is used to indicate that
 * fragment shader input contains the fragment's window position.  The X
 * component starts at zero and always increases from left to right.
 * The Y component starts at zero and always increases but Y=0 may either
 * indicate the top of the window or the bottom depending on the fragment
 * coordinate origin convention (see TGSI_PROPERTY_FS_COORD_ORIGIN).
 * The Z coordinate ranges from 0 to 1 to represent depth from the front
 * to the back of the Z buffer.  The W component contains the reciprocal
 * of the interpolated vertex position W component.
 */

/* Emit the instruction sequence producing one channel (j) of the
 * fragment window position, converting the raw hw frag_coord input
 * into the float value TGSI expects.  Returns the instruction whose
 * result is the channel value.
 */
static struct ir3_instruction *
decl_in_frag_coord(struct ir3_compile_context *ctx, unsigned regid,
		unsigned j)
{
	struct ir3_instruction *instr, *src;

	/* each channel's hw input should only be declared once: */
	compile_assert(ctx, !ctx->frag_coord[j]);

	ctx->frag_coord[j] = create_input(ctx->block, NULL, 0);


	switch (j) {
	case 0: /* .x */
	case 1: /* .y */
		/* for frag_coord, we get unsigned values.. we need
		 * to subtract (integer) 8 and divide by 16 (right-
		 * shift by 4) then convert to float:
		 */

		/* add.s tmp, src, -8 */
		instr = instr_create(ctx, 2, OPC_ADD_S);
		ir3_reg_create(instr, regid, 0);    /* dummy dst */
		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_coord[j];
		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -8;
		src = instr;

		/* shr.b tmp, tmp, 4 */
		instr = instr_create(ctx, 2, OPC_SHR_B);
		ir3_reg_create(instr, regid, 0);    /* dummy dst */
		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 4;
		src = instr;

		/* mov.u32f32 dst, tmp */
		instr = instr_create(ctx, 1, 0);
		instr->cat1.src_type = TYPE_U32;
		instr->cat1.dst_type = TYPE_F32;
		ir3_reg_create(instr, regid, 0);    /* dummy dst */
		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;

		break;
	case 2: /* .z */
	case 3: /* .w */
		/* seems that we can use these as-is: */
		instr = ctx->frag_coord[j];
		break;
	default:
		compile_error(ctx, "invalid channel\n");
		instr = create_immed(ctx, 0.0);
		break;
	}

	return instr;
}

/* TGSI_SEMANTIC_FACE
 * """"""""""""""""""
 *
 * This label applies to fragment shader inputs only and indicates that
 * the register contains front/back-face information of the form (F, 0,
 * 0, 1).  The first component will be positive when the fragment belongs
 * to a front-facing polygon, and negative when the fragment belongs to a
 * back-facing polygon.
2200 */ 2201static struct ir3_instruction * 2202decl_in_frag_face(struct ir3_compile_context *ctx, unsigned regid, 2203 unsigned j) 2204{ 2205 struct ir3_instruction *instr, *src; 2206 2207 switch (j) { 2208 case 0: /* .x */ 2209 compile_assert(ctx, !ctx->frag_face); 2210 2211 ctx->frag_face = create_input(ctx->block, NULL, 0); 2212 2213 /* for faceness, we always get -1 or 0 (int).. but TGSI expects 2214 * positive vs negative float.. and piglit further seems to 2215 * expect -1.0 or 1.0: 2216 * 2217 * mul.s tmp, hr0.x, 2 2218 * add.s tmp, tmp, 1 2219 * mov.s16f32, dst, tmp 2220 * 2221 */ 2222 2223 instr = instr_create(ctx, 2, OPC_MUL_S); 2224 ir3_reg_create(instr, regid, 0); /* dummy dst */ 2225 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = ctx->frag_face; 2226 ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2; 2227 src = instr; 2228 2229 instr = instr_create(ctx, 2, OPC_ADD_S); 2230 ir3_reg_create(instr, regid, 0); /* dummy dst */ 2231 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; 2232 ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1; 2233 src = instr; 2234 2235 instr = instr_create(ctx, 1, 0); /* mov */ 2236 instr->cat1.src_type = TYPE_S32; 2237 instr->cat1.dst_type = TYPE_F32; 2238 ir3_reg_create(instr, regid, 0); /* dummy dst */ 2239 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; 2240 2241 break; 2242 case 1: /* .y */ 2243 case 2: /* .z */ 2244 instr = create_immed(ctx, 0.0); 2245 break; 2246 case 3: /* .w */ 2247 instr = create_immed(ctx, 1.0); 2248 break; 2249 default: 2250 compile_error(ctx, "invalid channel\n"); 2251 instr = create_immed(ctx, 0.0); 2252 break; 2253 } 2254 2255 return instr; 2256} 2257 2258static void 2259decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) 2260{ 2261 struct ir3_shader_variant *so = ctx->so; 2262 unsigned name = decl->Semantic.Name; 2263 unsigned i; 2264 2265 /* I don't think we should get frag shader input without 2266 * semantic info? 
Otherwise how do inputs get linked to 2267 * vert outputs? 2268 */ 2269 compile_assert(ctx, (ctx->type == TGSI_PROCESSOR_VERTEX) || 2270 decl->Declaration.Semantic); 2271 2272 for (i = decl->Range.First; i <= decl->Range.Last; i++) { 2273 unsigned n = so->inputs_count++; 2274 unsigned r = regid(i, 0); 2275 unsigned ncomp, j; 2276 2277 /* we'll figure out the actual components used after scheduling */ 2278 ncomp = 4; 2279 2280 DBG("decl in -> r%d", i); 2281 2282 compile_assert(ctx, n < ARRAY_SIZE(so->inputs)); 2283 2284 so->inputs[n].semantic = decl_semantic(&decl->Semantic); 2285 so->inputs[n].compmask = (1 << ncomp) - 1; 2286 so->inputs[n].regid = r; 2287 so->inputs[n].inloc = ctx->next_inloc; 2288 2289 for (j = 0; j < ncomp; j++) { 2290 struct ir3_instruction *instr = NULL; 2291 2292 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 2293 /* for fragment shaders, POSITION and FACE are handled 2294 * specially, not using normal varying / bary.f 2295 */ 2296 if (name == TGSI_SEMANTIC_POSITION) { 2297 so->inputs[n].bary = false; 2298 so->frag_coord = true; 2299 instr = decl_in_frag_coord(ctx, r + j, j); 2300 } else if (name == TGSI_SEMANTIC_FACE) { 2301 so->inputs[n].bary = false; 2302 so->frag_face = true; 2303 instr = decl_in_frag_face(ctx, r + j, j); 2304 } else { 2305 so->inputs[n].bary = true; 2306 instr = decl_in_frag_bary(ctx, r + j, j, 2307 so->inputs[n].inloc + j - 8); 2308 } 2309 } else { 2310 instr = create_input(ctx->block, NULL, (i * 4) + j); 2311 } 2312 2313 ctx->block->inputs[(i * 4) + j] = instr; 2314 } 2315 2316 if (so->inputs[n].bary || (ctx->type == TGSI_PROCESSOR_VERTEX)) { 2317 ctx->next_inloc += ncomp; 2318 so->total_in += ncomp; 2319 } 2320 } 2321} 2322 2323static void 2324decl_out(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl) 2325{ 2326 struct ir3_shader_variant *so = ctx->so; 2327 unsigned comp = 0; 2328 unsigned name = decl->Semantic.Name; 2329 unsigned i; 2330 2331 compile_assert(ctx, decl->Declaration.Semantic); 2332 2333 
	DBG("decl out[%d] -> r%d", name, decl->Range.First);

	if (ctx->type == TGSI_PROCESSOR_VERTEX) {
		switch (name) {
		case TGSI_SEMANTIC_POSITION:
			so->writes_pos = true;
			break;
		case TGSI_SEMANTIC_PSIZE:
			so->writes_psize = true;
			break;
		case TGSI_SEMANTIC_COLOR:
		case TGSI_SEMANTIC_BCOLOR:
		case TGSI_SEMANTIC_GENERIC:
		case TGSI_SEMANTIC_FOG:
		case TGSI_SEMANTIC_TEXCOORD:
			break;
		default:
			compile_error(ctx, "unknown VS semantic name: %s\n",
					tgsi_semantic_names[name]);
		}
	} else {
		switch (name) {
		case TGSI_SEMANTIC_POSITION:
			comp = 2;  /* tgsi will write to .z component */
			so->writes_pos = true;
			break;
		case TGSI_SEMANTIC_COLOR:
			break;
		default:
			compile_error(ctx, "unknown FS semantic name: %s\n",
					tgsi_semantic_names[name]);
		}
	}

	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
		unsigned n = so->outputs_count++;
		unsigned ncomp, j;

		ncomp = 4;

		compile_assert(ctx, n < ARRAY_SIZE(so->outputs));

		so->outputs[n].semantic = decl_semantic(&decl->Semantic);
		so->outputs[n].regid = regid(i, comp);

		/* avoid undefined outputs, stick a dummy mov from imm{0.0},
		 * which if the output is actually assigned will be over-
		 * written
		 */
		for (j = 0; j < ncomp; j++)
			ctx->block->outputs[(i * 4) + j] = create_immed(ctx, 0.0);
	}
}

/* from TGSI perspective, we actually have inputs.  But most of the "inputs"
 * for a fragment shader are just bary.f instructions.  The *actual* inputs
 * from the hw perspective are the frag_pos and optionally frag_coord and
 * frag_face.
2391 */ 2392static void 2393fixup_frag_inputs(struct ir3_compile_context *ctx) 2394{ 2395 struct ir3_shader_variant *so = ctx->so; 2396 struct ir3_block *block = ctx->block; 2397 struct ir3_instruction **inputs; 2398 struct ir3_instruction *instr; 2399 int n, regid = 0; 2400 2401 block->ninputs = 0; 2402 2403 n = 4; /* always have frag_pos */ 2404 n += COND(so->frag_face, 4); 2405 n += COND(so->frag_coord, 4); 2406 2407 inputs = ir3_alloc(ctx->ir, n * (sizeof(struct ir3_instruction *))); 2408 2409 if (so->frag_face) { 2410 /* this ultimately gets assigned to hr0.x so doesn't conflict 2411 * with frag_coord/frag_pos.. 2412 */ 2413 inputs[block->ninputs++] = ctx->frag_face; 2414 ctx->frag_face->regs[0]->num = 0; 2415 2416 /* remaining channels not used, but let's avoid confusing 2417 * other parts that expect inputs to come in groups of vec4 2418 */ 2419 inputs[block->ninputs++] = NULL; 2420 inputs[block->ninputs++] = NULL; 2421 inputs[block->ninputs++] = NULL; 2422 } 2423 2424 /* since we don't know where to set the regid for frag_coord, 2425 * we have to use r0.x for it. 
But we don't want to *always* 2426 * use r1.x for frag_pos as that could increase the register 2427 * footprint on simple shaders: 2428 */ 2429 if (so->frag_coord) { 2430 ctx->frag_coord[0]->regs[0]->num = regid++; 2431 ctx->frag_coord[1]->regs[0]->num = regid++; 2432 ctx->frag_coord[2]->regs[0]->num = regid++; 2433 ctx->frag_coord[3]->regs[0]->num = regid++; 2434 2435 inputs[block->ninputs++] = ctx->frag_coord[0]; 2436 inputs[block->ninputs++] = ctx->frag_coord[1]; 2437 inputs[block->ninputs++] = ctx->frag_coord[2]; 2438 inputs[block->ninputs++] = ctx->frag_coord[3]; 2439 } 2440 2441 /* we always have frag_pos: */ 2442 so->pos_regid = regid; 2443 2444 /* r0.x */ 2445 instr = create_input(block, NULL, block->ninputs); 2446 instr->regs[0]->num = regid++; 2447 inputs[block->ninputs++] = instr; 2448 ctx->frag_pos->regs[1]->instr = instr; 2449 2450 /* r0.y */ 2451 instr = create_input(block, NULL, block->ninputs); 2452 instr->regs[0]->num = regid++; 2453 inputs[block->ninputs++] = instr; 2454 ctx->frag_pos->regs[2]->instr = instr; 2455 2456 block->inputs = inputs; 2457} 2458 2459static void 2460compile_instructions(struct ir3_compile_context *ctx) 2461{ 2462 push_block(ctx); 2463 2464 /* for fragment shader, we have a single input register (usually 2465 * r0.xy) which is used as the base for bary.f varying fetch instrs: 2466 */ 2467 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 2468 struct ir3_instruction *instr; 2469 instr = ir3_instr_create(ctx->block, -1, OPC_META_FI); 2470 ir3_reg_create(instr, 0, 0); 2471 ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */ 2472 ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */ 2473 ctx->frag_pos = instr; 2474 } 2475 2476 while (!tgsi_parse_end_of_tokens(&ctx->parser)) { 2477 tgsi_parse_token(&ctx->parser); 2478 2479 switch (ctx->parser.FullToken.Token.Type) { 2480 case TGSI_TOKEN_TYPE_DECLARATION: { 2481 struct tgsi_full_declaration *decl = 2482 &ctx->parser.FullToken.FullDeclaration; 2483 if (decl->Declaration.File == 
					TGSI_FILE_OUTPUT) {
				decl_out(ctx, decl);
			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
				decl_in(ctx, decl);
			}
			break;
		}
		case TGSI_TOKEN_TYPE_IMMEDIATE: {
			/* TODO: if we know the immediate is small enough, and only
			 * used with instructions that can embed an immediate, we
			 * can skip this:
			 */
			struct tgsi_full_immediate *imm =
					&ctx->parser.FullToken.FullImmediate;
			unsigned n = ctx->so->immediates_count++;
			compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates));
			/* 16 bytes == one vec4 immediate: */
			memcpy(ctx->so->immediates[n].val, imm->u, 16);
			break;
		}
		case TGSI_TOKEN_TYPE_INSTRUCTION: {
			struct tgsi_full_instruction *inst =
					&ctx->parser.FullToken.FullInstruction;
			unsigned opc = inst->Instruction.Opcode;
			const struct instr_translater *t = &translaters[opc];

			if (t->fxn) {
				t->fxn(t, ctx, inst);
				ctx->num_internal_temps = 0;
			} else {
				compile_error(ctx, "unknown TGSI opc: %s\n",
						tgsi_get_opcode_name(opc));
			}

			/* apply TGSI saturate modifiers by clamping the dst: */
			switch (inst->Instruction.Saturate) {
			case TGSI_SAT_ZERO_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(0.0), fui(1.0));
				break;
			case TGSI_SAT_MINUS_PLUS_ONE:
				create_clamp_imm(ctx, &inst->Dst[0].Register,
						fui(-1.0), fui(1.0));
				break;
			}

			instr_finish(ctx);

			break;
		}
		default:
			break;
		}
	}
}

/* Dump the current ir3 as a graphviz .dot file (debug aid, enabled
 * via FD_DBG_OPTDUMP).
 */
static void
compile_dump(struct ir3_compile_context *ctx)
{
	const char *name = (ctx->so->type == SHADER_VERTEX) ?
"vert" : "frag"; 2541 static unsigned n = 0; 2542 char fname[16]; 2543 FILE *f; 2544 snprintf(fname, sizeof(fname), "%s-%04u.dot", name, n++); 2545 f = fopen(fname, "w"); 2546 if (!f) 2547 return; 2548 ir3_block_depth(ctx->block); 2549 ir3_dump(ctx->ir, name, ctx->block, f); 2550 fclose(f); 2551} 2552 2553int 2554ir3_compile_shader(struct ir3_shader_variant *so, 2555 const struct tgsi_token *tokens, struct ir3_shader_key key, 2556 bool cp) 2557{ 2558 struct ir3_compile_context ctx; 2559 struct ir3_block *block; 2560 struct ir3_instruction **inputs; 2561 unsigned i, j, actual_in; 2562 int ret = 0; 2563 2564 assert(!so->ir); 2565 2566 so->ir = ir3_create(); 2567 2568 assert(so->ir); 2569 2570 if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) { 2571 DBG("INIT failed!"); 2572 ret = -1; 2573 goto out; 2574 } 2575 2576 compile_instructions(&ctx); 2577 2578 block = ctx.block; 2579 2580 /* keep track of the inputs from TGSI perspective.. */ 2581 inputs = block->inputs; 2582 2583 /* but fixup actual inputs for frag shader: */ 2584 if (ctx.type == TGSI_PROCESSOR_FRAGMENT) 2585 fixup_frag_inputs(&ctx); 2586 2587 /* at this point, for binning pass, throw away unneeded outputs: */ 2588 if (key.binning_pass) { 2589 for (i = 0, j = 0; i < so->outputs_count; i++) { 2590 unsigned name = sem2name(so->outputs[i].semantic); 2591 unsigned idx = sem2name(so->outputs[i].semantic); 2592 2593 /* throw away everything but first position/psize */ 2594 if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) || 2595 (name == TGSI_SEMANTIC_PSIZE))) { 2596 if (i != j) { 2597 so->outputs[j] = so->outputs[i]; 2598 block->outputs[(j*4)+0] = block->outputs[(i*4)+0]; 2599 block->outputs[(j*4)+1] = block->outputs[(i*4)+1]; 2600 block->outputs[(j*4)+2] = block->outputs[(i*4)+2]; 2601 block->outputs[(j*4)+3] = block->outputs[(i*4)+3]; 2602 } 2603 j++; 2604 } 2605 } 2606 so->outputs_count = j; 2607 block->noutputs = j * 4; 2608 } 2609 2610 /* for rendering to alpha format, we only need the .w component, 
2611 * and we need it to be in the .x position: 2612 */ 2613 if (key.alpha) { 2614 for (i = 0, j = 0; i < so->outputs_count; i++) { 2615 unsigned name = sem2name(so->outputs[i].semantic); 2616 2617 /* move .w component to .x and discard others: */ 2618 if (name == TGSI_SEMANTIC_COLOR) { 2619 block->outputs[(i*4)+0] = block->outputs[(i*4)+3]; 2620 block->outputs[(i*4)+1] = NULL; 2621 block->outputs[(i*4)+2] = NULL; 2622 block->outputs[(i*4)+3] = NULL; 2623 } 2624 } 2625 } 2626 2627 /* at this point, we want the kill's in the outputs array too, 2628 * so that they get scheduled (since they have no dst).. we've 2629 * already ensured that the array is big enough in push_block(): 2630 */ 2631 if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { 2632 for (i = 0; i < ctx.kill_count; i++) 2633 block->outputs[block->noutputs++] = ctx.kill[i]; 2634 } 2635 2636 if (fd_mesa_debug & FD_DBG_OPTDUMP) 2637 compile_dump(&ctx); 2638 2639 ret = ir3_block_flatten(block); 2640 if (ret < 0) { 2641 DBG("FLATTEN failed!"); 2642 goto out; 2643 } 2644 if ((ret > 0) && (fd_mesa_debug & FD_DBG_OPTDUMP)) 2645 compile_dump(&ctx); 2646 2647 if (fd_mesa_debug & FD_DBG_OPTMSGS) { 2648 printf("BEFORE CP:\n"); 2649 ir3_dump_instr_list(block->head); 2650 } 2651 2652 if (cp) 2653 ir3_block_cp(block); 2654 2655 if (fd_mesa_debug & FD_DBG_OPTDUMP) 2656 compile_dump(&ctx); 2657 2658 ir3_block_depth(block); 2659 2660 if (fd_mesa_debug & FD_DBG_OPTMSGS) { 2661 printf("AFTER DEPTH:\n"); 2662 ir3_dump_instr_list(block->head); 2663 } 2664 2665 ret = ir3_block_sched(block); 2666 if (ret) { 2667 DBG("SCHED failed!"); 2668 goto out; 2669 } 2670 2671 if (fd_mesa_debug & FD_DBG_OPTMSGS) { 2672 printf("AFTER SCHED:\n"); 2673 ir3_dump_instr_list(block->head); 2674 } 2675 2676 ret = ir3_block_ra(block, so->type, key.half_precision, 2677 so->frag_coord, so->frag_face, &so->has_samp); 2678 if (ret) { 2679 DBG("RA failed!"); 2680 goto out; 2681 } 2682 2683 if (fd_mesa_debug & FD_DBG_OPTMSGS) { 2684 printf("AFTER RA:\n"); 2685 
ir3_dump_instr_list(block->head); 2686 } 2687 2688 /* fixup input/outputs: */ 2689 for (i = 0; i < so->outputs_count; i++) { 2690 so->outputs[i].regid = block->outputs[i*4]->regs[0]->num; 2691 /* preserve hack for depth output.. tgsi writes depth to .z, 2692 * but what we give the hw is the scalar register: 2693 */ 2694 if ((ctx.type == TGSI_PROCESSOR_FRAGMENT) && 2695 (sem2name(so->outputs[i].semantic) == TGSI_SEMANTIC_POSITION)) 2696 so->outputs[i].regid += 2; 2697 } 2698 /* Note that some or all channels of an input may be unused: */ 2699 actual_in = 0; 2700 for (i = 0; i < so->inputs_count; i++) { 2701 unsigned j, regid = ~0, compmask = 0; 2702 so->inputs[i].ncomp = 0; 2703 for (j = 0; j < 4; j++) { 2704 struct ir3_instruction *in = inputs[(i*4) + j]; 2705 if (in) { 2706 compmask |= (1 << j); 2707 regid = in->regs[0]->num - j; 2708 actual_in++; 2709 so->inputs[i].ncomp++; 2710 } 2711 } 2712 so->inputs[i].regid = regid; 2713 so->inputs[i].compmask = compmask; 2714 } 2715 2716 /* fragment shader always gets full vec4's even if it doesn't 2717 * fetch all components, but vertex shader we need to update 2718 * with the actual number of components fetch, otherwise thing 2719 * will hang due to mismaptch between VFD_DECODE's and 2720 * TOTALATTRTOVS 2721 */ 2722 if (so->type == SHADER_VERTEX) 2723 so->total_in = actual_in; 2724 2725out: 2726 if (ret) { 2727 ir3_destroy(so->ir); 2728 so->ir = NULL; 2729 } 2730 compile_free(&ctx); 2731 2732 return ret; 2733} 2734