r600_shader.c revision 92fbf856f42b22f68f62c2516e0c6453c454cf05
/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "r600_sq.h"
#include "r600_llvm.h"
#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600_shader.h"
#include "r600d.h"

#include "sb/sb_public.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include <stdio.h>
#include <errno.h>

/* CAYMAN notes
Why CAYMAN got loops for lots of instructions is explained here.

-These 8xx t-slot only ops are implemented in all vector slots.
MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
These 8xx t-slot only opcodes become vector ops, with all four
slots expecting the arguments on sources a and b.
Result is 49broadcast to all channels. 50MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64 51These 8xx t-slot only opcodes become vector ops in the z, y, and 52x slots. 53EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 54RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 55SQRT_IEEE/_64 56SIN/COS 57The w slot may have an independent co-issued operation, or if the 58result is required to be in the w slot, the opcode above may be 59issued in the w slot as well. 60The compiler must issue the source argument to slots z, y, and x 61*/ 62 63#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) 64static int r600_shader_from_tgsi(struct r600_context *rctx, 65 struct r600_pipe_shader *pipeshader, 66 union r600_shader_key key); 67 68 69static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, 70 int size, unsigned comp_mask) { 71 72 if (!size) 73 return; 74 75 if (ps->num_arrays == ps->max_arrays) { 76 ps->max_arrays += 64; 77 ps->arrays = realloc(ps->arrays, ps->max_arrays * 78 sizeof(struct r600_shader_array)); 79 } 80 81 int n = ps->num_arrays; 82 ++ps->num_arrays; 83 84 ps->arrays[n].comp_mask = comp_mask; 85 ps->arrays[n].gpr_start = start_gpr; 86 ps->arrays[n].gpr_count = size; 87} 88 89static void r600_dump_streamout(struct pipe_stream_output_info *so) 90{ 91 unsigned i; 92 93 fprintf(stderr, "STREAMOUT\n"); 94 for (i = 0; i < so->num_outputs; i++) { 95 unsigned mask = ((1 << so->output[i].num_components) - 1) << 96 so->output[i].start_component; 97 fprintf(stderr, " %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n", 98 i, 99 so->output[i].stream, 100 so->output[i].output_buffer, 101 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, 102 so->output[i].register_index, 103 mask & 1 ? "x" : "", 104 mask & 2 ? "y" : "", 105 mask & 4 ? "z" : "", 106 mask & 8 ? "w" : "", 107 so->output[i].dst_offset < so->output[i].start_component ? 
" (will lower)" : ""); 108 } 109} 110 111static int store_shader(struct pipe_context *ctx, 112 struct r600_pipe_shader *shader) 113{ 114 struct r600_context *rctx = (struct r600_context *)ctx; 115 uint32_t *ptr, i; 116 117 if (shader->bo == NULL) { 118 shader->bo = (struct r600_resource*) 119 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); 120 if (shader->bo == NULL) { 121 return -ENOMEM; 122 } 123 ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); 124 if (R600_BIG_ENDIAN) { 125 for (i = 0; i < shader->shader.bc.ndw; ++i) { 126 ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); 127 } 128 } else { 129 memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); 130 } 131 rctx->b.ws->buffer_unmap(shader->bo->cs_buf); 132 } 133 134 return 0; 135} 136 137int r600_pipe_shader_create(struct pipe_context *ctx, 138 struct r600_pipe_shader *shader, 139 union r600_shader_key key) 140{ 141 struct r600_context *rctx = (struct r600_context *)ctx; 142 struct r600_pipe_shader_selector *sel = shader->selector; 143 int r; 144 bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens); 145 unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); 146 unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); 147 unsigned export_shader; 148 149 shader->shader.bc.isa = rctx->isa; 150 151 if (dump) { 152 fprintf(stderr, "--------------------------------------------------------------\n"); 153 tgsi_dump(sel->tokens, 0); 154 155 if (sel->so.num_outputs) { 156 r600_dump_streamout(&sel->so); 157 } 158 } 159 r = r600_shader_from_tgsi(rctx, shader, key); 160 if (r) { 161 R600_ERR("translation from TGSI failed !\n"); 162 goto error; 163 } 164 if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) { 165 /* only disable for vertex shaders in tess paths */ 166 if (key.vs.as_ls) 167 use_sb = 0; 168 } 169 use_sb &= (shader->shader.processor_type != 
TGSI_PROCESSOR_TESS_CTRL); 170 use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_TESS_EVAL); 171 172 /* disable SB for shaders using doubles */ 173 use_sb &= !shader->shader.uses_doubles; 174 175 /* Check if the bytecode has already been built. When using the llvm 176 * backend, r600_shader_from_tgsi() will take care of building the 177 * bytecode. 178 */ 179 if (!shader->shader.bc.bytecode) { 180 r = r600_bytecode_build(&shader->shader.bc); 181 if (r) { 182 R600_ERR("building bytecode failed !\n"); 183 goto error; 184 } 185 } 186 187 if (dump && !sb_disasm) { 188 fprintf(stderr, "--------------------------------------------------------------\n"); 189 r600_bytecode_disasm(&shader->shader.bc); 190 fprintf(stderr, "______________________________________________________________\n"); 191 } else if ((dump && sb_disasm) || use_sb) { 192 r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, 193 dump, use_sb); 194 if (r) { 195 R600_ERR("r600_sb_bytecode_process failed !\n"); 196 goto error; 197 } 198 } 199 200 if (shader->gs_copy_shader) { 201 if (dump) { 202 // dump copy shader 203 r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc, 204 &shader->gs_copy_shader->shader, dump, 0); 205 if (r) 206 goto error; 207 } 208 209 if ((r = store_shader(ctx, shader->gs_copy_shader))) 210 goto error; 211 } 212 213 /* Store the shader in a buffer. */ 214 if ((r = store_shader(ctx, shader))) 215 goto error; 216 217 /* Build state. 
*/ 218 switch (shader->shader.processor_type) { 219 case TGSI_PROCESSOR_TESS_CTRL: 220 evergreen_update_hs_state(ctx, shader); 221 break; 222 case TGSI_PROCESSOR_TESS_EVAL: 223 if (key.tes.as_es) 224 evergreen_update_es_state(ctx, shader); 225 else 226 evergreen_update_vs_state(ctx, shader); 227 break; 228 case TGSI_PROCESSOR_GEOMETRY: 229 if (rctx->b.chip_class >= EVERGREEN) { 230 evergreen_update_gs_state(ctx, shader); 231 evergreen_update_vs_state(ctx, shader->gs_copy_shader); 232 } else { 233 r600_update_gs_state(ctx, shader); 234 r600_update_vs_state(ctx, shader->gs_copy_shader); 235 } 236 break; 237 case TGSI_PROCESSOR_VERTEX: 238 export_shader = key.vs.as_es; 239 if (rctx->b.chip_class >= EVERGREEN) { 240 if (key.vs.as_ls) 241 evergreen_update_ls_state(ctx, shader); 242 else if (key.vs.as_es) 243 evergreen_update_es_state(ctx, shader); 244 else 245 evergreen_update_vs_state(ctx, shader); 246 } else { 247 if (export_shader) 248 r600_update_es_state(ctx, shader); 249 else 250 r600_update_vs_state(ctx, shader); 251 } 252 break; 253 case TGSI_PROCESSOR_FRAGMENT: 254 if (rctx->b.chip_class >= EVERGREEN) { 255 evergreen_update_ps_state(ctx, shader); 256 } else { 257 r600_update_ps_state(ctx, shader); 258 } 259 break; 260 default: 261 r = -EINVAL; 262 goto error; 263 } 264 return 0; 265 266error: 267 r600_pipe_shader_destroy(ctx, shader); 268 return r; 269} 270 271void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 272{ 273 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); 274 r600_bytecode_clear(&shader->shader.bc); 275 r600_release_command_buffer(&shader->command_buffer); 276} 277 278/* 279 * tgsi -> r600 shader 280 */ 281struct r600_shader_tgsi_instruction; 282 283struct r600_shader_src { 284 unsigned sel; 285 unsigned swizzle[4]; 286 unsigned neg; 287 unsigned abs; 288 unsigned rel; 289 unsigned kc_bank; 290 boolean kc_rel; /* true if cache bank is indexed */ 291 uint32_t value[4]; 292}; 293 294struct 
eg_interp { 295 boolean enabled; 296 unsigned ij_index; 297}; 298 299struct r600_shader_ctx { 300 struct tgsi_shader_info info; 301 struct tgsi_parse_context parse; 302 const struct tgsi_token *tokens; 303 unsigned type; 304 unsigned file_offset[TGSI_FILE_COUNT]; 305 unsigned temp_reg; 306 const struct r600_shader_tgsi_instruction *inst_info; 307 struct r600_bytecode *bc; 308 struct r600_shader *shader; 309 struct r600_shader_src src[4]; 310 uint32_t *literals; 311 uint32_t nliterals; 312 uint32_t max_driver_temp_used; 313 boolean use_llvm; 314 /* needed for evergreen interpolation */ 315 struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid 316 /* evergreen/cayman also store sample mask in face register */ 317 int face_gpr; 318 /* sample id is .w component stored in fixed point position register */ 319 int fixed_pt_position_gpr; 320 int colors_used; 321 boolean clip_vertex_write; 322 unsigned cv_output; 323 unsigned edgeflag_output; 324 int fragcoord_input; 325 int native_integers; 326 int next_ring_offset; 327 int gs_out_ring_offset; 328 int gs_next_vertex; 329 struct r600_shader *gs_for_vs; 330 int gs_export_gpr_tregs[4]; 331 const struct pipe_stream_output_info *gs_stream_output_info; 332 unsigned enabled_stream_buffers_mask; 333 unsigned tess_input_info; /* temp with tess input offsets */ 334 unsigned tess_output_info; /* temp with tess input offsets */ 335}; 336 337struct r600_shader_tgsi_instruction { 338 unsigned op; 339 int (*process)(struct r600_shader_ctx *ctx); 340}; 341 342static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind); 343static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 344static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 345static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); 346static void 
fc_pushlevel(struct r600_shader_ctx *ctx, int type); 347static int tgsi_else(struct r600_shader_ctx *ctx); 348static int tgsi_endif(struct r600_shader_ctx *ctx); 349static int tgsi_bgnloop(struct r600_shader_ctx *ctx); 350static int tgsi_endloop(struct r600_shader_ctx *ctx); 351static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx); 352static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 353 unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 354 unsigned int dst_reg); 355static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 356 const struct r600_shader_src *shader_src, 357 unsigned chan); 358 359static int tgsi_last_instruction(unsigned writemask) 360{ 361 int i, lasti = 0; 362 363 for (i = 0; i < 4; i++) { 364 if (writemask & (1 << i)) { 365 lasti = i; 366 } 367 } 368 return lasti; 369} 370 371static int tgsi_is_supported(struct r600_shader_ctx *ctx) 372{ 373 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 374 int j; 375 376 if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) { 377 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 378 return -EINVAL; 379 } 380 if (i->Instruction.Predicate) { 381 R600_ERR("predicate unsupported\n"); 382 return -EINVAL; 383 } 384#if 0 385 if (i->Instruction.Label) { 386 R600_ERR("label unsupported\n"); 387 return -EINVAL; 388 } 389#endif 390 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 391 if (i->Src[j].Register.Dimension) { 392 switch (i->Src[j].Register.File) { 393 case TGSI_FILE_CONSTANT: 394 break; 395 case TGSI_FILE_INPUT: 396 if (ctx->type == TGSI_PROCESSOR_GEOMETRY || 397 ctx->type == TGSI_PROCESSOR_TESS_CTRL || 398 ctx->type == TGSI_PROCESSOR_TESS_EVAL) 399 break; 400 case TGSI_FILE_OUTPUT: 401 if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) 402 break; 403 default: 404 R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, 405 i->Src[j].Register.File, 406 i->Src[j].Register.Dimension); 407 return 
-EINVAL; 408 } 409 } 410 } 411 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 412 if (i->Dst[j].Register.Dimension) { 413 if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) 414 continue; 415 R600_ERR("unsupported dst (dimension)\n"); 416 return -EINVAL; 417 } 418 } 419 return 0; 420} 421 422int eg_get_interpolator_index(unsigned interpolate, unsigned location) 423{ 424 if (interpolate == TGSI_INTERPOLATE_COLOR || 425 interpolate == TGSI_INTERPOLATE_LINEAR || 426 interpolate == TGSI_INTERPOLATE_PERSPECTIVE) 427 { 428 int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR; 429 int loc; 430 431 switch(location) { 432 case TGSI_INTERPOLATE_LOC_CENTER: 433 loc = 1; 434 break; 435 case TGSI_INTERPOLATE_LOC_CENTROID: 436 loc = 2; 437 break; 438 case TGSI_INTERPOLATE_LOC_SAMPLE: 439 default: 440 loc = 0; break; 441 } 442 443 return is_linear * 3 + loc; 444 } 445 446 return -1; 447} 448 449static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx, 450 int input) 451{ 452 int i = eg_get_interpolator_index( 453 ctx->shader->input[input].interpolate, 454 ctx->shader->input[input].interpolate_location); 455 assert(i >= 0); 456 ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index; 457} 458 459static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 460{ 461 int i, r; 462 struct r600_bytecode_alu alu; 463 int gpr = 0, base_chan = 0; 464 int ij_index = ctx->shader->input[input].ij_index; 465 466 /* work out gpr and base_chan from index */ 467 gpr = ij_index / 2; 468 base_chan = (2 * (ij_index % 2)) + 1; 469 470 for (i = 0; i < 8; i++) { 471 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 472 473 if (i < 4) 474 alu.op = ALU_OP2_INTERP_ZW; 475 else 476 alu.op = ALU_OP2_INTERP_XY; 477 478 if ((i > 1) && (i < 6)) { 479 alu.dst.sel = ctx->shader->input[input].gpr; 480 alu.dst.write = 1; 481 } 482 483 alu.dst.chan = i % 4; 484 485 alu.src[0].sel = gpr; 486 alu.src[0].chan = (base_chan - (i % 2)); 487 488 alu.src[1].sel = 
V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 489 490 alu.bank_swizzle_force = SQ_ALU_VEC_210; 491 if ((i % 4) == 3) 492 alu.last = 1; 493 r = r600_bytecode_add_alu(ctx->bc, &alu); 494 if (r) 495 return r; 496 } 497 return 0; 498} 499 500static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 501{ 502 int i, r; 503 struct r600_bytecode_alu alu; 504 505 for (i = 0; i < 4; i++) { 506 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 507 508 alu.op = ALU_OP1_INTERP_LOAD_P0; 509 510 alu.dst.sel = ctx->shader->input[input].gpr; 511 alu.dst.write = 1; 512 513 alu.dst.chan = i; 514 515 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 516 alu.src[0].chan = i; 517 518 if (i == 3) 519 alu.last = 1; 520 r = r600_bytecode_add_alu(ctx->bc, &alu); 521 if (r) 522 return r; 523 } 524 return 0; 525} 526 527/* 528 * Special export handling in shaders 529 * 530 * shader export ARRAY_BASE for EXPORT_POS: 531 * 60 is position 532 * 61 is misc vector 533 * 62, 63 are clip distance vectors 534 * 535 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 536 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 537 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 538 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 539 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 540 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 541 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 542 * exclusive from render target index) 543 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 544 * 545 * 546 * shader export ARRAY_BASE for EXPORT_PIXEL: 547 * 0-7 CB targets 548 * 61 computed Z vector 549 * 550 * The use of the values exported in the computed Z vector are controlled 551 * by DB_SHADER_CONTROL: 552 * Z_EXPORT_ENABLE - Z as a float in RED 553 * STENCIL_REF_EXPORT_ENABLE - 
stencil ref as int in GREEN 554 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 555 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 556 * DB_SOURCE_FORMAT - export control restrictions 557 * 558 */ 559 560 561/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 562static int r600_spi_sid(struct r600_shader_io * io) 563{ 564 int index, name = io->name; 565 566 /* These params are handled differently, they don't need 567 * semantic indices, so we'll use 0 for them. 568 */ 569 if (name == TGSI_SEMANTIC_POSITION || 570 name == TGSI_SEMANTIC_PSIZE || 571 name == TGSI_SEMANTIC_EDGEFLAG || 572 name == TGSI_SEMANTIC_FACE || 573 name == TGSI_SEMANTIC_SAMPLEMASK) 574 index = 0; 575 else { 576 if (name == TGSI_SEMANTIC_GENERIC) { 577 /* For generic params simply use sid from tgsi */ 578 index = io->sid; 579 } else { 580 /* For non-generic params - pack name and sid into 8 bits */ 581 index = 0x80 | (name<<3) | (io->sid); 582 } 583 584 /* Make sure that all really used indices have nonzero value, so 585 * we can just compare it to 0 later instead of comparing the name 586 * with different values to detect special cases. */ 587 index++; 588 } 589 590 return index; 591}; 592 593/* we need this to get a common lds index for vs/tcs/tes input/outputs */ 594int r600_get_lds_unique_index(unsigned semantic_name, unsigned index) 595{ 596 switch (semantic_name) { 597 case TGSI_SEMANTIC_POSITION: 598 return 0; 599 case TGSI_SEMANTIC_PSIZE: 600 return 1; 601 case TGSI_SEMANTIC_CLIPDIST: 602 assert(index <= 1); 603 return 2 + index; 604 case TGSI_SEMANTIC_GENERIC: 605 if (index <= 63-4) 606 return 4 + index - 9; 607 else 608 /* same explanation as in the default statement, 609 * the only user hitting this is st/nine. 
610 */ 611 return 0; 612 613 /* patch indices are completely separate and thus start from 0 */ 614 case TGSI_SEMANTIC_TESSOUTER: 615 return 0; 616 case TGSI_SEMANTIC_TESSINNER: 617 return 1; 618 case TGSI_SEMANTIC_PATCH: 619 return 2 + index; 620 621 default: 622 /* Don't fail here. The result of this function is only used 623 * for LS, TCS, TES, and GS, where legacy GL semantics can't 624 * occur, but this function is called for all vertex shaders 625 * before it's known whether LS will be compiled or not. 626 */ 627 return 0; 628 } 629} 630 631/* turn input into interpolate on EG */ 632static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 633{ 634 int r = 0; 635 636 if (ctx->shader->input[index].spi_sid) { 637 ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 638 if (ctx->shader->input[index].interpolate > 0) { 639 evergreen_interp_assign_ij_index(ctx, index); 640 if (!ctx->use_llvm) 641 r = evergreen_interp_alu(ctx, index); 642 } else { 643 if (!ctx->use_llvm) 644 r = evergreen_interp_flat(ctx, index); 645 } 646 } 647 return r; 648} 649 650static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 651{ 652 struct r600_bytecode_alu alu; 653 int i, r; 654 int gpr_front = ctx->shader->input[front].gpr; 655 int gpr_back = ctx->shader->input[back].gpr; 656 657 for (i = 0; i < 4; i++) { 658 memset(&alu, 0, sizeof(alu)); 659 alu.op = ALU_OP3_CNDGT; 660 alu.is_op3 = 1; 661 alu.dst.write = 1; 662 alu.dst.sel = gpr_front; 663 alu.src[0].sel = ctx->face_gpr; 664 alu.src[1].sel = gpr_front; 665 alu.src[2].sel = gpr_back; 666 667 alu.dst.chan = i; 668 alu.src[1].chan = i; 669 alu.src[2].chan = i; 670 alu.last = (i==3); 671 672 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 673 return r; 674 } 675 676 return 0; 677} 678 679/* execute a single slot ALU calculation */ 680static int single_alu_op2(struct r600_shader_ctx *ctx, int op, 681 int dst_sel, int dst_chan, 682 int src0_sel, unsigned src0_chan_val, 683 int src1_sel, 
unsigned src1_chan_val) 684{ 685 struct r600_bytecode_alu alu; 686 int r, i; 687 688 if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) { 689 for (i = 0; i < 4; i++) { 690 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 691 alu.op = op; 692 alu.src[0].sel = src0_sel; 693 if (src0_sel == V_SQ_ALU_SRC_LITERAL) 694 alu.src[0].value = src0_chan_val; 695 else 696 alu.src[0].chan = src0_chan_val; 697 alu.src[1].sel = src1_sel; 698 if (src1_sel == V_SQ_ALU_SRC_LITERAL) 699 alu.src[1].value = src1_chan_val; 700 else 701 alu.src[1].chan = src1_chan_val; 702 alu.dst.sel = dst_sel; 703 alu.dst.chan = i; 704 alu.dst.write = i == dst_chan; 705 alu.last = (i == 3); 706 r = r600_bytecode_add_alu(ctx->bc, &alu); 707 if (r) 708 return r; 709 } 710 return 0; 711 } 712 713 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 714 alu.op = op; 715 alu.src[0].sel = src0_sel; 716 if (src0_sel == V_SQ_ALU_SRC_LITERAL) 717 alu.src[0].value = src0_chan_val; 718 else 719 alu.src[0].chan = src0_chan_val; 720 alu.src[1].sel = src1_sel; 721 if (src1_sel == V_SQ_ALU_SRC_LITERAL) 722 alu.src[1].value = src1_chan_val; 723 else 724 alu.src[1].chan = src1_chan_val; 725 alu.dst.sel = dst_sel; 726 alu.dst.chan = dst_chan; 727 alu.dst.write = 1; 728 alu.last = 1; 729 r = r600_bytecode_add_alu(ctx->bc, &alu); 730 if (r) 731 return r; 732 return 0; 733} 734 735/* execute a single slot ALU calculation */ 736static int single_alu_op3(struct r600_shader_ctx *ctx, int op, 737 int dst_sel, int dst_chan, 738 int src0_sel, unsigned src0_chan_val, 739 int src1_sel, unsigned src1_chan_val, 740 int src2_sel, unsigned src2_chan_val) 741{ 742 struct r600_bytecode_alu alu; 743 int r; 744 745 /* validate this for other ops */ 746 assert(op == ALU_OP3_MULADD_UINT24); 747 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 748 alu.op = op; 749 alu.src[0].sel = src0_sel; 750 if (src0_sel == V_SQ_ALU_SRC_LITERAL) 751 alu.src[0].value = src0_chan_val; 752 else 753 alu.src[0].chan = src0_chan_val; 754 
alu.src[1].sel = src1_sel; 755 if (src1_sel == V_SQ_ALU_SRC_LITERAL) 756 alu.src[1].value = src1_chan_val; 757 else 758 alu.src[1].chan = src1_chan_val; 759 alu.src[2].sel = src2_sel; 760 if (src2_sel == V_SQ_ALU_SRC_LITERAL) 761 alu.src[2].value = src2_chan_val; 762 else 763 alu.src[2].chan = src2_chan_val; 764 alu.dst.sel = dst_sel; 765 alu.dst.chan = dst_chan; 766 alu.is_op3 = 1; 767 alu.last = 1; 768 r = r600_bytecode_add_alu(ctx->bc, &alu); 769 if (r) 770 return r; 771 return 0; 772} 773 774/* put it in temp_reg.x */ 775static int get_lds_offset0(struct r600_shader_ctx *ctx, 776 int rel_patch_chan, 777 int temp_reg, bool is_patch_var) 778{ 779 int r; 780 781 /* MUL temp.x, patch_stride (input_vals.x), rel_patch_id (r0.y (tcs)) */ 782 /* ADD 783 Dimension - patch0_offset (input_vals.z), 784 Non-dim - patch0_data_offset (input_vals.w) 785 */ 786 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 787 temp_reg, 0, 788 ctx->tess_output_info, 0, 789 0, rel_patch_chan, 790 ctx->tess_output_info, is_patch_var ? 3 : 2); 791 if (r) 792 return r; 793 return 0; 794} 795 796static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index) 797{ 798 return index > 0 ? 
ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg; 799} 800 801static int r600_get_temp(struct r600_shader_ctx *ctx) 802{ 803 return ctx->temp_reg + ctx->max_driver_temp_used++; 804} 805 806static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid) 807{ 808 int i; 809 i = ctx->shader->noutput++; 810 ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID; 811 ctx->shader->output[i].sid = 0; 812 ctx->shader->output[i].gpr = 0; 813 ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT; 814 ctx->shader->output[i].write_mask = 0x4; 815 ctx->shader->output[i].spi_sid = prim_id_sid; 816 817 return 0; 818} 819 820static int tgsi_declaration(struct r600_shader_ctx *ctx) 821{ 822 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 823 int r, i, j, count = d->Range.Last - d->Range.First + 1; 824 825 switch (d->Declaration.File) { 826 case TGSI_FILE_INPUT: 827 for (j = 0; j < count; j++) { 828 i = ctx->shader->ninput + j; 829 assert(i < Elements(ctx->shader->input)); 830 ctx->shader->input[i].name = d->Semantic.Name; 831 ctx->shader->input[i].sid = d->Semantic.Index + j; 832 ctx->shader->input[i].interpolate = d->Interp.Interpolate; 833 ctx->shader->input[i].interpolate_location = d->Interp.Location; 834 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j; 835 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 836 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 837 switch (ctx->shader->input[i].name) { 838 case TGSI_SEMANTIC_FACE: 839 if (ctx->face_gpr != -1) 840 ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */ 841 else 842 ctx->face_gpr = ctx->shader->input[i].gpr; 843 break; 844 case TGSI_SEMANTIC_COLOR: 845 ctx->colors_used++; 846 break; 847 case TGSI_SEMANTIC_POSITION: 848 ctx->fragcoord_input = i; 849 break; 850 case TGSI_SEMANTIC_PRIMID: 851 /* set this for now */ 852 ctx->shader->gs_prim_id_input = true; 853 
ctx->shader->ps_prim_id_input = i; 854 break; 855 } 856 if (ctx->bc->chip_class >= EVERGREEN) { 857 if ((r = evergreen_interp_input(ctx, i))) 858 return r; 859 } 860 } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { 861 /* FIXME probably skip inputs if they aren't passed in the ring */ 862 ctx->shader->input[i].ring_offset = ctx->next_ring_offset; 863 ctx->next_ring_offset += 16; 864 if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID) 865 ctx->shader->gs_prim_id_input = true; 866 } 867 } 868 ctx->shader->ninput += count; 869 break; 870 case TGSI_FILE_OUTPUT: 871 for (j = 0; j < count; j++) { 872 i = ctx->shader->noutput + j; 873 assert(i < Elements(ctx->shader->output)); 874 ctx->shader->output[i].name = d->Semantic.Name; 875 ctx->shader->output[i].sid = d->Semantic.Index + j; 876 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j; 877 ctx->shader->output[i].interpolate = d->Interp.Interpolate; 878 ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 879 if (ctx->type == TGSI_PROCESSOR_VERTEX || 880 ctx->type == TGSI_PROCESSOR_GEOMETRY || 881 ctx->type == TGSI_PROCESSOR_TESS_EVAL) { 882 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 883 switch (d->Semantic.Name) { 884 case TGSI_SEMANTIC_CLIPDIST: 885 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << 886 ((d->Semantic.Index + j) << 2); 887 break; 888 case TGSI_SEMANTIC_PSIZE: 889 ctx->shader->vs_out_misc_write = 1; 890 ctx->shader->vs_out_point_size = 1; 891 break; 892 case TGSI_SEMANTIC_EDGEFLAG: 893 ctx->shader->vs_out_misc_write = 1; 894 ctx->shader->vs_out_edgeflag = 1; 895 ctx->edgeflag_output = i; 896 break; 897 case TGSI_SEMANTIC_VIEWPORT_INDEX: 898 ctx->shader->vs_out_misc_write = 1; 899 ctx->shader->vs_out_viewport = 1; 900 break; 901 case TGSI_SEMANTIC_LAYER: 902 ctx->shader->vs_out_misc_write = 1; 903 ctx->shader->vs_out_layer = 1; 904 break; 905 case TGSI_SEMANTIC_CLIPVERTEX: 906 ctx->clip_vertex_write = TRUE; 907 
ctx->cv_output = i; 908 break; 909 } 910 if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { 911 ctx->gs_out_ring_offset += 16; 912 } 913 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 914 switch (d->Semantic.Name) { 915 case TGSI_SEMANTIC_COLOR: 916 ctx->shader->nr_ps_max_color_exports++; 917 break; 918 } 919 } 920 } 921 ctx->shader->noutput += count; 922 break; 923 case TGSI_FILE_TEMPORARY: 924 if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 925 if (d->Array.ArrayID) { 926 r600_add_gpr_array(ctx->shader, 927 ctx->file_offset[TGSI_FILE_TEMPORARY] + 928 d->Range.First, 929 d->Range.Last - d->Range.First + 1, 0x0F); 930 } 931 } 932 break; 933 934 case TGSI_FILE_CONSTANT: 935 case TGSI_FILE_SAMPLER: 936 case TGSI_FILE_SAMPLER_VIEW: 937 case TGSI_FILE_ADDRESS: 938 break; 939 940 case TGSI_FILE_SYSTEM_VALUE: 941 if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK || 942 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID || 943 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) { 944 break; /* Already handled from allocate_system_value_inputs */ 945 } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 946 if (!ctx->native_integers) { 947 struct r600_bytecode_alu alu; 948 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 949 950 alu.op = ALU_OP1_INT_TO_FLT; 951 alu.src[0].sel = 0; 952 alu.src[0].chan = 3; 953 954 alu.dst.sel = 0; 955 alu.dst.chan = 3; 956 alu.dst.write = 1; 957 alu.last = 1; 958 959 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 960 return r; 961 } 962 break; 963 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 964 break; 965 else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID) 966 break; 967 default: 968 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 969 return -EINVAL; 970 } 971 return 0; 972} 973 974static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset) 975{ 976 struct tgsi_parse_context parse; 977 struct { 978 boolean enabled; 979 int *reg; 980 unsigned name, alternate_name; 981 } inputs[2] 
= { 982 { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */ 983 984 { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */ 985 }; 986 int i, k, num_regs = 0; 987 988 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 989 return 0; 990 } 991 992 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 993 while (!tgsi_parse_end_of_tokens(&parse)) { 994 tgsi_parse_token(&parse); 995 996 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 997 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 998 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 999 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 1000 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 1001 { 1002 int interpolate, location, k; 1003 1004 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 1005 location = TGSI_INTERPOLATE_LOC_CENTER; 1006 inputs[1].enabled = true; /* needs SAMPLEID */ 1007 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 1008 location = TGSI_INTERPOLATE_LOC_CENTER; 1009 /* Needs sample positions, currently those are always available */ 1010 } else { 1011 location = TGSI_INTERPOLATE_LOC_CENTROID; 1012 } 1013 1014 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 1015 k = eg_get_interpolator_index(interpolate, location); 1016 ctx->eg_interpolators[k].enabled = true; 1017 } 1018 } else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { 1019 struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration; 1020 if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1021 for (k = 0; k < Elements(inputs); k++) { 1022 if (d->Semantic.Name == inputs[k].name || 1023 d->Semantic.Name == inputs[k].alternate_name) { 1024 inputs[k].enabled = true; 1025 } 1026 } 1027 } 1028 } 1029 } 1030 1031 tgsi_parse_free(&parse); 1032 
1033 for (i = 0; i < Elements(inputs); i++) { 1034 boolean enabled = inputs[i].enabled; 1035 int *reg = inputs[i].reg; 1036 unsigned name = inputs[i].name; 1037 1038 if (enabled) { 1039 int gpr = gpr_offset + num_regs++; 1040 1041 // add to inputs, allocate a gpr 1042 k = ctx->shader->ninput ++; 1043 ctx->shader->input[k].name = name; 1044 ctx->shader->input[k].sid = 0; 1045 ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT; 1046 ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER; 1047 *reg = ctx->shader->input[k].gpr = gpr; 1048 } 1049 } 1050 1051 return gpr_offset + num_regs; 1052} 1053 1054/* 1055 * for evergreen we need to scan the shader to find the number of GPRs we need to 1056 * reserve for interpolation and system values 1057 * 1058 * we need to know if we are going to emit 1059 * any sample or centroid inputs 1060 * if perspective and linear are required 1061*/ 1062static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 1063{ 1064 int i; 1065 int num_baryc; 1066 struct tgsi_parse_context parse; 1067 1068 memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators)); 1069 1070 for (i = 0; i < ctx->info.num_inputs; i++) { 1071 int k; 1072 /* skip position/face/mask/sampleid */ 1073 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 1074 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE || 1075 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK || 1076 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID) 1077 continue; 1078 1079 k = eg_get_interpolator_index( 1080 ctx->info.input_interpolate[i], 1081 ctx->info.input_interpolate_loc[i]); 1082 if (k >= 0) 1083 ctx->eg_interpolators[k].enabled = TRUE; 1084 } 1085 1086 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 1087 return 0; 1088 } 1089 1090 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 1091 while (!tgsi_parse_end_of_tokens(&parse)) { 1092 tgsi_parse_token(&parse); 1093 1094 if 
(parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 1095 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 1096 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 1097 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 1098 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 1099 { 1100 int interpolate, location, k; 1101 1102 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 1103 location = TGSI_INTERPOLATE_LOC_CENTER; 1104 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 1105 location = TGSI_INTERPOLATE_LOC_CENTER; 1106 } else { 1107 location = TGSI_INTERPOLATE_LOC_CENTROID; 1108 } 1109 1110 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 1111 k = eg_get_interpolator_index(interpolate, location); 1112 ctx->eg_interpolators[k].enabled = true; 1113 } 1114 } 1115 } 1116 1117 tgsi_parse_free(&parse); 1118 1119 /* assign gpr to each interpolator according to priority */ 1120 num_baryc = 0; 1121 for (i = 0; i < Elements(ctx->eg_interpolators); i++) { 1122 if (ctx->eg_interpolators[i].enabled) { 1123 ctx->eg_interpolators[i].ij_index = num_baryc; 1124 num_baryc ++; 1125 } 1126 } 1127 1128 /* XXX PULL MODEL and LINE STIPPLE */ 1129 1130 num_baryc = (num_baryc + 1) >> 1; 1131 return allocate_system_value_inputs(ctx, num_baryc); 1132} 1133 1134/* sample_id_sel == NULL means fetch for current sample */ 1135static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel) 1136{ 1137 struct r600_bytecode_vtx vtx; 1138 int r, t1; 1139 1140 assert(ctx->fixed_pt_position_gpr != -1); 1141 1142 t1 = r600_get_temp(ctx); 1143 1144 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 1145 vtx.op = FETCH_OP_VFETCH; 1146 vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 1147 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1148 if (sample_id == NULL) { 1149 vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; 1150 vtx.src_sel_x = 
3; 1151 } 1152 else { 1153 struct r600_bytecode_alu alu; 1154 1155 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1156 alu.op = ALU_OP1_MOV; 1157 r600_bytecode_src(&alu.src[0], sample_id, chan_sel); 1158 alu.dst.sel = t1; 1159 alu.dst.write = 1; 1160 alu.last = 1; 1161 r = r600_bytecode_add_alu(ctx->bc, &alu); 1162 if (r) 1163 return r; 1164 1165 vtx.src_gpr = t1; 1166 vtx.src_sel_x = 0; 1167 } 1168 vtx.mega_fetch_count = 16; 1169 vtx.dst_gpr = t1; 1170 vtx.dst_sel_x = 0; 1171 vtx.dst_sel_y = 1; 1172 vtx.dst_sel_z = 2; 1173 vtx.dst_sel_w = 3; 1174 vtx.data_format = FMT_32_32_32_32_FLOAT; 1175 vtx.num_format_all = 2; 1176 vtx.format_comp_all = 1; 1177 vtx.use_const_fields = 0; 1178 vtx.offset = 1; // first element is size of buffer 1179 vtx.endian = r600_endian_swap(32); 1180 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 1181 1182 r = r600_bytecode_add_vtx(ctx->bc, &vtx); 1183 if (r) 1184 return r; 1185 1186 return t1; 1187} 1188 1189static void tgsi_src(struct r600_shader_ctx *ctx, 1190 const struct tgsi_full_src_register *tgsi_src, 1191 struct r600_shader_src *r600_src) 1192{ 1193 memset(r600_src, 0, sizeof(*r600_src)); 1194 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 1195 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 1196 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 1197 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 1198 r600_src->neg = tgsi_src->Register.Negate; 1199 r600_src->abs = tgsi_src->Register.Absolute; 1200 1201 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 1202 int index; 1203 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 1204 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 1205 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 1206 1207 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 1208 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs); 1209 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 1210 
return; 1211 } 1212 index = tgsi_src->Register.Index; 1213 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 1214 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 1215 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 1216 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) { 1217 r600_src->swizzle[0] = 2; // Z value 1218 r600_src->swizzle[1] = 2; 1219 r600_src->swizzle[2] = 2; 1220 r600_src->swizzle[3] = 2; 1221 r600_src->sel = ctx->face_gpr; 1222 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) { 1223 r600_src->swizzle[0] = 3; // W value 1224 r600_src->swizzle[1] = 3; 1225 r600_src->swizzle[2] = 3; 1226 r600_src->swizzle[3] = 3; 1227 r600_src->sel = ctx->fixed_pt_position_gpr; 1228 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) { 1229 r600_src->swizzle[0] = 0; 1230 r600_src->swizzle[1] = 1; 1231 r600_src->swizzle[2] = 4; 1232 r600_src->swizzle[3] = 4; 1233 r600_src->sel = load_sample_position(ctx, NULL, -1); 1234 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 1235 r600_src->swizzle[0] = 3; 1236 r600_src->swizzle[1] = 3; 1237 r600_src->swizzle[2] = 3; 1238 r600_src->swizzle[3] = 3; 1239 r600_src->sel = 0; 1240 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 1241 r600_src->swizzle[0] = 0; 1242 r600_src->swizzle[1] = 0; 1243 r600_src->swizzle[2] = 0; 1244 r600_src->swizzle[3] = 0; 1245 r600_src->sel = 0; 1246 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { 1247 r600_src->swizzle[0] = 3; 1248 r600_src->swizzle[1] = 3; 1249 r600_src->swizzle[2] = 3; 1250 r600_src->swizzle[3] = 3; 1251 r600_src->sel = 1; 1252 } 1253 } else { 1254 if (tgsi_src->Register.Indirect) 1255 r600_src->rel = V_SQ_REL_RELATIVE; 1256 r600_src->sel = 
tgsi_src->Register.Index; 1257 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 1258 } 1259 if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) { 1260 if (tgsi_src->Register.Dimension) { 1261 r600_src->kc_bank = tgsi_src->Dimension.Index; 1262 if (tgsi_src->Dimension.Indirect) { 1263 r600_src->kc_rel = 1; 1264 } 1265 } 1266 } 1267} 1268 1269static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 1270 unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 1271 unsigned int dst_reg) 1272{ 1273 struct r600_bytecode_vtx vtx; 1274 unsigned int ar_reg; 1275 int r; 1276 1277 if (offset) { 1278 struct r600_bytecode_alu alu; 1279 1280 memset(&alu, 0, sizeof(alu)); 1281 1282 alu.op = ALU_OP2_ADD_INT; 1283 alu.src[0].sel = ctx->bc->ar_reg; 1284 alu.src[0].chan = ar_chan; 1285 1286 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1287 alu.src[1].value = offset; 1288 1289 alu.dst.sel = dst_reg; 1290 alu.dst.chan = ar_chan; 1291 alu.dst.write = 1; 1292 alu.last = 1; 1293 1294 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1295 return r; 1296 1297 ar_reg = dst_reg; 1298 } else { 1299 ar_reg = ctx->bc->ar_reg; 1300 } 1301 1302 memset(&vtx, 0, sizeof(vtx)); 1303 vtx.buffer_id = cb_idx; 1304 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1305 vtx.src_gpr = ar_reg; 1306 vtx.src_sel_x = ar_chan; 1307 vtx.mega_fetch_count = 16; 1308 vtx.dst_gpr = dst_reg; 1309 vtx.dst_sel_x = 0; /* SEL_X */ 1310 vtx.dst_sel_y = 1; /* SEL_Y */ 1311 vtx.dst_sel_z = 2; /* SEL_Z */ 1312 vtx.dst_sel_w = 3; /* SEL_W */ 1313 vtx.data_format = FMT_32_32_32_32_FLOAT; 1314 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 1315 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 1316 vtx.endian = r600_endian_swap(32); 1317 vtx.buffer_index_mode = cb_rel; // cb_rel ? 
V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE; 1318 1319 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1320 return r; 1321 1322 return 0; 1323} 1324 1325static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1326{ 1327 struct r600_bytecode_vtx vtx; 1328 int r; 1329 unsigned index = src->Register.Index; 1330 unsigned vtx_id = src->Dimension.Index; 1331 int offset_reg = vtx_id / 3; 1332 int offset_chan = vtx_id % 3; 1333 1334 /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, 1335 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ 1336 1337 if (offset_reg == 0 && offset_chan == 2) 1338 offset_chan = 3; 1339 1340 if (src->Dimension.Indirect) { 1341 int treg[3]; 1342 int t2; 1343 struct r600_bytecode_alu alu; 1344 int r, i; 1345 1346 /* you have got to be shitting me - 1347 we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt. 1348 at least this is what fglrx seems to do. */ 1349 for (i = 0; i < 3; i++) { 1350 treg[i] = r600_get_temp(ctx); 1351 } 1352 r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F); 1353 1354 t2 = r600_get_temp(ctx); 1355 for (i = 0; i < 3; i++) { 1356 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1357 alu.op = ALU_OP1_MOV; 1358 alu.src[0].sel = 0; 1359 alu.src[0].chan = i == 2 ? 
3 : i; 1360 alu.dst.sel = treg[i]; 1361 alu.dst.chan = 0; 1362 alu.dst.write = 1; 1363 alu.last = 1; 1364 r = r600_bytecode_add_alu(ctx->bc, &alu); 1365 if (r) 1366 return r; 1367 } 1368 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1369 alu.op = ALU_OP1_MOV; 1370 alu.src[0].sel = treg[0]; 1371 alu.src[0].rel = 1; 1372 alu.dst.sel = t2; 1373 alu.dst.write = 1; 1374 alu.last = 1; 1375 r = r600_bytecode_add_alu(ctx->bc, &alu); 1376 if (r) 1377 return r; 1378 offset_reg = t2; 1379 } 1380 1381 1382 memset(&vtx, 0, sizeof(vtx)); 1383 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 1384 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1385 vtx.src_gpr = offset_reg; 1386 vtx.src_sel_x = offset_chan; 1387 vtx.offset = index * 16; /*bytes*/ 1388 vtx.mega_fetch_count = 16; 1389 vtx.dst_gpr = dst_reg; 1390 vtx.dst_sel_x = 0; /* SEL_X */ 1391 vtx.dst_sel_y = 1; /* SEL_Y */ 1392 vtx.dst_sel_z = 2; /* SEL_Z */ 1393 vtx.dst_sel_w = 3; /* SEL_W */ 1394 if (ctx->bc->chip_class >= EVERGREEN) { 1395 vtx.use_const_fields = 1; 1396 } else { 1397 vtx.data_format = FMT_32_32_32_32_FLOAT; 1398 } 1399 1400 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1401 return r; 1402 1403 return 0; 1404} 1405 1406static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) 1407{ 1408 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1409 int i; 1410 1411 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1412 struct tgsi_full_src_register *src = &inst->Src[i]; 1413 1414 if (src->Register.File == TGSI_FILE_INPUT) { 1415 if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) { 1416 /* primitive id is in R0.z */ 1417 ctx->src[i].sel = 0; 1418 ctx->src[i].swizzle[0] = 2; 1419 } 1420 } 1421 if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { 1422 int treg = r600_get_temp(ctx); 1423 1424 fetch_gs_input(ctx, src, treg); 1425 ctx->src[i].sel = treg; 1426 } 1427 } 1428 return 0; 1429} 1430 1431 1432/* Tessellation shaders pass outputs to the 
next shader using LDS. 1433 * 1434 * LS outputs = TCS(HS) inputs 1435 * TCS(HS) outputs = TES(DS) inputs 1436 * 1437 * The LDS layout is: 1438 * - TCS inputs for patch 0 1439 * - TCS inputs for patch 1 1440 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) 1441 * - ... 1442 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset 1443 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset 1444 * - TCS outputs for patch 1 1445 * - Per-patch TCS outputs for patch 1 1446 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) 1447 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) 1448 * - ... 1449 * 1450 * All three shaders VS(LS), TCS, TES share the same LDS space. 1451 */ 1452/* this will return with the dw address in temp_reg.x */ 1453static int r600_get_byte_address(struct r600_shader_ctx *ctx, int temp_reg, 1454 const struct tgsi_full_dst_register *dst, 1455 const struct tgsi_full_src_register *src, 1456 int stride_bytes_reg, int stride_bytes_chan) 1457{ 1458 struct tgsi_full_dst_register reg; 1459 ubyte *name, *index, *array_first; 1460 int r; 1461 int param; 1462 struct tgsi_shader_info *info = &ctx->info; 1463 /* Set the register description. The address computation is the same 1464 * for sources and destinations. */ 1465 if (src) { 1466 reg.Register.File = src->Register.File; 1467 reg.Register.Index = src->Register.Index; 1468 reg.Register.Indirect = src->Register.Indirect; 1469 reg.Register.Dimension = src->Register.Dimension; 1470 reg.Indirect = src->Indirect; 1471 reg.Dimension = src->Dimension; 1472 reg.DimIndirect = src->DimIndirect; 1473 } else 1474 reg = *dst; 1475 1476 /* If the register is 2-dimensional (e.g. an array of vertices 1477 * in a primitive), calculate the base address of the vertex. 
*/ 1478 if (reg.Register.Dimension) { 1479 int sel, chan; 1480 if (reg.Dimension.Indirect) { 1481 unsigned addr_reg; 1482 assert (reg.DimIndirect.File == TGSI_FILE_ADDRESS); 1483 1484 addr_reg = get_address_file_reg(ctx, reg.DimIndirect.Index); 1485 /* pull the value from index_reg */ 1486 sel = addr_reg; 1487 chan = 0; 1488 } else { 1489 sel = V_SQ_ALU_SRC_LITERAL; 1490 chan = reg.Dimension.Index; 1491 } 1492 1493 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 1494 temp_reg, 0, 1495 stride_bytes_reg, stride_bytes_chan, 1496 sel, chan, 1497 temp_reg, 0); 1498 if (r) 1499 return r; 1500 } 1501 1502 if (reg.Register.File == TGSI_FILE_INPUT) { 1503 name = info->input_semantic_name; 1504 index = info->input_semantic_index; 1505 array_first = info->input_array_first; 1506 } else if (reg.Register.File == TGSI_FILE_OUTPUT) { 1507 name = info->output_semantic_name; 1508 index = info->output_semantic_index; 1509 array_first = info->output_array_first; 1510 } else { 1511 assert(0); 1512 return -1; 1513 } 1514 if (reg.Register.Indirect) { 1515 int addr_reg; 1516 int first; 1517 /* Add the relative address of the element. 
*/ 1518 if (reg.Indirect.ArrayID) 1519 first = array_first[reg.Indirect.ArrayID]; 1520 else 1521 first = reg.Register.Index; 1522 1523 addr_reg = get_address_file_reg(ctx, reg.Indirect.Index); 1524 1525 /* pull the value from index_reg */ 1526 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 1527 temp_reg, 0, 1528 V_SQ_ALU_SRC_LITERAL, 16, 1529 addr_reg, 0, 1530 temp_reg, 0); 1531 if (r) 1532 return r; 1533 1534 param = r600_get_lds_unique_index(name[first], 1535 index[first]); 1536 1537 } else { 1538 param = r600_get_lds_unique_index(name[reg.Register.Index], 1539 index[reg.Register.Index]); 1540 } 1541 1542 /* add to base_addr - passed in temp_reg.x */ 1543 if (param) { 1544 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1545 temp_reg, 0, 1546 temp_reg, 0, 1547 V_SQ_ALU_SRC_LITERAL, param * 16); 1548 if (r) 1549 return r; 1550 1551 } 1552 return 0; 1553} 1554 1555static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, 1556 unsigned dst_reg) 1557{ 1558 struct r600_bytecode_alu alu; 1559 int r, i; 1560 1561 if ((ctx->bc->cf_last->ndw>>1) >= 0x60) 1562 ctx->bc->force_add_cf = 1; 1563 for (i = 1; i < 4; i++) { 1564 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1565 temp_reg, i, 1566 temp_reg, 0, 1567 V_SQ_ALU_SRC_LITERAL, 4 * i); 1568 } 1569 for (i = 0; i < 4; i++) { 1570 /* emit an LDS_READ_RET */ 1571 memset(&alu, 0, sizeof(alu)); 1572 alu.op = LDS_OP1_LDS_READ_RET; 1573 alu.src[0].sel = temp_reg; 1574 alu.src[0].chan = i; 1575 alu.src[1].sel = V_SQ_ALU_SRC_0; 1576 alu.src[2].sel = V_SQ_ALU_SRC_0; 1577 alu.dst.chan = 0; 1578 alu.is_lds_idx_op = true; 1579 alu.last = 1; 1580 r = r600_bytecode_add_alu(ctx->bc, &alu); 1581 if (r) 1582 return r; 1583 } 1584 for (i = 0; i < 4; i++) { 1585 /* then read from LDS_OQ_A_POP */ 1586 memset(&alu, 0, sizeof(alu)); 1587 1588 alu.op = ALU_OP1_MOV; 1589 alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; 1590 alu.src[0].chan = 0; 1591 alu.dst.sel = dst_reg; 1592 alu.dst.chan = i; 1593 alu.dst.write = 1; 1594 alu.last = 1; 
1595 r = r600_bytecode_add_alu(ctx->bc, &alu); 1596 if (r) 1597 return r; 1598 } 1599 return 0; 1600} 1601 1602static int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1603{ 1604 int r; 1605 unsigned temp_reg = r600_get_temp(ctx); 1606 1607 r = get_lds_offset0(ctx, 2, temp_reg, 1608 src->Register.Dimension ? false : true); 1609 if (r) 1610 return r; 1611 1612 /* the base address is now in temp.x */ 1613 r = r600_get_byte_address(ctx, temp_reg, 1614 NULL, src, ctx->tess_output_info, 1); 1615 if (r) 1616 return r; 1617 1618 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1619 if (r) 1620 return r; 1621 return 0; 1622} 1623 1624static int fetch_tcs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1625{ 1626 int r; 1627 unsigned temp_reg = r600_get_temp(ctx); 1628 1629 /* t.x = ips * r0.y */ 1630 r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24, 1631 temp_reg, 0, 1632 ctx->tess_input_info, 0, 1633 0, 1); 1634 1635 if (r) 1636 return r; 1637 1638 /* the base address is now in temp.x */ 1639 r = r600_get_byte_address(ctx, temp_reg, 1640 NULL, src, ctx->tess_input_info, 1); 1641 if (r) 1642 return r; 1643 1644 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1645 if (r) 1646 return r; 1647 return 0; 1648} 1649 1650static int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1651{ 1652 int r; 1653 unsigned temp_reg = r600_get_temp(ctx); 1654 1655 r = get_lds_offset0(ctx, 1, temp_reg, 1656 src->Register.Dimension ? 
false : true); 1657 if (r) 1658 return r; 1659 /* the base address is now in temp.x */ 1660 r = r600_get_byte_address(ctx, temp_reg, 1661 NULL, src, 1662 ctx->tess_output_info, 1); 1663 if (r) 1664 return r; 1665 1666 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1667 if (r) 1668 return r; 1669 return 0; 1670} 1671 1672static int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx) 1673{ 1674 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1675 int i; 1676 1677 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1678 struct tgsi_full_src_register *src = &inst->Src[i]; 1679 1680 if (ctx->type == TGSI_PROCESSOR_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) { 1681 int treg = r600_get_temp(ctx); 1682 fetch_tes_input(ctx, src, treg); 1683 ctx->src[i].sel = treg; 1684 ctx->src[i].rel = 0; 1685 } 1686 if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) { 1687 int treg = r600_get_temp(ctx); 1688 fetch_tcs_input(ctx, src, treg); 1689 ctx->src[i].sel = treg; 1690 ctx->src[i].rel = 0; 1691 } 1692 if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) { 1693 int treg = r600_get_temp(ctx); 1694 fetch_tcs_output(ctx, src, treg); 1695 ctx->src[i].sel = treg; 1696 ctx->src[i].rel = 0; 1697 } 1698 } 1699 return 0; 1700} 1701 1702static int tgsi_split_constant(struct r600_shader_ctx *ctx) 1703{ 1704 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1705 struct r600_bytecode_alu alu; 1706 int i, j, k, nconst, r; 1707 1708 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 1709 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 1710 nconst++; 1711 } 1712 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 1713 } 1714 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 1715 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 1716 continue; 1717 } 1718 1719 if (ctx->src[i].rel) { 1720 int chan = inst->Src[i].Indirect.Swizzle; 1721 
int treg = r600_get_temp(ctx); 1722 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg))) 1723 return r; 1724 1725 ctx->src[i].kc_bank = 0; 1726 ctx->src[i].kc_rel = 0; 1727 ctx->src[i].sel = treg; 1728 ctx->src[i].rel = 0; 1729 j--; 1730 } else if (j > 0) { 1731 int treg = r600_get_temp(ctx); 1732 for (k = 0; k < 4; k++) { 1733 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1734 alu.op = ALU_OP1_MOV; 1735 alu.src[0].sel = ctx->src[i].sel; 1736 alu.src[0].chan = k; 1737 alu.src[0].rel = ctx->src[i].rel; 1738 alu.src[0].kc_bank = ctx->src[i].kc_bank; 1739 alu.src[0].kc_rel = ctx->src[i].kc_rel; 1740 alu.dst.sel = treg; 1741 alu.dst.chan = k; 1742 alu.dst.write = 1; 1743 if (k == 3) 1744 alu.last = 1; 1745 r = r600_bytecode_add_alu(ctx->bc, &alu); 1746 if (r) 1747 return r; 1748 } 1749 ctx->src[i].sel = treg; 1750 ctx->src[i].rel =0; 1751 j--; 1752 } 1753 } 1754 return 0; 1755} 1756 1757/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 1758static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 1759{ 1760 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1761 struct r600_bytecode_alu alu; 1762 int i, j, k, nliteral, r; 1763 1764 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 1765 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1766 nliteral++; 1767 } 1768 } 1769 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 1770 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1771 int treg = r600_get_temp(ctx); 1772 for (k = 0; k < 4; k++) { 1773 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1774 alu.op = ALU_OP1_MOV; 1775 alu.src[0].sel = ctx->src[i].sel; 1776 alu.src[0].chan = k; 1777 alu.src[0].value = ctx->src[i].value[k]; 1778 alu.dst.sel = treg; 1779 alu.dst.chan = k; 1780 alu.dst.write = 1; 1781 if (k == 3) 1782 alu.last = 1; 1783 r = r600_bytecode_add_alu(ctx->bc, &alu); 
1784 if (r) 1785 return r; 1786 } 1787 ctx->src[i].sel = treg; 1788 j--; 1789 } 1790 } 1791 return 0; 1792} 1793 1794static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 1795{ 1796 int i, r, count = ctx->shader->ninput; 1797 1798 for (i = 0; i < count; i++) { 1799 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 1800 r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input); 1801 if (r) 1802 return r; 1803 } 1804 } 1805 return 0; 1806} 1807 1808static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so, 1809 int stream, unsigned *stream_item_size) 1810{ 1811 unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; 1812 unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; 1813 int i, j, r; 1814 1815 /* Sanity checking. */ 1816 if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) { 1817 R600_ERR("Too many stream outputs: %d\n", so->num_outputs); 1818 r = -EINVAL; 1819 goto out_err; 1820 } 1821 for (i = 0; i < so->num_outputs; i++) { 1822 if (so->output[i].output_buffer >= 4) { 1823 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", 1824 so->output[i].output_buffer); 1825 r = -EINVAL; 1826 goto out_err; 1827 } 1828 } 1829 1830 /* Initialize locations where the outputs are stored. */ 1831 for (i = 0; i < so->num_outputs; i++) { 1832 1833 so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr; 1834 start_comp[i] = so->output[i].start_component; 1835 /* Lower outputs with dst_offset < start_component. 1836 * 1837 * We can only output 4D vectors with a write mask, e.g. we can 1838 * only output the W component at offset 3, etc. If we want 1839 * to store Y, Z, or W at buffer offset 0, we need to use MOV 1840 * to move it to X and output X. 
*/ 1841 if (so->output[i].dst_offset < so->output[i].start_component) { 1842 unsigned tmp = r600_get_temp(ctx); 1843 1844 for (j = 0; j < so->output[i].num_components; j++) { 1845 struct r600_bytecode_alu alu; 1846 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1847 alu.op = ALU_OP1_MOV; 1848 alu.src[0].sel = so_gpr[i]; 1849 alu.src[0].chan = so->output[i].start_component + j; 1850 1851 alu.dst.sel = tmp; 1852 alu.dst.chan = j; 1853 alu.dst.write = 1; 1854 if (j == so->output[i].num_components - 1) 1855 alu.last = 1; 1856 r = r600_bytecode_add_alu(ctx->bc, &alu); 1857 if (r) 1858 return r; 1859 } 1860 start_comp[i] = 0; 1861 so_gpr[i] = tmp; 1862 } 1863 } 1864 1865 /* Write outputs to buffers. */ 1866 for (i = 0; i < so->num_outputs; i++) { 1867 struct r600_bytecode_output output; 1868 1869 if (stream != -1 && stream != so->output[i].output_buffer) 1870 continue; 1871 1872 memset(&output, 0, sizeof(struct r600_bytecode_output)); 1873 output.gpr = so_gpr[i]; 1874 output.elem_size = so->output[i].num_components - 1; 1875 if (output.elem_size == 2) 1876 output.elem_size = 3; // 3 not supported, write 4 with junk at end 1877 output.array_base = so->output[i].dst_offset - start_comp[i]; 1878 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 1879 output.burst_count = 1; 1880 /* array_size is an upper limit for the burst_count 1881 * with MEM_STREAM instructions */ 1882 output.array_size = 0xFFF; 1883 output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i]; 1884 1885 if (ctx->bc->chip_class >= EVERGREEN) { 1886 switch (so->output[i].output_buffer) { 1887 case 0: 1888 output.op = CF_OP_MEM_STREAM0_BUF0; 1889 break; 1890 case 1: 1891 output.op = CF_OP_MEM_STREAM0_BUF1; 1892 break; 1893 case 2: 1894 output.op = CF_OP_MEM_STREAM0_BUF2; 1895 break; 1896 case 3: 1897 output.op = CF_OP_MEM_STREAM0_BUF3; 1898 break; 1899 } 1900 output.op += so->output[i].stream * 4; 1901 assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= 
CF_OP_MEM_STREAM3_BUF3); 1902 ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4; 1903 } else { 1904 switch (so->output[i].output_buffer) { 1905 case 0: 1906 output.op = CF_OP_MEM_STREAM0; 1907 break; 1908 case 1: 1909 output.op = CF_OP_MEM_STREAM1; 1910 break; 1911 case 2: 1912 output.op = CF_OP_MEM_STREAM2; 1913 break; 1914 case 3: 1915 output.op = CF_OP_MEM_STREAM3; 1916 break; 1917 } 1918 ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer; 1919 } 1920 r = r600_bytecode_add_output(ctx->bc, &output); 1921 if (r) 1922 goto out_err; 1923 } 1924 return 0; 1925out_err: 1926 return r; 1927} 1928 1929static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx) 1930{ 1931 struct r600_bytecode_alu alu; 1932 unsigned reg; 1933 1934 if (!ctx->shader->vs_out_edgeflag) 1935 return; 1936 1937 reg = ctx->shader->output[ctx->edgeflag_output].gpr; 1938 1939 /* clamp(x, 0, 1) */ 1940 memset(&alu, 0, sizeof(alu)); 1941 alu.op = ALU_OP1_MOV; 1942 alu.src[0].sel = reg; 1943 alu.dst.sel = reg; 1944 alu.dst.write = 1; 1945 alu.dst.clamp = 1; 1946 alu.last = 1; 1947 r600_bytecode_add_alu(ctx->bc, &alu); 1948 1949 memset(&alu, 0, sizeof(alu)); 1950 alu.op = ALU_OP1_FLT_TO_INT; 1951 alu.src[0].sel = reg; 1952 alu.dst.sel = reg; 1953 alu.dst.write = 1; 1954 alu.last = 1; 1955 r600_bytecode_add_alu(ctx->bc, &alu); 1956} 1957 1958static int generate_gs_copy_shader(struct r600_context *rctx, 1959 struct r600_pipe_shader *gs, 1960 struct pipe_stream_output_info *so) 1961{ 1962 struct r600_shader_ctx ctx = {}; 1963 struct r600_shader *gs_shader = &gs->shader; 1964 struct r600_pipe_shader *cshader; 1965 int ocnt = gs_shader->noutput; 1966 struct r600_bytecode_alu alu; 1967 struct r600_bytecode_vtx vtx; 1968 struct r600_bytecode_output output; 1969 struct r600_bytecode_cf *cf_jump, *cf_pop, 1970 *last_exp_pos = NULL, *last_exp_param = NULL; 1971 int i, j, next_clip_pos = 61, next_param = 0; 1972 int ring; 1973 1974 cshader = 
calloc(1, sizeof(struct r600_pipe_shader)); 1975 if (!cshader) 1976 return 0; 1977 1978 memcpy(cshader->shader.output, gs_shader->output, ocnt * 1979 sizeof(struct r600_shader_io)); 1980 1981 cshader->shader.noutput = ocnt; 1982 1983 ctx.shader = &cshader->shader; 1984 ctx.bc = &ctx.shader->bc; 1985 ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX; 1986 1987 r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family, 1988 rctx->screen->has_compressed_msaa_texturing); 1989 1990 ctx.bc->isa = rctx->isa; 1991 1992 cf_jump = NULL; 1993 memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes)); 1994 1995 /* R0.x = R0.x & 0x3fffffff */ 1996 memset(&alu, 0, sizeof(alu)); 1997 alu.op = ALU_OP2_AND_INT; 1998 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1999 alu.src[1].value = 0x3fffffff; 2000 alu.dst.write = 1; 2001 r600_bytecode_add_alu(ctx.bc, &alu); 2002 2003 /* R0.y = R0.x >> 30 */ 2004 memset(&alu, 0, sizeof(alu)); 2005 alu.op = ALU_OP2_LSHR_INT; 2006 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2007 alu.src[1].value = 0x1e; 2008 alu.dst.chan = 1; 2009 alu.dst.write = 1; 2010 alu.last = 1; 2011 r600_bytecode_add_alu(ctx.bc, &alu); 2012 2013 /* fetch vertex data from GSVS ring */ 2014 for (i = 0; i < ocnt; ++i) { 2015 struct r600_shader_io *out = &ctx.shader->output[i]; 2016 2017 out->gpr = i + 1; 2018 out->ring_offset = i * 16; 2019 2020 memset(&vtx, 0, sizeof(vtx)); 2021 vtx.op = FETCH_OP_VFETCH; 2022 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 2023 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 2024 vtx.mega_fetch_count = 16; 2025 vtx.offset = out->ring_offset; 2026 vtx.dst_gpr = out->gpr; 2027 vtx.src_gpr = 0; 2028 vtx.dst_sel_x = 0; 2029 vtx.dst_sel_y = 1; 2030 vtx.dst_sel_z = 2; 2031 vtx.dst_sel_w = 3; 2032 if (rctx->b.chip_class >= EVERGREEN) { 2033 vtx.use_const_fields = 1; 2034 } else { 2035 vtx.data_format = FMT_32_32_32_32_FLOAT; 2036 } 2037 2038 r600_bytecode_add_vtx(ctx.bc, &vtx); 2039 } 2040 ctx.temp_reg = i + 1; 2041 for (ring = 3; ring >= 
0; --ring) { 2042 bool enabled = false; 2043 for (i = 0; i < so->num_outputs; i++) { 2044 if (so->output[i].stream == ring) { 2045 enabled = true; 2046 break; 2047 } 2048 } 2049 if (ring != 0 && !enabled) { 2050 cshader->shader.ring_item_sizes[ring] = 0; 2051 continue; 2052 } 2053 2054 if (cf_jump) { 2055 // Patch up jump label 2056 r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 2057 cf_pop = ctx.bc->cf_last; 2058 2059 cf_jump->cf_addr = cf_pop->id + 2; 2060 cf_jump->pop_count = 1; 2061 cf_pop->cf_addr = cf_pop->id + 2; 2062 cf_pop->pop_count = 1; 2063 } 2064 2065 /* PRED_SETE_INT __, R0.y, ring */ 2066 memset(&alu, 0, sizeof(alu)); 2067 alu.op = ALU_OP2_PRED_SETE_INT; 2068 alu.src[0].chan = 1; 2069 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2070 alu.src[1].value = ring; 2071 alu.execute_mask = 1; 2072 alu.update_pred = 1; 2073 alu.last = 1; 2074 r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE); 2075 2076 r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP); 2077 cf_jump = ctx.bc->cf_last; 2078 2079 if (enabled) 2080 emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]); 2081 cshader->shader.ring_item_sizes[ring] = ocnt * 16; 2082 } 2083 2084 /* bc adds nops - copy it */ 2085 if (ctx.bc->chip_class == R600) { 2086 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2087 alu.op = ALU_OP0_NOP; 2088 alu.last = 1; 2089 r600_bytecode_add_alu(ctx.bc, &alu); 2090 2091 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 2092 } 2093 2094 /* export vertex data */ 2095 /* XXX factor out common code with r600_shader_from_tgsi ? 
*/ 2096 for (i = 0; i < ocnt; ++i) { 2097 struct r600_shader_io *out = &ctx.shader->output[i]; 2098 bool instream0 = true; 2099 if (out->name == TGSI_SEMANTIC_CLIPVERTEX) 2100 continue; 2101 2102 for (j = 0; j < so->num_outputs; j++) { 2103 if (so->output[j].register_index == i) { 2104 if (so->output[j].stream == 0) 2105 break; 2106 if (so->output[j].stream > 0) 2107 instream0 = false; 2108 } 2109 } 2110 if (!instream0) 2111 continue; 2112 memset(&output, 0, sizeof(output)); 2113 output.gpr = out->gpr; 2114 output.elem_size = 3; 2115 output.swizzle_x = 0; 2116 output.swizzle_y = 1; 2117 output.swizzle_z = 2; 2118 output.swizzle_w = 3; 2119 output.burst_count = 1; 2120 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2121 output.op = CF_OP_EXPORT; 2122 switch (out->name) { 2123 case TGSI_SEMANTIC_POSITION: 2124 output.array_base = 60; 2125 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2126 break; 2127 2128 case TGSI_SEMANTIC_PSIZE: 2129 output.array_base = 61; 2130 if (next_clip_pos == 61) 2131 next_clip_pos = 62; 2132 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2133 output.swizzle_y = 7; 2134 output.swizzle_z = 7; 2135 output.swizzle_w = 7; 2136 ctx.shader->vs_out_misc_write = 1; 2137 ctx.shader->vs_out_point_size = 1; 2138 break; 2139 case TGSI_SEMANTIC_LAYER: 2140 if (out->spi_sid) { 2141 /* duplicate it as PARAM to pass to the pixel shader */ 2142 output.array_base = next_param++; 2143 r600_bytecode_add_output(ctx.bc, &output); 2144 last_exp_param = ctx.bc->cf_last; 2145 } 2146 output.array_base = 61; 2147 if (next_clip_pos == 61) 2148 next_clip_pos = 62; 2149 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2150 output.swizzle_x = 7; 2151 output.swizzle_y = 7; 2152 output.swizzle_z = 0; 2153 output.swizzle_w = 7; 2154 ctx.shader->vs_out_misc_write = 1; 2155 ctx.shader->vs_out_layer = 1; 2156 break; 2157 case TGSI_SEMANTIC_VIEWPORT_INDEX: 2158 if (out->spi_sid) { 2159 /* duplicate it as PARAM to pass to the pixel shader */ 
2160 output.array_base = next_param++; 2161 r600_bytecode_add_output(ctx.bc, &output); 2162 last_exp_param = ctx.bc->cf_last; 2163 } 2164 output.array_base = 61; 2165 if (next_clip_pos == 61) 2166 next_clip_pos = 62; 2167 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2168 ctx.shader->vs_out_misc_write = 1; 2169 ctx.shader->vs_out_viewport = 1; 2170 output.swizzle_x = 7; 2171 output.swizzle_y = 7; 2172 output.swizzle_z = 7; 2173 output.swizzle_w = 0; 2174 break; 2175 case TGSI_SEMANTIC_CLIPDIST: 2176 /* spi_sid is 0 for clipdistance outputs that were generated 2177 * for clipvertex - we don't need to pass them to PS */ 2178 ctx.shader->clip_dist_write = gs->shader.clip_dist_write; 2179 if (out->spi_sid) { 2180 /* duplicate it as PARAM to pass to the pixel shader */ 2181 output.array_base = next_param++; 2182 r600_bytecode_add_output(ctx.bc, &output); 2183 last_exp_param = ctx.bc->cf_last; 2184 } 2185 output.array_base = next_clip_pos++; 2186 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2187 break; 2188 case TGSI_SEMANTIC_FOG: 2189 output.swizzle_y = 4; /* 0 */ 2190 output.swizzle_z = 4; /* 0 */ 2191 output.swizzle_w = 5; /* 1 */ 2192 break; 2193 default: 2194 output.array_base = next_param++; 2195 break; 2196 } 2197 r600_bytecode_add_output(ctx.bc, &output); 2198 if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) 2199 last_exp_param = ctx.bc->cf_last; 2200 else 2201 last_exp_pos = ctx.bc->cf_last; 2202 } 2203 2204 if (!last_exp_pos) { 2205 memset(&output, 0, sizeof(output)); 2206 output.gpr = 0; 2207 output.elem_size = 3; 2208 output.swizzle_x = 7; 2209 output.swizzle_y = 7; 2210 output.swizzle_z = 7; 2211 output.swizzle_w = 7; 2212 output.burst_count = 1; 2213 output.type = 2; 2214 output.op = CF_OP_EXPORT; 2215 output.array_base = 60; 2216 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2217 r600_bytecode_add_output(ctx.bc, &output); 2218 last_exp_pos = ctx.bc->cf_last; 2219 } 2220 2221 if (!last_exp_param) { 2222 
memset(&output, 0, sizeof(output)); 2223 output.gpr = 0; 2224 output.elem_size = 3; 2225 output.swizzle_x = 7; 2226 output.swizzle_y = 7; 2227 output.swizzle_z = 7; 2228 output.swizzle_w = 7; 2229 output.burst_count = 1; 2230 output.type = 2; 2231 output.op = CF_OP_EXPORT; 2232 output.array_base = next_param++; 2233 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2234 r600_bytecode_add_output(ctx.bc, &output); 2235 last_exp_param = ctx.bc->cf_last; 2236 } 2237 2238 last_exp_pos->op = CF_OP_EXPORT_DONE; 2239 last_exp_param->op = CF_OP_EXPORT_DONE; 2240 2241 r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 2242 cf_pop = ctx.bc->cf_last; 2243 2244 cf_jump->cf_addr = cf_pop->id + 2; 2245 cf_jump->pop_count = 1; 2246 cf_pop->cf_addr = cf_pop->id + 2; 2247 cf_pop->pop_count = 1; 2248 2249 if (ctx.bc->chip_class == CAYMAN) 2250 cm_bytecode_add_cf_end(ctx.bc); 2251 else { 2252 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 2253 ctx.bc->cf_last->end_of_program = 1; 2254 } 2255 2256 gs->gs_copy_shader = cshader; 2257 cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 2258 2259 ctx.bc->nstack = 1; 2260 2261 return r600_bytecode_build(ctx.bc); 2262} 2263 2264static int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind) 2265{ 2266 if (ind) { 2267 struct r600_bytecode_alu alu; 2268 int r; 2269 2270 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2271 alu.op = ALU_OP2_ADD_INT; 2272 alu.src[0].sel = ctx->gs_export_gpr_tregs[idx]; 2273 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2274 alu.src[1].value = ctx->gs_out_ring_offset >> 4; 2275 alu.dst.sel = ctx->gs_export_gpr_tregs[idx]; 2276 alu.dst.write = 1; 2277 alu.last = 1; 2278 r = r600_bytecode_add_alu(ctx->bc, &alu); 2279 if (r) 2280 return r; 2281 } 2282 return 0; 2283} 2284 2285static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind) 2286{ 2287 struct r600_bytecode_output output; 2288 int i, k, ring_offset; 2289 int 
/* Emit MEM_RING exports that store one vertex worth of shader outputs to
 * the GS output ring buffer(s).  Called once per emitted vertex.
 *
 * so     - stream-output info (unused here directly, kept for callers)
 * stream - target vertex stream; -1 means "single stream" (treated as 0)
 * ind    - use indirect ring addressing (ring index taken from a GPR)
 *          instead of a compile-time ring offset
 *
 * Always returns 0; errors from r600_bytecode_add_output are not
 * propagated (matches the rest of this emit path).
 */
static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind)
{
	struct r600_bytecode_output output;
	int i, k, ring_offset;
	/* stream == -1 is the "no multi-stream" case; index temp 0 is used */
	int effective_stream = stream == -1 ? 0 : stream;
	int idx = 0;

	for (i = 0; i < ctx->shader->noutput; i++) {
		if (ctx->gs_for_vs) {
			/* for ES we need to lookup corresponding ring offset expected by GS
			 * (map this output to GS input by name and sid) */
			/* FIXME precompute offsets */
			ring_offset = -1;
			for(k = 0; k < ctx->gs_for_vs->ninput; ++k) {
				struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
				struct r600_shader_io *out = &ctx->shader->output[i];
				if (in->name == out->name && in->sid == out->sid)
					ring_offset = in->ring_offset;
			}

			/* output not consumed by the GS - skip it */
			if (ring_offset == -1)
				continue;
		} else {
			/* GS itself: outputs packed densely, 16 bytes (vec4) each */
			ring_offset = idx * 16;
			idx++;
		}

		/* POSITION is only stored on stream 0 */
		if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
			continue;
		/* next_ring_offset after parsing input decls contains total size of
		 * single vertex data, gs_next_vertex - current vertex index */
		if (!ind)
			ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;

		memset(&output, 0, sizeof(struct r600_bytecode_output));
		output.gpr = ctx->shader->output[i].gpr;
		output.elem_size = 3;
		output.comp_mask = 0xF;
		output.burst_count = 1;

		if (ind)
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
		else
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;

		switch (stream) {
		default:
		case 0:
			output.op = CF_OP_MEM_RING; break;
		case 1:
			output.op = CF_OP_MEM_RING1; break;
		case 2:
			output.op = CF_OP_MEM_RING2; break;
		case 3:
			output.op = CF_OP_MEM_RING3; break;
		}

		if (ind) {
			output.array_base = ring_offset >> 2; /* in dwords */
			output.array_size = 0xfff;
			/* vertex base index comes from the per-stream export temp */
			output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
		} else
			output.array_base = ring_offset >> 2; /* in dwords */
		r600_bytecode_add_output(ctx->bc, &output);
	}

	++ctx->gs_next_vertex;
	return 0;
}
2352 return 0; 2353} 2354 2355 2356static int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx) 2357{ 2358 int r; 2359 struct r600_bytecode_vtx vtx; 2360 int temp_val = ctx->temp_reg; 2361 /* need to store the TCS output somewhere */ 2362 r = single_alu_op2(ctx, ALU_OP1_MOV, 2363 temp_val, 0, 2364 V_SQ_ALU_SRC_LITERAL, 0, 2365 0, 0); 2366 if (r) 2367 return r; 2368 2369 /* used by VS/TCS */ 2370 if (ctx->tess_input_info) { 2371 /* fetch tcs input values into resv space */ 2372 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 2373 vtx.op = FETCH_OP_VFETCH; 2374 vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER; 2375 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 2376 vtx.mega_fetch_count = 16; 2377 vtx.data_format = FMT_32_32_32_32; 2378 vtx.num_format_all = 2; 2379 vtx.format_comp_all = 1; 2380 vtx.use_const_fields = 0; 2381 vtx.endian = r600_endian_swap(32); 2382 vtx.srf_mode_all = 1; 2383 vtx.offset = 0; 2384 vtx.dst_gpr = ctx->tess_input_info; 2385 vtx.dst_sel_x = 0; 2386 vtx.dst_sel_y = 1; 2387 vtx.dst_sel_z = 2; 2388 vtx.dst_sel_w = 3; 2389 vtx.src_gpr = temp_val; 2390 vtx.src_sel_x = 0; 2391 2392 r = r600_bytecode_add_vtx(ctx->bc, &vtx); 2393 if (r) 2394 return r; 2395 } 2396 2397 /* used by TCS/TES */ 2398 if (ctx->tess_output_info) { 2399 /* fetch tcs output values into resv space */ 2400 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 2401 vtx.op = FETCH_OP_VFETCH; 2402 vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER; 2403 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 2404 vtx.mega_fetch_count = 16; 2405 vtx.data_format = FMT_32_32_32_32; 2406 vtx.num_format_all = 2; 2407 vtx.format_comp_all = 1; 2408 vtx.use_const_fields = 0; 2409 vtx.endian = r600_endian_swap(32); 2410 vtx.srf_mode_all = 1; 2411 vtx.offset = 16; 2412 vtx.dst_gpr = ctx->tess_output_info; 2413 vtx.dst_sel_x = 0; 2414 vtx.dst_sel_y = 1; 2415 vtx.dst_sel_z = 2; 2416 vtx.dst_sel_w = 3; 2417 vtx.src_gpr = temp_val; 2418 vtx.src_sel_x = 0; 2419 2420 r = r600_bytecode_add_vtx(ctx->bc, &vtx); 2421 
/* VS-as-LS epilogue: store all VS outputs into LDS so the TCS can read
 * them as per-vertex inputs.
 *
 * Address computation (all in dwords, kept in temp_reg):
 *   temp.x = vertexID * vertex_dw_stride   (base of this vertex's block)
 *   temp.y = temp.x + param*16             (only when param != 0)
 *   temp.z = base + 8                      (address of the zw half)
 * Each output vec4 is stored with two LDS_WRITE_REL ops, each writing a
 * pair of dwords (lds_idx = 1): xy at the base address, zw 8 bytes on.
 *
 * Returns 0 on success or the first bytecode-emission error.
 */
static int emit_lds_vs_writes(struct r600_shader_ctx *ctx)
{
	int i, j, r;
	int temp_reg;

	/* fetch tcs input values into input_vals */
	ctx->tess_input_info = r600_get_temp(ctx);
	ctx->tess_output_info = 0;
	r = r600_fetch_tess_io_info(ctx);
	if (r)
		return r;

	temp_reg = r600_get_temp(ctx);
	/* dst reg contains LDS address stride * idx */
	/* MUL vertexID, vertex_dw_stride */
	r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24,
			   temp_reg, 0,
			   ctx->tess_input_info, 1,
			   0, 1); /* rel id in r0.y? */
	if (r)
		return r;

	for (i = 0; i < ctx->shader->noutput; i++) {
		struct r600_bytecode_alu alu;
		/* slot index of this output in the fixed LDS layout */
		int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid);

		if (param) {
			/* temp.y = temp.x + param * 16 bytes */
			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
					   temp_reg, 1,
					   temp_reg, 0,
					   V_SQ_ALU_SRC_LITERAL, param * 16);
			if (r)
				return r;
		}

		/* temp.z = address of the second (zw) dword pair, base + 8.
		 * For param == 0 the base is still in temp.x, otherwise temp.y */
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, 2,
				   temp_reg, param ? 1 : 0,
				   V_SQ_ALU_SRC_LITERAL, 8);
		if (r)
			return r;


		for (j = 0; j < 2; j++) {
			/* j == 0 writes .xy at the slot base, j == 1 writes .zw
			 * at base + 8 (address precomputed in temp.z) */
			int chan = (j == 1) ? 2 : (param ? 1 : 0);
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = LDS_OP3_LDS_WRITE_REL;
			alu.src[0].sel = temp_reg;
			alu.src[0].chan = chan;
			alu.src[1].sel = ctx->shader->output[i].gpr;
			alu.src[1].chan = j * 2;
			alu.src[2].sel = ctx->shader->output[i].gpr;
			alu.src[2].chan = (j * 2) + 1;
			alu.last = 1;
			alu.dst.chan = 0;
			/* lds_idx = 1: WRITE_REL stores src1 and src2 as a pair —
			 * NOTE(review): inferred from the paired src setup; confirm
			 * against the LDS instruction encoding */
			alu.lds_idx = 1;
			alu.is_lds_idx_op = true;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}
/* Store the destination of the last-processed TCS instruction into LDS.
 * Called after every TCS instruction; does nothing unless Dst[0] is a
 * TGSI_FILE_OUTPUT register.
 *
 * temp_reg.x receives the LDS byte address of component 0 (per-patch
 * outputs when Dimension is unset, per-vertex otherwise), then
 * temp_reg.{y,z,w} are filled with base + 4*i for the remaining written
 * components.  Adjacent written component pairs (xy or zw) are stored
 * with one LDS_WRITE_REL (two dwords per op), lone components with a
 * plain LDS_WRITE.
 *
 * Returns 0 on success or the first bytecode-emission error.
 */
static int r600_store_tcs_output(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const struct tgsi_full_dst_register *dst = &inst->Dst[0];
	int i, r, lasti;
	int temp_reg = r600_get_temp(ctx);
	struct r600_bytecode_alu alu;
	unsigned write_mask = dst->Register.WriteMask;

	if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
		return 0;

	/* no Dimension => per-patch output (true selects the patch base) */
	r = get_lds_offset0(ctx, 1, temp_reg, dst->Register.Dimension ? false : true);
	if (r)
		return r;

	/* the base address is now in temp.x */
	r = r600_get_byte_address(ctx, temp_reg,
				  &inst->Dst[0], NULL, ctx->tess_output_info, 1);
	if (r)
		return r;

	/* LDS write */
	lasti = tgsi_last_instruction(write_mask);
	/* start at 1: component 0 uses the base address already in temp.x */
	for (i = 1; i <= lasti; i++) {

		if (!(write_mask & (1 << i)))
			continue;
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, i,
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 4 * i);
		if (r)
			return r;
	}

	for (i = 0; i <= lasti; i++) {
		if (!(write_mask & (1 << i)))
			continue;

		/* both halves of an aligned pair written: use one WRITE_REL
		 * to store two dwords, then skip the partner component */
		if ((i == 0 && ((write_mask & 3) == 3)) ||
		    (i == 2 && ((write_mask & 0xc) == 0xc))) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = LDS_OP3_LDS_WRITE_REL;
			alu.src[0].sel = temp_reg;
			alu.src[0].chan = i;

			alu.src[1].sel = dst->Register.Index;
			alu.src[1].sel += ctx->file_offset[dst->Register.File];
			alu.src[1].chan = i;

			alu.src[2].sel = dst->Register.Index;
			alu.src[2].sel += ctx->file_offset[dst->Register.File];
			alu.src[2].chan = i + 1;
			alu.lds_idx = 1;
			alu.dst.chan = 0;
			alu.last = 1;
			alu.is_lds_idx_op = true;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
			i += 1;
			continue;
		}
		/* single component: plain one-dword LDS_WRITE */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = LDS_OP2_LDS_WRITE;
		alu.src[0].sel = temp_reg;
		alu.src[0].chan = i;

		alu.src[1].sel = dst->Register.Index;
		alu.src[1].sel += ctx->file_offset[dst->Register.File];
		alu.src[1].chan = i;

		alu.src[2].sel = V_SQ_ALU_SRC_0;
		alu.dst.chan = 0;
		alu.last = 1;
		alu.is_lds_idx_op = true;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
r600_get_temp(ctx); 2581 unsigned name = ctx->shader->output[output_idx].name; 2582 int dreg = ctx->shader->output[output_idx].gpr; 2583 int r; 2584 2585 param = r600_get_lds_unique_index(name, 0); 2586 r = get_lds_offset0(ctx, 1, temp_reg, true); 2587 if (r) 2588 return r; 2589 2590 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 2591 temp_reg, 0, 2592 temp_reg, 0, 2593 V_SQ_ALU_SRC_LITERAL, param * 16); 2594 if (r) 2595 return r; 2596 2597 do_lds_fetch_values(ctx, temp_reg, dreg); 2598 return 0; 2599} 2600 2601static int r600_emit_tess_factor(struct r600_shader_ctx *ctx) 2602{ 2603 int i; 2604 int stride, outer_comps, inner_comps; 2605 int tessinner_idx = -1, tessouter_idx = -1; 2606 int r; 2607 int temp_reg = r600_get_temp(ctx); 2608 int treg[3] = {-1, -1, -1}; 2609 struct r600_bytecode_alu alu; 2610 struct r600_bytecode_cf *cf_jump, *cf_pop; 2611 2612 /* only execute factor emission for invocation 0 */ 2613 /* PRED_SETE_INT __, R0.x, 0 */ 2614 memset(&alu, 0, sizeof(alu)); 2615 alu.op = ALU_OP2_PRED_SETE_INT; 2616 alu.src[0].chan = 2; 2617 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2618 alu.execute_mask = 1; 2619 alu.update_pred = 1; 2620 alu.last = 1; 2621 r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE); 2622 2623 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); 2624 cf_jump = ctx->bc->cf_last; 2625 2626 treg[0] = r600_get_temp(ctx); 2627 switch (ctx->shader->tcs_prim_mode) { 2628 case PIPE_PRIM_LINES: 2629 stride = 8; /* 2 dwords, 1 vec2 store */ 2630 outer_comps = 2; 2631 inner_comps = 0; 2632 break; 2633 case PIPE_PRIM_TRIANGLES: 2634 stride = 16; /* 4 dwords, 1 vec4 store */ 2635 outer_comps = 3; 2636 inner_comps = 1; 2637 treg[1] = r600_get_temp(ctx); 2638 break; 2639 case PIPE_PRIM_QUADS: 2640 stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */ 2641 outer_comps = 4; 2642 inner_comps = 2; 2643 treg[1] = r600_get_temp(ctx); 2644 treg[2] = r600_get_temp(ctx); 2645 break; 2646 default: 2647 assert(0); 2648 return -1; 2649 } 2650 2651 /* R0 is 
/* TCS epilogue: write the gathered tessellation factors to the TF buffer
 * through GDS, guarded so only invocation 0 of each patch performs the
 * writes (PRED_SET + JUMP ... POP).
 *
 * Per tcs_prim_mode, 'stride' is the per-patch TF record size in bytes
 * and outer/inner_comps the factor counts.  Factors are first read back
 * from LDS into their output GPRs, then (index, value) pairs are packed
 * two per temp register and emitted as FETCH_OP_TF_WRITE GDS ops.
 *
 * Returns 0 on success, -1 on a malformed shader (missing TESSOUTER or
 * missing TESSINNER when inner factors are required), or a
 * bytecode-emission error.
 */
static int r600_emit_tess_factor(struct r600_shader_ctx *ctx)
{
	int i;
	int stride, outer_comps, inner_comps;
	int tessinner_idx = -1, tessouter_idx = -1;
	int r;
	int temp_reg = r600_get_temp(ctx);
	int treg[3] = {-1, -1, -1};
	struct r600_bytecode_alu alu;
	struct r600_bytecode_cf *cf_jump, *cf_pop;

	/* only execute factor emission for invocation 0 */
	/* PRED_SETE_INT __, R0.z, 0 — NOTE(review): the original comment
	 * said R0.x, but src chan 2 below reads R0.z; presumably the
	 * invocation ID lives in R0.z here — confirm against the TCS input
	 * register layout */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_PRED_SETE_INT;
	alu.src[0].chan = 2;
	/* src[1] literal value is 0 via the memset above */
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.execute_mask = 1;
	alu.update_pred = 1;
	alu.last = 1;
	r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
	cf_jump = ctx->bc->cf_last;

	/* one temp per (index, value) pair of dwords; 2 factors per temp */
	treg[0] = r600_get_temp(ctx);
	switch (ctx->shader->tcs_prim_mode) {
	case PIPE_PRIM_LINES:
		stride = 8; /* 2 dwords, 1 vec2 store */
		outer_comps = 2;
		inner_comps = 0;
		break;
	case PIPE_PRIM_TRIANGLES:
		stride = 16; /* 4 dwords, 1 vec4 store */
		outer_comps = 3;
		inner_comps = 1;
		treg[1] = r600_get_temp(ctx);
		break;
	case PIPE_PRIM_QUADS:
		stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */
		outer_comps = 4;
		inner_comps = 2;
		treg[1] = r600_get_temp(ctx);
		treg[2] = r600_get_temp(ctx);
		break;
	default:
		assert(0);
		return -1;
	}

	/* R0 is InvocationID, RelPatchID, PatchID, tf_base */
	/* TF_WRITE takes index in R.x, value in R.y */
	for (i = 0; i < ctx->shader->noutput; i++) {
		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSINNER)
			tessinner_idx = i;
		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSOUTER)
			tessouter_idx = i;
	}

	/* outer factors are mandatory for every prim mode */
	if (tessouter_idx == -1)
		return -1;

	if (tessinner_idx == -1 && inner_comps)
		return -1;

	if (tessouter_idx != -1) {
		r = r600_tess_factor_read(ctx, tessouter_idx);
		if (r)
			return r;
	}

	if (tessinner_idx != -1) {
		r = r600_tess_factor_read(ctx, tessinner_idx);
		if (r)
			return r;
	}

	/* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */
	/* r.x = relpatchid(r0.y) * tf_stride */

	/* multiply incoming r0.y * stride - t.x = r0.y * stride */
	/* add incoming r0.w to it: t.x = t.x + r0.w */
	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
			   temp_reg, 0,
			   0, 1,
			   V_SQ_ALU_SRC_LITERAL, stride,
			   0, 3);
	if (r)
		return r;

	/* build (byte index, factor value) pairs: component 2*(i%2) of
	 * treg[i/2] holds the TF buffer address, 2*(i%2)+1 the value.
	 * Outer factors come first, then inner. */
	for (i = 0; i < outer_comps + inner_comps; i++) {
		int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx;
		int out_comp = i >= outer_comps ? i - outer_comps : i;

		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   treg[i / 2], (2 * (i % 2)),
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 4 * i);
		if (r)
			return r;
		r = single_alu_op2(ctx, ALU_OP1_MOV,
				   treg[i / 2], 1 + (2 * (i%2)),
				   ctx->shader->output[out_idx].gpr, out_comp,
				   0, 0);
		if (r)
			return r;
	}
	/* emit one TF_WRITE per factor; dst_sel 7 = masked (no readback) */
	for (i = 0; i < outer_comps + inner_comps; i++) {
		struct r600_bytecode_gds gds;

		memset(&gds, 0, sizeof(struct r600_bytecode_gds));
		gds.src_gpr = treg[i / 2];
		gds.src_sel_x = 2 * (i % 2);
		gds.src_sel_y = 1 + (2 * (i % 2));
		gds.src_sel_z = 4;
		gds.dst_sel_x = 7;
		gds.dst_sel_y = 7;
		gds.dst_sel_z = 7;
		gds.dst_sel_w = 7;
		gds.op = FETCH_OP_TF_WRITE;
		r = r600_bytecode_add_gds(ctx->bc, &gds);
		if (r)
			return r;
	}

	// Patch up jump label
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
	cf_pop = ctx->bc->cf_last;

	cf_jump->cf_addr = cf_pop->id + 2;
	cf_jump->pop_count = 1;
	cf_pop->cf_addr = cf_pop->id + 2;
	cf_pop->pop_count = 1;

	return 0;
}
= false; 2760 bool pos_emitted = false; 2761 2762#ifdef R600_USE_LLVM 2763 use_llvm = rscreen->b.debug_flags & DBG_LLVM; 2764#endif 2765 ctx.bc = &shader->bc; 2766 ctx.shader = shader; 2767 ctx.native_integers = true; 2768 2769 2770 r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, 2771 rscreen->has_compressed_msaa_texturing); 2772 ctx.tokens = tokens; 2773 tgsi_scan_shader(tokens, &ctx.info); 2774 shader->indirect_files = ctx.info.indirect_files; 2775 2776 shader->uses_doubles = ctx.info.uses_doubles; 2777 2778 indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER)); 2779 tgsi_parse_init(&ctx.parse, tokens); 2780 ctx.type = ctx.info.processor; 2781 shader->processor_type = ctx.type; 2782 ctx.bc->type = shader->processor_type; 2783 2784 switch (ctx.type) { 2785 case TGSI_PROCESSOR_VERTEX: 2786 shader->vs_as_gs_a = key.vs.as_gs_a; 2787 shader->vs_as_es = key.vs.as_es; 2788 shader->vs_as_ls = key.vs.as_ls; 2789 if (shader->vs_as_es) 2790 ring_outputs = true; 2791 if (shader->vs_as_ls) 2792 lds_outputs = true; 2793 break; 2794 case TGSI_PROCESSOR_GEOMETRY: 2795 ring_outputs = true; 2796 break; 2797 case TGSI_PROCESSOR_TESS_CTRL: 2798 shader->tcs_prim_mode = key.tcs.prim_mode; 2799 lds_outputs = true; 2800 lds_inputs = true; 2801 break; 2802 case TGSI_PROCESSOR_TESS_EVAL: 2803 shader->tes_as_es = key.tes.as_es; 2804 lds_inputs = true; 2805 if (shader->tes_as_es) 2806 ring_outputs = true; 2807 break; 2808 case TGSI_PROCESSOR_FRAGMENT: 2809 shader->two_side = key.ps.color_two_side; 2810 break; 2811 default: 2812 break; 2813 } 2814 2815 if (shader->vs_as_es || shader->tes_as_es) { 2816 ctx.gs_for_vs = &rctx->gs_shader->current->shader; 2817 } else { 2818 ctx.gs_for_vs = NULL; 2819 } 2820 2821 ctx.next_ring_offset = 0; 2822 ctx.gs_out_ring_offset = 0; 2823 ctx.gs_next_vertex = 0; 2824 ctx.gs_stream_output_info = &so; 2825 2826 ctx.face_gpr = -1; 2827 ctx.fixed_pt_position_gpr = -1; 2828 ctx.fragcoord_input = -1; 
2829 ctx.colors_used = 0; 2830 ctx.clip_vertex_write = 0; 2831 2832 shader->nr_ps_color_exports = 0; 2833 shader->nr_ps_max_color_exports = 0; 2834 2835 2836 /* register allocations */ 2837 /* Values [0,127] correspond to GPR[0..127]. 2838 * Values [128,159] correspond to constant buffer bank 0 2839 * Values [160,191] correspond to constant buffer bank 1 2840 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 2841 * Values [256,287] correspond to constant buffer bank 2 (EG) 2842 * Values [288,319] correspond to constant buffer bank 3 (EG) 2843 * Other special values are shown in the list below. 2844 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 2845 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 2846 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 2847 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 2848 * 248 SQ_ALU_SRC_0: special constant 0.0. 2849 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 2850 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 2851 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 2852 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 2853 * 253 SQ_ALU_SRC_LITERAL: literal constant. 2854 * 254 SQ_ALU_SRC_PV: previous vector result. 2855 * 255 SQ_ALU_SRC_PS: previous scalar result. 2856 */ 2857 for (i = 0; i < TGSI_FILE_COUNT; i++) { 2858 ctx.file_offset[i] = 0; 2859 } 2860 2861#ifdef R600_USE_LLVM 2862 if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) { 2863 fprintf(stderr, "Warning: R600 LLVM backend does not support " 2864 "indirect adressing. 
Falling back to TGSI " 2865 "backend.\n"); 2866 use_llvm = 0; 2867 } 2868#endif 2869 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 2870 ctx.file_offset[TGSI_FILE_INPUT] = 1; 2871 if (!use_llvm) { 2872 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); 2873 } 2874 } 2875 if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { 2876 if (ctx.bc->chip_class >= EVERGREEN) 2877 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 2878 else 2879 ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); 2880 } 2881 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 2882 /* FIXME 1 would be enough in some cases (3 or less input vertices) */ 2883 ctx.file_offset[TGSI_FILE_INPUT] = 2; 2884 } 2885 ctx.use_llvm = use_llvm; 2886 2887 if (use_llvm) { 2888 ctx.file_offset[TGSI_FILE_OUTPUT] = 2889 ctx.file_offset[TGSI_FILE_INPUT]; 2890 } else { 2891 ctx.file_offset[TGSI_FILE_OUTPUT] = 2892 ctx.file_offset[TGSI_FILE_INPUT] + 2893 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 2894 } 2895 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 2896 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 2897 2898 /* Outside the GPR range. This will be translated to one of the 2899 * kcache banks later. 
*/ 2900 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 2901 2902 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 2903 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 2904 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 2905 ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1; 2906 ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2; 2907 2908 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) { 2909 ctx.tess_input_info = ctx.bc->ar_reg + 3; 2910 ctx.tess_output_info = ctx.bc->ar_reg + 4; 2911 ctx.temp_reg = ctx.bc->ar_reg + 5; 2912 } else if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) { 2913 ctx.tess_input_info = 0; 2914 ctx.tess_output_info = ctx.bc->ar_reg + 3; 2915 ctx.temp_reg = ctx.bc->ar_reg + 4; 2916 } else if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 2917 ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3; 2918 ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4; 2919 ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5; 2920 ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6; 2921 ctx.temp_reg = ctx.bc->ar_reg + 7; 2922 } else { 2923 ctx.temp_reg = ctx.bc->ar_reg + 3; 2924 } 2925 2926 shader->max_arrays = 0; 2927 shader->num_arrays = 0; 2928 if (indirect_gprs) { 2929 2930 if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { 2931 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], 2932 ctx.file_offset[TGSI_FILE_OUTPUT] - 2933 ctx.file_offset[TGSI_FILE_INPUT], 2934 0x0F); 2935 } 2936 if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2937 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], 2938 ctx.file_offset[TGSI_FILE_TEMPORARY] - 2939 ctx.file_offset[TGSI_FILE_OUTPUT], 2940 0x0F); 2941 } 2942 } 2943 2944 ctx.nliterals = 0; 2945 ctx.literals = NULL; 2946 2947 shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]; 2948 shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 2949 shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]; 2950 2951 if 
(shader->vs_as_gs_a) 2952 vs_add_primid_output(&ctx, key.vs.prim_id_out); 2953 2954 if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) 2955 r600_fetch_tess_io_info(&ctx); 2956 2957 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 2958 tgsi_parse_token(&ctx.parse); 2959 switch (ctx.parse.FullToken.Token.Type) { 2960 case TGSI_TOKEN_TYPE_IMMEDIATE: 2961 immediate = &ctx.parse.FullToken.FullImmediate; 2962 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 2963 if(ctx.literals == NULL) { 2964 r = -ENOMEM; 2965 goto out_err; 2966 } 2967 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 2968 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 2969 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 2970 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 2971 ctx.nliterals++; 2972 break; 2973 case TGSI_TOKEN_TYPE_DECLARATION: 2974 r = tgsi_declaration(&ctx); 2975 if (r) 2976 goto out_err; 2977 break; 2978 case TGSI_TOKEN_TYPE_INSTRUCTION: 2979 case TGSI_TOKEN_TYPE_PROPERTY: 2980 break; 2981 default: 2982 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 2983 r = -EINVAL; 2984 goto out_err; 2985 } 2986 } 2987 2988 shader->ring_item_sizes[0] = ctx.next_ring_offset; 2989 shader->ring_item_sizes[1] = 0; 2990 shader->ring_item_sizes[2] = 0; 2991 shader->ring_item_sizes[3] = 0; 2992 2993 /* Process two side if needed */ 2994 if (shader->two_side && ctx.colors_used) { 2995 int i, count = ctx.shader->ninput; 2996 unsigned next_lds_loc = ctx.shader->nlds; 2997 2998 /* additional inputs will be allocated right after the existing inputs, 2999 * we won't need them after the color selection, so we don't need to 3000 * reserve these gprs for the rest of the shader code and to adjust 3001 * output offsets etc. 
*/ 3002 int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 3003 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3004 3005 /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */ 3006 if (ctx.face_gpr == -1) { 3007 i = ctx.shader->ninput++; 3008 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 3009 ctx.shader->input[i].spi_sid = 0; 3010 ctx.shader->input[i].gpr = gpr++; 3011 ctx.face_gpr = ctx.shader->input[i].gpr; 3012 } 3013 3014 for (i = 0; i < count; i++) { 3015 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 3016 int ni = ctx.shader->ninput++; 3017 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 3018 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 3019 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 3020 ctx.shader->input[ni].gpr = gpr++; 3021 // TGSI to LLVM needs to know the lds position of inputs. 3022 // Non LLVM path computes it later (in process_twoside_color) 3023 ctx.shader->input[ni].lds_pos = next_lds_loc++; 3024 ctx.shader->input[i].back_color_input = ni; 3025 if (ctx.bc->chip_class >= EVERGREEN) { 3026 if ((r = evergreen_interp_input(&ctx, ni))) 3027 return r; 3028 } 3029 } 3030 } 3031 } 3032 3033/* LLVM backend setup */ 3034#ifdef R600_USE_LLVM 3035 if (use_llvm) { 3036 struct radeon_llvm_context radeon_llvm_ctx; 3037 LLVMModuleRef mod; 3038 bool dump = r600_can_dump_shader(&rscreen->b, tokens); 3039 boolean use_kill = false; 3040 3041 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); 3042 radeon_llvm_ctx.type = ctx.type; 3043 radeon_llvm_ctx.two_side = shader->two_side; 3044 radeon_llvm_ctx.face_gpr = ctx.face_gpr; 3045 radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1; 3046 radeon_llvm_ctx.r600_inputs = ctx.shader->input; 3047 radeon_llvm_ctx.r600_outputs = ctx.shader->output; 3048 radeon_llvm_ctx.color_buffer_count = max_color_exports; 3049 radeon_llvm_ctx.chip_class = ctx.bc->chip_class; 3050 radeon_llvm_ctx.fs_color_all = 
shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN); 3051 radeon_llvm_ctx.stream_outputs = &so; 3052 radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one; 3053 radeon_llvm_ctx.has_compressed_msaa_texturing = 3054 ctx.bc->has_compressed_msaa_texturing; 3055 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); 3056 ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp; 3057 ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers; 3058 3059 if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) { 3060 radeon_llvm_dispose(&radeon_llvm_ctx); 3061 use_llvm = 0; 3062 fprintf(stderr, "R600 LLVM backend failed to compile " 3063 "shader. Falling back to TGSI\n"); 3064 } else { 3065 ctx.file_offset[TGSI_FILE_OUTPUT] = 3066 ctx.file_offset[TGSI_FILE_INPUT]; 3067 } 3068 if (use_kill) 3069 ctx.shader->uses_kill = use_kill; 3070 radeon_llvm_dispose(&radeon_llvm_ctx); 3071 } 3072#endif 3073/* End of LLVM backend setup */ 3074 3075 if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) 3076 shader->nr_ps_max_color_exports = 8; 3077 3078 if (!use_llvm) { 3079 if (ctx.fragcoord_input >= 0) { 3080 if (ctx.bc->chip_class == CAYMAN) { 3081 for (j = 0 ; j < 4; j++) { 3082 struct r600_bytecode_alu alu; 3083 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3084 alu.op = ALU_OP1_RECIP_IEEE; 3085 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3086 alu.src[0].chan = 3; 3087 3088 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3089 alu.dst.chan = j; 3090 alu.dst.write = (j == 3); 3091 alu.last = 1; 3092 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3093 return r; 3094 } 3095 } else { 3096 struct r600_bytecode_alu alu; 3097 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3098 alu.op = ALU_OP1_RECIP_IEEE; 3099 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3100 alu.src[0].chan = 3; 3101 3102 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3103 alu.dst.chan = 3; 3104 alu.dst.write = 1; 3105 
alu.last = 1; 3106 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3107 return r; 3108 } 3109 } 3110 3111 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3112 struct r600_bytecode_alu alu; 3113 int r; 3114 3115 /* GS thread with no output workaround - emit a cut at start of GS */ 3116 if (ctx.bc->chip_class == R600) 3117 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); 3118 3119 for (j = 0; j < 4; j++) { 3120 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3121 alu.op = ALU_OP1_MOV; 3122 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3123 alu.src[0].value = 0; 3124 alu.dst.sel = ctx.gs_export_gpr_tregs[j]; 3125 alu.dst.write = 1; 3126 alu.last = 1; 3127 r = r600_bytecode_add_alu(ctx.bc, &alu); 3128 if (r) 3129 return r; 3130 } 3131 } 3132 3133 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) 3134 r600_fetch_tess_io_info(&ctx); 3135 3136 if (shader->two_side && ctx.colors_used) { 3137 if ((r = process_twoside_color_inputs(&ctx))) 3138 return r; 3139 } 3140 3141 tgsi_parse_init(&ctx.parse, tokens); 3142 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3143 tgsi_parse_token(&ctx.parse); 3144 switch (ctx.parse.FullToken.Token.Type) { 3145 case TGSI_TOKEN_TYPE_INSTRUCTION: 3146 r = tgsi_is_supported(&ctx); 3147 if (r) 3148 goto out_err; 3149 ctx.max_driver_temp_used = 0; 3150 /* reserve first tmp for everyone */ 3151 r600_get_temp(&ctx); 3152 3153 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 3154 if ((r = tgsi_split_constant(&ctx))) 3155 goto out_err; 3156 if ((r = tgsi_split_literal_constant(&ctx))) 3157 goto out_err; 3158 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3159 if ((r = tgsi_split_gs_inputs(&ctx))) 3160 goto out_err; 3161 } else if (lds_inputs) { 3162 if ((r = tgsi_split_lds_inputs(&ctx))) 3163 goto out_err; 3164 } 3165 if (ctx.bc->chip_class == CAYMAN) 3166 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 3167 else if (ctx.bc->chip_class >= EVERGREEN) 3168 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 3169 else 3170 ctx.inst_info = 
&r600_shader_tgsi_instruction[opcode]; 3171 r = ctx.inst_info->process(&ctx); 3172 if (r) 3173 goto out_err; 3174 3175 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) { 3176 r = r600_store_tcs_output(&ctx); 3177 if (r) 3178 goto out_err; 3179 } 3180 break; 3181 default: 3182 break; 3183 } 3184 } 3185 } 3186 3187 /* Reset the temporary register counter. */ 3188 ctx.max_driver_temp_used = 0; 3189 3190 noutput = shader->noutput; 3191 3192 if (!ring_outputs && ctx.clip_vertex_write) { 3193 unsigned clipdist_temp[2]; 3194 3195 clipdist_temp[0] = r600_get_temp(&ctx); 3196 clipdist_temp[1] = r600_get_temp(&ctx); 3197 3198 /* need to convert a clipvertex write into clipdistance writes and not export 3199 the clip vertex anymore */ 3200 3201 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 3202 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3203 shader->output[noutput].gpr = clipdist_temp[0]; 3204 noutput++; 3205 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3206 shader->output[noutput].gpr = clipdist_temp[1]; 3207 noutput++; 3208 3209 /* reset spi_sid for clipvertex output to avoid confusing spi */ 3210 shader->output[ctx.cv_output].spi_sid = 0; 3211 3212 shader->clip_dist_write = 0xFF; 3213 3214 for (i = 0; i < 8; i++) { 3215 int oreg = i >> 2; 3216 int ochan = i & 3; 3217 3218 for (j = 0; j < 4; j++) { 3219 struct r600_bytecode_alu alu; 3220 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3221 alu.op = ALU_OP2_DOT4; 3222 alu.src[0].sel = shader->output[ctx.cv_output].gpr; 3223 alu.src[0].chan = j; 3224 3225 alu.src[1].sel = 512 + i; 3226 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 3227 alu.src[1].chan = j; 3228 3229 alu.dst.sel = clipdist_temp[oreg]; 3230 alu.dst.chan = j; 3231 alu.dst.write = (j == ochan); 3232 if (j == 3) 3233 alu.last = 1; 3234 if (!use_llvm) 3235 r = r600_bytecode_add_alu(ctx.bc, &alu); 3236 if (r) 3237 return r; 3238 } 3239 } 3240 } 3241 3242 /* Add stream outputs. 
*/ 3243 if (!use_llvm && so.num_outputs) { 3244 bool emit = false; 3245 if (!lds_outputs && !ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX) 3246 emit = true; 3247 if (!ring_outputs && ctx.type == TGSI_PROCESSOR_TESS_EVAL) 3248 emit = true; 3249 if (emit) 3250 emit_streamout(&ctx, &so, -1, NULL); 3251 } 3252 pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 3253 convert_edgeflag_to_int(&ctx); 3254 3255 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) 3256 r600_emit_tess_factor(&ctx); 3257 3258 if (lds_outputs) { 3259 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 3260 if (ctx.shader->noutput) 3261 emit_lds_vs_writes(&ctx); 3262 } 3263 } else if (ring_outputs) { 3264 if (shader->vs_as_es || shader->tes_as_es) { 3265 ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx); 3266 ctx.gs_export_gpr_tregs[1] = -1; 3267 ctx.gs_export_gpr_tregs[2] = -1; 3268 ctx.gs_export_gpr_tregs[3] = -1; 3269 3270 emit_gs_ring_writes(&ctx, &so, -1, FALSE); 3271 } 3272 } else { 3273 /* Export output */ 3274 next_clip_base = shader->vs_out_misc_write ? 
62 : 61; 3275 3276 for (i = 0, j = 0; i < noutput; i++, j++) { 3277 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3278 output[j].gpr = shader->output[i].gpr; 3279 output[j].elem_size = 3; 3280 output[j].swizzle_x = 0; 3281 output[j].swizzle_y = 1; 3282 output[j].swizzle_z = 2; 3283 output[j].swizzle_w = 3; 3284 output[j].burst_count = 1; 3285 output[j].type = -1; 3286 output[j].op = CF_OP_EXPORT; 3287 switch (ctx.type) { 3288 case TGSI_PROCESSOR_VERTEX: 3289 case TGSI_PROCESSOR_TESS_EVAL: 3290 switch (shader->output[i].name) { 3291 case TGSI_SEMANTIC_POSITION: 3292 output[j].array_base = 60; 3293 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3294 pos_emitted = true; 3295 break; 3296 3297 case TGSI_SEMANTIC_PSIZE: 3298 output[j].array_base = 61; 3299 output[j].swizzle_y = 7; 3300 output[j].swizzle_z = 7; 3301 output[j].swizzle_w = 7; 3302 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3303 pos_emitted = true; 3304 break; 3305 case TGSI_SEMANTIC_EDGEFLAG: 3306 output[j].array_base = 61; 3307 output[j].swizzle_x = 7; 3308 output[j].swizzle_y = 0; 3309 output[j].swizzle_z = 7; 3310 output[j].swizzle_w = 7; 3311 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3312 pos_emitted = true; 3313 break; 3314 case TGSI_SEMANTIC_LAYER: 3315 /* spi_sid is 0 for outputs that are 3316 * not consumed by PS */ 3317 if (shader->output[i].spi_sid) { 3318 output[j].array_base = next_param_base++; 3319 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3320 j++; 3321 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3322 } 3323 output[j].array_base = 61; 3324 output[j].swizzle_x = 7; 3325 output[j].swizzle_y = 7; 3326 output[j].swizzle_z = 0; 3327 output[j].swizzle_w = 7; 3328 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3329 pos_emitted = true; 3330 break; 3331 case TGSI_SEMANTIC_VIEWPORT_INDEX: 3332 /* spi_sid is 0 for outputs that are 3333 * not consumed by PS */ 3334 if 
(shader->output[i].spi_sid) { 3335 output[j].array_base = next_param_base++; 3336 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3337 j++; 3338 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3339 } 3340 output[j].array_base = 61; 3341 output[j].swizzle_x = 7; 3342 output[j].swizzle_y = 7; 3343 output[j].swizzle_z = 7; 3344 output[j].swizzle_w = 0; 3345 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3346 pos_emitted = true; 3347 break; 3348 case TGSI_SEMANTIC_CLIPVERTEX: 3349 j--; 3350 break; 3351 case TGSI_SEMANTIC_CLIPDIST: 3352 output[j].array_base = next_clip_base++; 3353 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3354 pos_emitted = true; 3355 /* spi_sid is 0 for clipdistance outputs that were generated 3356 * for clipvertex - we don't need to pass them to PS */ 3357 if (shader->output[i].spi_sid) { 3358 j++; 3359 /* duplicate it as PARAM to pass to the pixel shader */ 3360 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3361 output[j].array_base = next_param_base++; 3362 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3363 } 3364 break; 3365 case TGSI_SEMANTIC_FOG: 3366 output[j].swizzle_y = 4; /* 0 */ 3367 output[j].swizzle_z = 4; /* 0 */ 3368 output[j].swizzle_w = 5; /* 1 */ 3369 break; 3370 case TGSI_SEMANTIC_PRIMID: 3371 output[j].swizzle_x = 2; 3372 output[j].swizzle_y = 4; /* 0 */ 3373 output[j].swizzle_z = 4; /* 0 */ 3374 output[j].swizzle_w = 4; /* 0 */ 3375 break; 3376 } 3377 3378 break; 3379 case TGSI_PROCESSOR_FRAGMENT: 3380 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 3381 /* never export more colors than the number of CBs */ 3382 if (shader->output[i].sid >= max_color_exports) { 3383 /* skip export */ 3384 j--; 3385 continue; 3386 } 3387 output[j].swizzle_w = key.ps.alpha_to_one ? 
5 : 3; 3388 output[j].array_base = shader->output[i].sid; 3389 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3390 shader->nr_ps_color_exports++; 3391 if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { 3392 for (k = 1; k < max_color_exports; k++) { 3393 j++; 3394 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3395 output[j].gpr = shader->output[i].gpr; 3396 output[j].elem_size = 3; 3397 output[j].swizzle_x = 0; 3398 output[j].swizzle_y = 1; 3399 output[j].swizzle_z = 2; 3400 output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 3401 output[j].burst_count = 1; 3402 output[j].array_base = k; 3403 output[j].op = CF_OP_EXPORT; 3404 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3405 shader->nr_ps_color_exports++; 3406 } 3407 } 3408 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 3409 output[j].array_base = 61; 3410 output[j].swizzle_x = 2; 3411 output[j].swizzle_y = 7; 3412 output[j].swizzle_z = output[j].swizzle_w = 7; 3413 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3414 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 3415 output[j].array_base = 61; 3416 output[j].swizzle_x = 7; 3417 output[j].swizzle_y = 1; 3418 output[j].swizzle_z = output[j].swizzle_w = 7; 3419 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3420 } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 3421 output[j].array_base = 61; 3422 output[j].swizzle_x = 7; 3423 output[j].swizzle_y = 7; 3424 output[j].swizzle_z = 0; 3425 output[j].swizzle_w = 7; 3426 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3427 } else { 3428 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 3429 r = -EINVAL; 3430 goto out_err; 3431 } 3432 break; 3433 case TGSI_PROCESSOR_TESS_CTRL: 3434 break; 3435 default: 3436 R600_ERR("unsupported processor type %d\n", ctx.type); 3437 r = -EINVAL; 3438 goto out_err; 3439 } 3440 3441 if (output[j].type==-1) { 3442 
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3443 output[j].array_base = next_param_base++; 3444 } 3445 } 3446 3447 /* add fake position export */ 3448 if ((ctx.type == TGSI_PROCESSOR_VERTEX || ctx.type == TGSI_PROCESSOR_TESS_EVAL) && pos_emitted == false) { 3449 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3450 output[j].gpr = 0; 3451 output[j].elem_size = 3; 3452 output[j].swizzle_x = 7; 3453 output[j].swizzle_y = 7; 3454 output[j].swizzle_z = 7; 3455 output[j].swizzle_w = 7; 3456 output[j].burst_count = 1; 3457 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3458 output[j].array_base = 60; 3459 output[j].op = CF_OP_EXPORT; 3460 j++; 3461 } 3462 3463 /* add fake param output for vertex shader if no param is exported */ 3464 if ((ctx.type == TGSI_PROCESSOR_VERTEX || ctx.type == TGSI_PROCESSOR_TESS_EVAL) && next_param_base == 0) { 3465 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3466 output[j].gpr = 0; 3467 output[j].elem_size = 3; 3468 output[j].swizzle_x = 7; 3469 output[j].swizzle_y = 7; 3470 output[j].swizzle_z = 7; 3471 output[j].swizzle_w = 7; 3472 output[j].burst_count = 1; 3473 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3474 output[j].array_base = 0; 3475 output[j].op = CF_OP_EXPORT; 3476 j++; 3477 } 3478 3479 /* add fake pixel export */ 3480 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { 3481 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3482 output[j].gpr = 0; 3483 output[j].elem_size = 3; 3484 output[j].swizzle_x = 7; 3485 output[j].swizzle_y = 7; 3486 output[j].swizzle_z = 7; 3487 output[j].swizzle_w = 7; 3488 output[j].burst_count = 1; 3489 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3490 output[j].array_base = 0; 3491 output[j].op = CF_OP_EXPORT; 3492 j++; 3493 shader->nr_ps_color_exports++; 3494 } 3495 3496 noutput = j; 3497 3498 /* set export done on last export of each type */ 3499 for (i = noutput - 1, 
output_done = 0; i >= 0; i--) { 3500 if (!(output_done & (1 << output[i].type))) { 3501 output_done |= (1 << output[i].type); 3502 output[i].op = CF_OP_EXPORT_DONE; 3503 } 3504 } 3505 /* add output to bytecode */ 3506 if (!use_llvm) { 3507 for (i = 0; i < noutput; i++) { 3508 r = r600_bytecode_add_output(ctx.bc, &output[i]); 3509 if (r) 3510 goto out_err; 3511 } 3512 } 3513 } 3514 3515 /* add program end */ 3516 if (!use_llvm) { 3517 if (ctx.bc->chip_class == CAYMAN) 3518 cm_bytecode_add_cf_end(ctx.bc); 3519 else { 3520 const struct cf_op_info *last = NULL; 3521 3522 if (ctx.bc->cf_last) 3523 last = r600_isa_cf(ctx.bc->cf_last->op); 3524 3525 /* alu clause instructions don't have EOP bit, so add NOP */ 3526 if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS) 3527 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 3528 3529 ctx.bc->cf_last->end_of_program = 1; 3530 } 3531 } 3532 3533 /* check GPR limit - we have 124 = 128 - 4 3534 * (4 are reserved as alu clause temporary registers) */ 3535 if (ctx.bc->ngpr > 124) { 3536 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 3537 r = -ENOMEM; 3538 goto out_err; 3539 } 3540 3541 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3542 if ((r = generate_gs_copy_shader(rctx, pipeshader, &so))) 3543 return r; 3544 } 3545 3546 free(ctx.literals); 3547 tgsi_parse_free(&ctx.parse); 3548 return 0; 3549out_err: 3550 free(ctx.literals); 3551 tgsi_parse_free(&ctx.parse); 3552 return r; 3553} 3554 3555static int tgsi_unsupported(struct r600_shader_ctx *ctx) 3556{ 3557 const unsigned tgsi_opcode = 3558 ctx->parse.FullToken.FullInstruction.Instruction.Opcode; 3559 R600_ERR("%s tgsi opcode unsupported\n", 3560 tgsi_get_opcode_name(tgsi_opcode)); 3561 return -EINVAL; 3562} 3563 3564static int tgsi_end(struct r600_shader_ctx *ctx) 3565{ 3566 return 0; 3567} 3568 3569static void 
r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
			const struct r600_shader_src *shader_src,
			unsigned chan)
{
	/* Translate one channel of a parsed TGSI source operand into a
	 * bytecode ALU source: resolve the swizzle for 'chan' and copy the
	 * modifier/addressing state through. */
	bc_src->sel = shader_src->sel;
	bc_src->chan = shader_src->swizzle[chan];
	bc_src->neg = shader_src->neg;
	bc_src->abs = shader_src->abs;
	bc_src->rel = shader_src->rel;
	/* literal value is per post-swizzle channel */
	bc_src->value = shader_src->value[bc_src->chan];
	bc_src->kc_bank = shader_src->kc_bank;
	bc_src->kc_rel = shader_src->kc_rel;
}

/* Force an absolute-value source modifier (clears negate, since abs is
 * applied before negate in the ALU source modifier chain). */
static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
{
	bc_src->abs = 1;
	bc_src->neg = 0;
}

/* Flip the negate modifier on a source operand. */
static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
{
	bc_src->neg = !bc_src->neg;
}

/* Fill an ALU destination from a TGSI destination register for component
 * 'swizzle'; applies the per-file GPR offset, saturate-to-clamp, and
 * relative addressing.  For TESS_CTRL shaders, TGSI outputs are not
 * register-addressed here (presumably stored via the TCS output path
 * elsewhere — note the early return before the Indirect handling). */
static void tgsi_dst(struct r600_shader_ctx *ctx,
		     const struct tgsi_full_dst_register *tgsi_dst,
		     unsigned swizzle,
		     struct r600_bytecode_alu_dst *r600_dst)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;

	r600_dst->sel = tgsi_dst->Register.Index;
	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
	r600_dst->chan = swizzle;
	r600_dst->write = 1;
	if (inst->Instruction.Saturate) {
		r600_dst->clamp = 1;
	}
	if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) {
		if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) {
			return;
		}
	}
	if (tgsi_dst->Register.Indirect)
		r600_dst->rel = V_SQ_REL_RELATIVE;

}

/* Emit a two-operand double-precision op.  Doubles occupy channel pairs
 * (xy / zw); 'singledest' handles opcodes whose TGSI writemask names a
 * single 32-bit component but which must drive a 64-bit channel pair,
 * optionally staging through a temp ('use_tmp') when the requested
 * component is the high half.  'swap' exchanges src0/src1. */
static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	struct r600_bytecode_alu alu;
	int i, j, r, lasti = tgsi_last_instruction(write_mask);
	int use_tmp = 0;

	if (singledest) {
		/* widen the single-component mask to the owning channel pair */
		switch (write_mask) {
		case 0x1:
			write_mask = 0x3;
			break;
		case 0x2:
			use_tmp = 1;
			write_mask = 0x3;
			break;
		case 0x4:
			write_mask = 0xc;
			break;
		case 0x8:
			write_mask = 0xc;
			use_tmp = 3;
			break;
		}
	}

	lasti = tgsi_last_instruction(write_mask);
	for (i = 0; i <= lasti; i++) {

		if (!(write_mask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		if (singledest) {
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			if (use_tmp) {
				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				alu.dst.write = 1;
			}
			/* high halves (chans 1/3) are not written for singledest */
			if (i == 1 || i == 3)
				alu.dst.write = 0;
		} else
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ctx->inst_info->op;
		if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		} else if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				/* fp64_switch pairs low/high 32-bit halves */
				r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
			}
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i));
			r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i));
		}

		/* handle some special cases */
		if (i == 1 || i == 3) {
			switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
			case TGSI_OPCODE_SUB:
				r600_bytecode_src_toggle_neg(&alu.src[1]);
				break;
			case TGSI_OPCODE_DABS:
				r600_bytecode_src_set_abs(&alu.src[0]);
				break;
			default:
				break;
			}
		}
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	if (use_tmp) {
		write_mask = inst->Dst[0].Register.WriteMask;

		/* move result from temp to dst */
		for (i = 0; i <= lasti; i++) {
			if (!(write_mask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = use_tmp - 1;
			alu.last = (i == lasti);

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}

/* Two-operand fp64 op with a pair-aligned writemask (xy and/or zw). */
static int tgsi_op2_64(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	/* confirm writemasking */
	if ((write_mask & 0x3) != 0x3 &&
	    (write_mask & 0xc) != 0xc) {
		fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask);
		return -1;
	}
	return tgsi_op2_64_params(ctx, false, false);
}

/* fp64 op producing a single 32-bit destination component. */
static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_64_params(ctx, true, false);
}

/* Same as above with src0/src1 swapped. */
static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_64_params(ctx, true, true);
}

/* Three-operand fp64 op (op3): emits all four channels, routing masked-out
 * components to a scratch temp. */
static int tgsi_op3_64(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;
	int lasti = 3;
	int tmp = r600_get_temp(ctx);

	for (i = 0; i < lasti + 1; i++) {

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ?
0 : 1);
		}

		if (inst->Dst[0].Register.WriteMask & (1 << i))
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		else
			alu.dst.sel = tmp; /* masked-out channel goes to scratch */

		alu.dst.chan = i;
		alu.is_op3 = 1;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Generic two-operand op emitter.  'swap' exchanges src0/src1 (for
 * reversed-operand opcodes); 'trans_only' forces each slot to be the last
 * in its group (transcendental/t-slot ops) and, when more than one
 * component is written, stages results through a temp so a source is not
 * overwritten mid-instruction. */
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int i, j, r, lasti = tgsi_last_instruction(write_mask);
	/* use temp register if trans_only and more than one dst component */
	int use_tmp = trans_only && (write_mask ^ (1 << lasti));

	for (i = 0; i <= lasti; i++) {
		if (!(write_mask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		if (use_tmp) {
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.dst.write = 1;
		} else
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ctx->inst_info->op;
		if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
			}
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		}
		/* handle some special cases */
		switch (inst->Instruction.Opcode) {
		case TGSI_OPCODE_SUB:
			/* SUB is ADD with negated src1 */
			r600_bytecode_src_toggle_neg(&alu.src[1]);
			break;
		case TGSI_OPCODE_ABS:
			/* ABS is MOV with the abs modifier */
			r600_bytecode_src_set_abs(&alu.src[0]);
			break;
		default:
			break;
		}
		if (i == lasti || trans_only) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	if (use_tmp) {
		/* move result from temp to dst */
		for (i = 0; i <= lasti; i++) {
			if (!(write_mask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = i;
			alu.last = (i == lasti);

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}

/* Plain two-operand op. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0, 0);
}

/* Two-operand op with src0/src1 swapped. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 1, 0);
}

/* Two-operand transcendental (t-slot only) op. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0, 1);
}

/* Integer negate: emitted as op(0, src) per component (e.g. 0 - src). */
static int tgsi_ineg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	for (i = 0; i < lasti + 1; i++) {

		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		alu.src[0].sel = V_SQ_ALU_SRC_0;

		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;

}

/* Double negate: MOV each component, toggling the sign on the high half
 * (channels 1/3 carry the fp64 sign bit). */
static int tgsi_dneg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	for (i = 0; i < lasti + 1; i++) {

		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;

		r600_bytecode_src(&alu.src[0],
&ctx->src[0], i);

		if (i == 1 || i == 3)
			r600_bytecode_src_toggle_neg(&alu.src[0]);
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;

}

/* DFRACEXP: split a double into fraction (dst0, a double) and exponent
 * (dst1, an int).  The raw op writes all four temp channels; the fraction
 * is then moved from temp chans 2..3 into dst0's channel pair and the
 * exponent from temp chan 1 into the first written component of dst1
 * (assumed layout of the hardware op's result — TODO confirm vs ISA doc). */
static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int i, j, r;
	int firsti = write_mask == 0xc ? 2 : 0;

	for (i = 0; i <= 3; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
		}

		if (i == 3)
			alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* MOV first two channels to writemask dst0 */
	for (i = 0; i <= 1; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].chan = i + 2;
		alu.src[0].sel = ctx->temp_reg;

		tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i <= 3; i++) {
		if (inst->Dst[1].Register.WriteMask & (1 << i)) {
			/* MOV third channels to writemask dst1 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			alu.src[0].chan = 1;
			alu.src[0].sel = ctx->temp_reg;

			tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst);
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
			break; /* only the first written component is filled */
		}
	}
	return 0;
}


/* I2D/U2D: convert 32-bit ints to doubles.  First pass converts each int
 * to float in temp chan i; second pass expands each float into an fp64
 * channel pair via FLT32_TO_FLT64 (high halves sourced from a 0 literal). */
static int egcm_int_to_double(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D ||
	       inst->Instruction.Opcode == TGSI_OPCODE_U2D);

	for (i = 0; i <= (lasti+1)/2; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i <= lasti; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_FLT32_TO_FLT64;

		alu.src[0].chan = i/2;
		if (i%2 == 0)
			alu.src[0].sel = ctx->temp_reg;
		else {
			alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
			alu.src[0].value = 0x0;
		}
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.last = i == lasti;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* D2I/D2U: convert doubles to 32-bit ints.  First pass narrows each fp64
 * channel pair to a float in the even temp channels; second pass converts
 * float to int into the destination. */
static int egcm_double_to_int(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I ||
	       inst->Instruction.Opcode == TGSI_OPCODE_D2U);

	for (i = 0; i <= lasti; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_FLT64_TO_FLT32;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i));
		alu.dst.chan = i;
		alu.dst.sel = ctx->temp_reg;
alu.dst.write = i%2 == 0; 4051 alu.last = i == lasti; 4052 4053 r = r600_bytecode_add_alu(ctx->bc, &alu); 4054 if (r) 4055 return r; 4056 } 4057 4058 for (i = 0; i <= (lasti+1)/2; i++) { 4059 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4060 alu.op = ctx->inst_info->op; 4061 4062 alu.src[0].chan = i*2; 4063 alu.src[0].sel = ctx->temp_reg; 4064 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4065 alu.last = 1; 4066 4067 r = r600_bytecode_add_alu(ctx->bc, &alu); 4068 if (r) 4069 return r; 4070 } 4071 4072 return 0; 4073} 4074 4075static int cayman_emit_double_instr(struct r600_shader_ctx *ctx) 4076{ 4077 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4078 int i, r; 4079 struct r600_bytecode_alu alu; 4080 int last_slot = 3; 4081 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4082 int t1 = ctx->temp_reg; 4083 4084 /* these have to write the result to X/Y by the looks of it */ 4085 for (i = 0 ; i < last_slot; i++) { 4086 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4087 alu.op = ctx->inst_info->op; 4088 4089 /* should only be one src regs */ 4090 assert (inst->Instruction.NumSrcRegs == 1); 4091 4092 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 4093 r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); 4094 4095 /* RSQ should take the absolute value of src */ 4096 if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ || 4097 ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) { 4098 r600_bytecode_src_set_abs(&alu.src[1]); 4099 } 4100 alu.dst.sel = t1; 4101 alu.dst.chan = i; 4102 alu.dst.write = (i == 0 || i == 1); 4103 4104 if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1) 4105 alu.last = 1; 4106 r = r600_bytecode_add_alu(ctx->bc, &alu); 4107 if (r) 4108 return r; 4109 } 4110 4111 for (i = 0 ; i <= lasti; i++) { 4112 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4113 continue; 4114 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4115 alu.op = 
ALU_OP1_MOV; 4116 alu.src[0].sel = t1; 4117 alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1; 4118 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4119 alu.dst.write = 1; 4120 if (i == lasti) 4121 alu.last = 1; 4122 r = r600_bytecode_add_alu(ctx->bc, &alu); 4123 if (r) 4124 return r; 4125 } 4126 return 0; 4127} 4128 4129static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 4130{ 4131 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4132 int i, j, r; 4133 struct r600_bytecode_alu alu; 4134 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4135 4136 for (i = 0 ; i < last_slot; i++) { 4137 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4138 alu.op = ctx->inst_info->op; 4139 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4140 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 4141 4142 /* RSQ should take the absolute value of src */ 4143 if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) { 4144 r600_bytecode_src_set_abs(&alu.src[j]); 4145 } 4146 } 4147 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4148 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4149 4150 if (i == last_slot - 1) 4151 alu.last = 1; 4152 r = r600_bytecode_add_alu(ctx->bc, &alu); 4153 if (r) 4154 return r; 4155 } 4156 return 0; 4157} 4158 4159static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 4160{ 4161 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4162 int i, j, k, r; 4163 struct r600_bytecode_alu alu; 4164 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4165 int t1 = ctx->temp_reg; 4166 4167 for (k = 0; k <= lasti; k++) { 4168 if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 4169 continue; 4170 4171 for (i = 0 ; i < 4; i++) { 4172 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4173 alu.op = ctx->inst_info->op; 4174 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4175 r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 4176 } 4177 alu.dst.sel = t1; 4178 alu.dst.chan = i; 4179 
alu.dst.write = (i == k); 4180 if (i == 3) 4181 alu.last = 1; 4182 r = r600_bytecode_add_alu(ctx->bc, &alu); 4183 if (r) 4184 return r; 4185 } 4186 } 4187 4188 for (i = 0 ; i <= lasti; i++) { 4189 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4190 continue; 4191 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4192 alu.op = ALU_OP1_MOV; 4193 alu.src[0].sel = t1; 4194 alu.src[0].chan = i; 4195 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4196 alu.dst.write = 1; 4197 if (i == lasti) 4198 alu.last = 1; 4199 r = r600_bytecode_add_alu(ctx->bc, &alu); 4200 if (r) 4201 return r; 4202 } 4203 4204 return 0; 4205} 4206 4207 4208static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) 4209{ 4210 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4211 int i, j, k, r; 4212 struct r600_bytecode_alu alu; 4213 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4214 int t1 = ctx->temp_reg; 4215 4216 for (k = 0; k < 2; k++) { 4217 if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2)))) 4218 continue; 4219 4220 for (i = 0; i < 4; i++) { 4221 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4222 alu.op = ctx->inst_info->op; 4223 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4224 r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 
0 : 1));; 4225 } 4226 alu.dst.sel = t1; 4227 alu.dst.chan = i; 4228 alu.dst.write = 1; 4229 if (i == 3) 4230 alu.last = 1; 4231 r = r600_bytecode_add_alu(ctx->bc, &alu); 4232 if (r) 4233 return r; 4234 } 4235 } 4236 4237 for (i = 0; i <= lasti; i++) { 4238 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4239 continue; 4240 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4241 alu.op = ALU_OP1_MOV; 4242 alu.src[0].sel = t1; 4243 alu.src[0].chan = i; 4244 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4245 alu.dst.write = 1; 4246 if (i == lasti) 4247 alu.last = 1; 4248 r = r600_bytecode_add_alu(ctx->bc, &alu); 4249 if (r) 4250 return r; 4251 } 4252 4253 return 0; 4254} 4255 4256/* 4257 * r600 - trunc to -PI..PI range 4258 * r700 - normalize by dividing by 2PI 4259 * see fdo bug 27901 4260 */ 4261static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 4262{ 4263 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 4264 static float double_pi = 3.1415926535 * 2; 4265 static float neg_pi = -3.1415926535; 4266 4267 int r; 4268 struct r600_bytecode_alu alu; 4269 4270 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4271 alu.op = ALU_OP3_MULADD; 4272 alu.is_op3 = 1; 4273 4274 alu.dst.chan = 0; 4275 alu.dst.sel = ctx->temp_reg; 4276 alu.dst.write = 1; 4277 4278 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4279 4280 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4281 alu.src[1].chan = 0; 4282 alu.src[1].value = *(uint32_t *)&half_inv_pi; 4283 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 4284 alu.src[2].chan = 0; 4285 alu.last = 1; 4286 r = r600_bytecode_add_alu(ctx->bc, &alu); 4287 if (r) 4288 return r; 4289 4290 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4291 alu.op = ALU_OP1_FRACT; 4292 4293 alu.dst.chan = 0; 4294 alu.dst.sel = ctx->temp_reg; 4295 alu.dst.write = 1; 4296 4297 alu.src[0].sel = ctx->temp_reg; 4298 alu.src[0].chan = 0; 4299 alu.last = 1; 4300 r = r600_bytecode_add_alu(ctx->bc, &alu); 4301 if (r) 4302 return r; 4303 4304 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 4305 alu.op = ALU_OP3_MULADD; 4306 alu.is_op3 = 1; 4307 4308 alu.dst.chan = 0; 4309 alu.dst.sel = ctx->temp_reg; 4310 alu.dst.write = 1; 4311 4312 alu.src[0].sel = ctx->temp_reg; 4313 alu.src[0].chan = 0; 4314 4315 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4316 alu.src[1].chan = 0; 4317 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 4318 alu.src[2].chan = 0; 4319 4320 if (ctx->bc->chip_class == R600) { 4321 alu.src[1].value = *(uint32_t *)&double_pi; 4322 alu.src[2].value = *(uint32_t *)&neg_pi; 4323 } else { 4324 alu.src[1].sel = V_SQ_ALU_SRC_1; 4325 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 4326 alu.src[2].neg = 1; 4327 } 4328 4329 alu.last = 1; 4330 r = r600_bytecode_add_alu(ctx->bc, &alu); 4331 if (r) 4332 return r; 4333 return 0; 4334} 4335 4336static int cayman_trig(struct r600_shader_ctx *ctx) 4337{ 4338 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4339 struct r600_bytecode_alu alu; 4340 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4341 int i, r; 4342 4343 r = tgsi_setup_trig(ctx); 4344 if (r) 4345 return r; 4346 4347 4348 for (i = 0; i < last_slot; i++) { 4349 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4350 alu.op = ctx->inst_info->op; 4351 alu.dst.chan = i; 4352 4353 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4354 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4355 4356 alu.src[0].sel = ctx->temp_reg; 4357 alu.src[0].chan = 0; 4358 if (i == last_slot - 1) 4359 alu.last = 1; 4360 r = r600_bytecode_add_alu(ctx->bc, &alu); 4361 if (r) 4362 return r; 4363 } 4364 return 0; 4365} 4366 4367static int tgsi_trig(struct r600_shader_ctx *ctx) 4368{ 4369 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4370 struct r600_bytecode_alu alu; 4371 int i, r; 4372 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4373 4374 r = tgsi_setup_trig(ctx); 4375 if (r) 4376 return r; 4377 4378 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4379 alu.op = 
ctx->inst_info->op; 4380 alu.dst.chan = 0; 4381 alu.dst.sel = ctx->temp_reg; 4382 alu.dst.write = 1; 4383 4384 alu.src[0].sel = ctx->temp_reg; 4385 alu.src[0].chan = 0; 4386 alu.last = 1; 4387 r = r600_bytecode_add_alu(ctx->bc, &alu); 4388 if (r) 4389 return r; 4390 4391 /* replicate result */ 4392 for (i = 0; i < lasti + 1; i++) { 4393 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4394 continue; 4395 4396 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4397 alu.op = ALU_OP1_MOV; 4398 4399 alu.src[0].sel = ctx->temp_reg; 4400 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4401 if (i == lasti) 4402 alu.last = 1; 4403 r = r600_bytecode_add_alu(ctx->bc, &alu); 4404 if (r) 4405 return r; 4406 } 4407 return 0; 4408} 4409 4410static int tgsi_scs(struct r600_shader_ctx *ctx) 4411{ 4412 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4413 struct r600_bytecode_alu alu; 4414 int i, r; 4415 4416 /* We'll only need the trig stuff if we are going to write to the 4417 * X or Y components of the destination vector. 
4418 */ 4419 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 4420 r = tgsi_setup_trig(ctx); 4421 if (r) 4422 return r; 4423 } 4424 4425 /* dst.x = COS */ 4426 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4427 if (ctx->bc->chip_class == CAYMAN) { 4428 for (i = 0 ; i < 3; i++) { 4429 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4430 alu.op = ALU_OP1_COS; 4431 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4432 4433 if (i == 0) 4434 alu.dst.write = 1; 4435 else 4436 alu.dst.write = 0; 4437 alu.src[0].sel = ctx->temp_reg; 4438 alu.src[0].chan = 0; 4439 if (i == 2) 4440 alu.last = 1; 4441 r = r600_bytecode_add_alu(ctx->bc, &alu); 4442 if (r) 4443 return r; 4444 } 4445 } else { 4446 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4447 alu.op = ALU_OP1_COS; 4448 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4449 4450 alu.src[0].sel = ctx->temp_reg; 4451 alu.src[0].chan = 0; 4452 alu.last = 1; 4453 r = r600_bytecode_add_alu(ctx->bc, &alu); 4454 if (r) 4455 return r; 4456 } 4457 } 4458 4459 /* dst.y = SIN */ 4460 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4461 if (ctx->bc->chip_class == CAYMAN) { 4462 for (i = 0 ; i < 3; i++) { 4463 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4464 alu.op = ALU_OP1_SIN; 4465 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4466 if (i == 1) 4467 alu.dst.write = 1; 4468 else 4469 alu.dst.write = 0; 4470 alu.src[0].sel = ctx->temp_reg; 4471 alu.src[0].chan = 0; 4472 if (i == 2) 4473 alu.last = 1; 4474 r = r600_bytecode_add_alu(ctx->bc, &alu); 4475 if (r) 4476 return r; 4477 } 4478 } else { 4479 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4480 alu.op = ALU_OP1_SIN; 4481 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4482 4483 alu.src[0].sel = ctx->temp_reg; 4484 alu.src[0].chan = 0; 4485 alu.last = 1; 4486 r = r600_bytecode_add_alu(ctx->bc, &alu); 4487 if (r) 4488 return r; 4489 } 4490 } 4491 4492 /* dst.z = 0.0; */ 4493 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4494 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4495 4496 alu.op = ALU_OP1_MOV; 4497 4498 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4499 4500 alu.src[0].sel = V_SQ_ALU_SRC_0; 4501 alu.src[0].chan = 0; 4502 4503 alu.last = 1; 4504 4505 r = r600_bytecode_add_alu(ctx->bc, &alu); 4506 if (r) 4507 return r; 4508 } 4509 4510 /* dst.w = 1.0; */ 4511 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4512 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4513 4514 alu.op = ALU_OP1_MOV; 4515 4516 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4517 4518 alu.src[0].sel = V_SQ_ALU_SRC_1; 4519 alu.src[0].chan = 0; 4520 4521 alu.last = 1; 4522 4523 r = r600_bytecode_add_alu(ctx->bc, &alu); 4524 if (r) 4525 return r; 4526 } 4527 4528 return 0; 4529} 4530 4531static int tgsi_kill(struct r600_shader_ctx *ctx) 4532{ 4533 const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4534 struct r600_bytecode_alu alu; 4535 int i, r; 4536 4537 for (i = 0; i < 4; i++) { 4538 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4539 alu.op = ctx->inst_info->op; 4540 4541 alu.dst.chan = i; 4542 4543 alu.src[0].sel = V_SQ_ALU_SRC_0; 4544 4545 if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) { 4546 alu.src[1].sel = V_SQ_ALU_SRC_1; 4547 alu.src[1].neg = 1; 4548 } else { 4549 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4550 } 4551 if (i == 3) { 4552 alu.last = 1; 4553 } 4554 r = r600_bytecode_add_alu(ctx->bc, &alu); 4555 if (r) 4556 return r; 4557 } 4558 4559 /* kill must be last in ALU */ 4560 ctx->bc->force_add_cf = 1; 4561 ctx->shader->uses_kill = TRUE; 4562 return 0; 4563} 4564 4565static int tgsi_lit(struct r600_shader_ctx *ctx) 4566{ 4567 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4568 struct r600_bytecode_alu alu; 4569 int r; 4570 4571 /* tmp.x = max(src.y, 0.0) */ 4572 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4573 alu.op = ALU_OP2_MAX; 4574 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 4575 alu.src[1].sel = 
V_SQ_ALU_SRC_0; /*0.0*/ 4576 alu.src[1].chan = 1; 4577 4578 alu.dst.sel = ctx->temp_reg; 4579 alu.dst.chan = 0; 4580 alu.dst.write = 1; 4581 4582 alu.last = 1; 4583 r = r600_bytecode_add_alu(ctx->bc, &alu); 4584 if (r) 4585 return r; 4586 4587 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 4588 { 4589 int chan; 4590 int sel; 4591 int i; 4592 4593 if (ctx->bc->chip_class == CAYMAN) { 4594 for (i = 0; i < 3; i++) { 4595 /* tmp.z = log(tmp.x) */ 4596 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4597 alu.op = ALU_OP1_LOG_CLAMPED; 4598 alu.src[0].sel = ctx->temp_reg; 4599 alu.src[0].chan = 0; 4600 alu.dst.sel = ctx->temp_reg; 4601 alu.dst.chan = i; 4602 if (i == 2) { 4603 alu.dst.write = 1; 4604 alu.last = 1; 4605 } else 4606 alu.dst.write = 0; 4607 4608 r = r600_bytecode_add_alu(ctx->bc, &alu); 4609 if (r) 4610 return r; 4611 } 4612 } else { 4613 /* tmp.z = log(tmp.x) */ 4614 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4615 alu.op = ALU_OP1_LOG_CLAMPED; 4616 alu.src[0].sel = ctx->temp_reg; 4617 alu.src[0].chan = 0; 4618 alu.dst.sel = ctx->temp_reg; 4619 alu.dst.chan = 2; 4620 alu.dst.write = 1; 4621 alu.last = 1; 4622 r = r600_bytecode_add_alu(ctx->bc, &alu); 4623 if (r) 4624 return r; 4625 } 4626 4627 chan = alu.dst.chan; 4628 sel = alu.dst.sel; 4629 4630 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 4631 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4632 alu.op = ALU_OP3_MUL_LIT; 4633 alu.src[0].sel = sel; 4634 alu.src[0].chan = chan; 4635 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 4636 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 4637 alu.dst.sel = ctx->temp_reg; 4638 alu.dst.chan = 0; 4639 alu.dst.write = 1; 4640 alu.is_op3 = 1; 4641 alu.last = 1; 4642 r = r600_bytecode_add_alu(ctx->bc, &alu); 4643 if (r) 4644 return r; 4645 4646 if (ctx->bc->chip_class == CAYMAN) { 4647 for (i = 0; i < 3; i++) { 4648 /* dst.z = exp(tmp.x) */ 4649 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4650 alu.op = ALU_OP1_EXP_IEEE; 4651 alu.src[0].sel = 
ctx->temp_reg; 4652 alu.src[0].chan = 0; 4653 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4654 if (i == 2) { 4655 alu.dst.write = 1; 4656 alu.last = 1; 4657 } else 4658 alu.dst.write = 0; 4659 r = r600_bytecode_add_alu(ctx->bc, &alu); 4660 if (r) 4661 return r; 4662 } 4663 } else { 4664 /* dst.z = exp(tmp.x) */ 4665 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4666 alu.op = ALU_OP1_EXP_IEEE; 4667 alu.src[0].sel = ctx->temp_reg; 4668 alu.src[0].chan = 0; 4669 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4670 alu.last = 1; 4671 r = r600_bytecode_add_alu(ctx->bc, &alu); 4672 if (r) 4673 return r; 4674 } 4675 } 4676 4677 /* dst.x, <- 1.0 */ 4678 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4679 alu.op = ALU_OP1_MOV; 4680 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 4681 alu.src[0].chan = 0; 4682 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4683 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 4684 r = r600_bytecode_add_alu(ctx->bc, &alu); 4685 if (r) 4686 return r; 4687 4688 /* dst.y = max(src.x, 0.0) */ 4689 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4690 alu.op = ALU_OP2_MAX; 4691 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4692 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 4693 alu.src[1].chan = 0; 4694 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4695 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 4696 r = r600_bytecode_add_alu(ctx->bc, &alu); 4697 if (r) 4698 return r; 4699 4700 /* dst.w, <- 1.0 */ 4701 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4702 alu.op = ALU_OP1_MOV; 4703 alu.src[0].sel = V_SQ_ALU_SRC_1; 4704 alu.src[0].chan = 0; 4705 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4706 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 4707 alu.last = 1; 4708 r = r600_bytecode_add_alu(ctx->bc, &alu); 4709 if (r) 4710 return r; 4711 4712 return 0; 4713} 4714 4715static int tgsi_rsq(struct r600_shader_ctx *ctx) 4716{ 4717 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4718 struct 
r600_bytecode_alu alu; 4719 int i, r; 4720 4721 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4722 4723 /* XXX: 4724 * For state trackers other than OpenGL, we'll want to use 4725 * _RECIPSQRT_IEEE instead. 4726 */ 4727 alu.op = ALU_OP1_RECIPSQRT_CLAMPED; 4728 4729 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4730 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4731 r600_bytecode_src_set_abs(&alu.src[i]); 4732 } 4733 alu.dst.sel = ctx->temp_reg; 4734 alu.dst.write = 1; 4735 alu.last = 1; 4736 r = r600_bytecode_add_alu(ctx->bc, &alu); 4737 if (r) 4738 return r; 4739 /* replicate result */ 4740 return tgsi_helper_tempx_replicate(ctx); 4741} 4742 4743static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 4744{ 4745 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4746 struct r600_bytecode_alu alu; 4747 int i, r; 4748 4749 for (i = 0; i < 4; i++) { 4750 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4751 alu.src[0].sel = ctx->temp_reg; 4752 alu.op = ALU_OP1_MOV; 4753 alu.dst.chan = i; 4754 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4755 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4756 if (i == 3) 4757 alu.last = 1; 4758 r = r600_bytecode_add_alu(ctx->bc, &alu); 4759 if (r) 4760 return r; 4761 } 4762 return 0; 4763} 4764 4765static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 4766{ 4767 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4768 struct r600_bytecode_alu alu; 4769 int i, r; 4770 4771 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4772 alu.op = ctx->inst_info->op; 4773 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4774 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4775 } 4776 alu.dst.sel = ctx->temp_reg; 4777 alu.dst.write = 1; 4778 alu.last = 1; 4779 r = r600_bytecode_add_alu(ctx->bc, &alu); 4780 if (r) 4781 return r; 4782 /* replicate result */ 4783 return tgsi_helper_tempx_replicate(ctx); 4784} 4785 4786static int cayman_pow(struct 
r600_shader_ctx *ctx) 4787{ 4788 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4789 int i, r; 4790 struct r600_bytecode_alu alu; 4791 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4792 4793 for (i = 0; i < 3; i++) { 4794 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4795 alu.op = ALU_OP1_LOG_IEEE; 4796 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4797 alu.dst.sel = ctx->temp_reg; 4798 alu.dst.chan = i; 4799 alu.dst.write = 1; 4800 if (i == 2) 4801 alu.last = 1; 4802 r = r600_bytecode_add_alu(ctx->bc, &alu); 4803 if (r) 4804 return r; 4805 } 4806 4807 /* b * LOG2(a) */ 4808 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4809 alu.op = ALU_OP2_MUL; 4810 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 4811 alu.src[1].sel = ctx->temp_reg; 4812 alu.dst.sel = ctx->temp_reg; 4813 alu.dst.write = 1; 4814 alu.last = 1; 4815 r = r600_bytecode_add_alu(ctx->bc, &alu); 4816 if (r) 4817 return r; 4818 4819 for (i = 0; i < last_slot; i++) { 4820 /* POW(a,b) = EXP2(b * LOG2(a))*/ 4821 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4822 alu.op = ALU_OP1_EXP_IEEE; 4823 alu.src[0].sel = ctx->temp_reg; 4824 4825 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4826 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4827 if (i == last_slot - 1) 4828 alu.last = 1; 4829 r = r600_bytecode_add_alu(ctx->bc, &alu); 4830 if (r) 4831 return r; 4832 } 4833 return 0; 4834} 4835 4836static int tgsi_pow(struct r600_shader_ctx *ctx) 4837{ 4838 struct r600_bytecode_alu alu; 4839 int r; 4840 4841 /* LOG2(a) */ 4842 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4843 alu.op = ALU_OP1_LOG_IEEE; 4844 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4845 alu.dst.sel = ctx->temp_reg; 4846 alu.dst.write = 1; 4847 alu.last = 1; 4848 r = r600_bytecode_add_alu(ctx->bc, &alu); 4849 if (r) 4850 return r; 4851 /* b * LOG2(a) */ 4852 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4853 alu.op = ALU_OP2_MUL; 4854 
r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 4855 alu.src[1].sel = ctx->temp_reg; 4856 alu.dst.sel = ctx->temp_reg; 4857 alu.dst.write = 1; 4858 alu.last = 1; 4859 r = r600_bytecode_add_alu(ctx->bc, &alu); 4860 if (r) 4861 return r; 4862 /* POW(a,b) = EXP2(b * LOG2(a))*/ 4863 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4864 alu.op = ALU_OP1_EXP_IEEE; 4865 alu.src[0].sel = ctx->temp_reg; 4866 alu.dst.sel = ctx->temp_reg; 4867 alu.dst.write = 1; 4868 alu.last = 1; 4869 r = r600_bytecode_add_alu(ctx->bc, &alu); 4870 if (r) 4871 return r; 4872 return tgsi_helper_tempx_replicate(ctx); 4873} 4874 4875static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 4876{ 4877 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4878 struct r600_bytecode_alu alu; 4879 int i, r, j; 4880 unsigned write_mask = inst->Dst[0].Register.WriteMask; 4881 int tmp0 = ctx->temp_reg; 4882 int tmp1 = r600_get_temp(ctx); 4883 int tmp2 = r600_get_temp(ctx); 4884 int tmp3 = r600_get_temp(ctx); 4885 /* Unsigned path: 4886 * 4887 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 4888 * 4889 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 4890 * 2. tmp0.z = lo (tmp0.x * src2) 4891 * 3. tmp0.w = -tmp0.z 4892 * 4. tmp0.y = hi (tmp0.x * src2) 4893 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 4894 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 4895 * 7. tmp1.x = tmp0.x - tmp0.w 4896 * 8. tmp1.y = tmp0.x + tmp0.w 4897 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 4898 * 10. tmp0.z = hi(tmp0.x * src1) = q 4899 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 4900 * 4901 * 12. tmp0.w = src1 - tmp0.y = r 4902 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 4903 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 4904 * 4905 * if DIV 4906 * 4907 * 15. tmp1.z = tmp0.z + 1 = q + 1 4908 * 16. tmp1.w = tmp0.z - 1 = q - 1 4909 * 4910 * else MOD 4911 * 4912 * 15. 
tmp1.z = tmp0.w - src2 = r - src2 4913 * 16. tmp1.w = tmp0.w + src2 = r + src2 4914 * 4915 * endif 4916 * 4917 * 17. tmp1.x = tmp1.x & tmp1.y 4918 * 4919 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 4920 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 4921 * 4922 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 4923 * 20. dst = src2==0 ? MAX_UINT : tmp0.z 4924 * 4925 * Signed path: 4926 * 4927 * Same as unsigned, using abs values of the operands, 4928 * and fixing the sign of the result in the end. 4929 */ 4930 4931 for (i = 0; i < 4; i++) { 4932 if (!(write_mask & (1<<i))) 4933 continue; 4934 4935 if (signed_op) { 4936 4937 /* tmp2.x = -src0 */ 4938 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4939 alu.op = ALU_OP2_SUB_INT; 4940 4941 alu.dst.sel = tmp2; 4942 alu.dst.chan = 0; 4943 alu.dst.write = 1; 4944 4945 alu.src[0].sel = V_SQ_ALU_SRC_0; 4946 4947 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4948 4949 alu.last = 1; 4950 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4951 return r; 4952 4953 /* tmp2.y = -src1 */ 4954 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4955 alu.op = ALU_OP2_SUB_INT; 4956 4957 alu.dst.sel = tmp2; 4958 alu.dst.chan = 1; 4959 alu.dst.write = 1; 4960 4961 alu.src[0].sel = V_SQ_ALU_SRC_0; 4962 4963 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4964 4965 alu.last = 1; 4966 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4967 return r; 4968 4969 /* tmp2.z sign bit is set if src0 and src2 signs are different */ 4970 /* it will be a sign of the quotient */ 4971 if (!mod) { 4972 4973 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4974 alu.op = ALU_OP2_XOR_INT; 4975 4976 alu.dst.sel = tmp2; 4977 alu.dst.chan = 2; 4978 alu.dst.write = 1; 4979 4980 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4981 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4982 4983 alu.last = 1; 4984 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4985 return r; 4986 } 4987 4988 /* tmp2.x = |src0| */ 4989 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 4990 alu.op = ALU_OP3_CNDGE_INT; 4991 alu.is_op3 = 1; 4992 4993 alu.dst.sel = tmp2; 4994 alu.dst.chan = 0; 4995 alu.dst.write = 1; 4996 4997 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4998 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4999 alu.src[2].sel = tmp2; 5000 alu.src[2].chan = 0; 5001 5002 alu.last = 1; 5003 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5004 return r; 5005 5006 /* tmp2.y = |src1| */ 5007 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5008 alu.op = ALU_OP3_CNDGE_INT; 5009 alu.is_op3 = 1; 5010 5011 alu.dst.sel = tmp2; 5012 alu.dst.chan = 1; 5013 alu.dst.write = 1; 5014 5015 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5016 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5017 alu.src[2].sel = tmp2; 5018 alu.src[2].chan = 1; 5019 5020 alu.last = 1; 5021 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5022 return r; 5023 5024 } 5025 5026 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 5027 if (ctx->bc->chip_class == CAYMAN) { 5028 /* tmp3.x = u2f(src2) */ 5029 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5030 alu.op = ALU_OP1_UINT_TO_FLT; 5031 5032 alu.dst.sel = tmp3; 5033 alu.dst.chan = 0; 5034 alu.dst.write = 1; 5035 5036 if (signed_op) { 5037 alu.src[0].sel = tmp2; 5038 alu.src[0].chan = 1; 5039 } else { 5040 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5041 } 5042 5043 alu.last = 1; 5044 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5045 return r; 5046 5047 /* tmp0.x = recip(tmp3.x) */ 5048 for (j = 0 ; j < 3; j++) { 5049 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5050 alu.op = ALU_OP1_RECIP_IEEE; 5051 5052 alu.dst.sel = tmp0; 5053 alu.dst.chan = j; 5054 alu.dst.write = (j == 0); 5055 5056 alu.src[0].sel = tmp3; 5057 alu.src[0].chan = 0; 5058 5059 if (j == 2) 5060 alu.last = 1; 5061 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5062 return r; 5063 } 5064 5065 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5066 alu.op = ALU_OP2_MUL; 5067 5068 
alu.src[0].sel = tmp0; 5069 alu.src[0].chan = 0; 5070 5071 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 5072 alu.src[1].value = 0x4f800000; 5073 5074 alu.dst.sel = tmp3; 5075 alu.dst.write = 1; 5076 alu.last = 1; 5077 r = r600_bytecode_add_alu(ctx->bc, &alu); 5078 if (r) 5079 return r; 5080 5081 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5082 alu.op = ALU_OP1_FLT_TO_UINT; 5083 5084 alu.dst.sel = tmp0; 5085 alu.dst.chan = 0; 5086 alu.dst.write = 1; 5087 5088 alu.src[0].sel = tmp3; 5089 alu.src[0].chan = 0; 5090 5091 alu.last = 1; 5092 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5093 return r; 5094 5095 } else { 5096 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5097 alu.op = ALU_OP1_RECIP_UINT; 5098 5099 alu.dst.sel = tmp0; 5100 alu.dst.chan = 0; 5101 alu.dst.write = 1; 5102 5103 if (signed_op) { 5104 alu.src[0].sel = tmp2; 5105 alu.src[0].chan = 1; 5106 } else { 5107 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5108 } 5109 5110 alu.last = 1; 5111 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5112 return r; 5113 } 5114 5115 /* 2. 
tmp0.z = lo (tmp0.x * src2) */ 5116 if (ctx->bc->chip_class == CAYMAN) { 5117 for (j = 0 ; j < 4; j++) { 5118 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5119 alu.op = ALU_OP2_MULLO_UINT; 5120 5121 alu.dst.sel = tmp0; 5122 alu.dst.chan = j; 5123 alu.dst.write = (j == 2); 5124 5125 alu.src[0].sel = tmp0; 5126 alu.src[0].chan = 0; 5127 if (signed_op) { 5128 alu.src[1].sel = tmp2; 5129 alu.src[1].chan = 1; 5130 } else { 5131 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5132 } 5133 5134 alu.last = (j == 3); 5135 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5136 return r; 5137 } 5138 } else { 5139 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5140 alu.op = ALU_OP2_MULLO_UINT; 5141 5142 alu.dst.sel = tmp0; 5143 alu.dst.chan = 2; 5144 alu.dst.write = 1; 5145 5146 alu.src[0].sel = tmp0; 5147 alu.src[0].chan = 0; 5148 if (signed_op) { 5149 alu.src[1].sel = tmp2; 5150 alu.src[1].chan = 1; 5151 } else { 5152 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5153 } 5154 5155 alu.last = 1; 5156 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5157 return r; 5158 } 5159 5160 /* 3. tmp0.w = -tmp0.z */ 5161 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5162 alu.op = ALU_OP2_SUB_INT; 5163 5164 alu.dst.sel = tmp0; 5165 alu.dst.chan = 3; 5166 alu.dst.write = 1; 5167 5168 alu.src[0].sel = V_SQ_ALU_SRC_0; 5169 alu.src[1].sel = tmp0; 5170 alu.src[1].chan = 2; 5171 5172 alu.last = 1; 5173 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5174 return r; 5175 5176 /* 4. 
tmp0.y = hi (tmp0.x * src2) */ 5177 if (ctx->bc->chip_class == CAYMAN) { 5178 for (j = 0 ; j < 4; j++) { 5179 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5180 alu.op = ALU_OP2_MULHI_UINT; 5181 5182 alu.dst.sel = tmp0; 5183 alu.dst.chan = j; 5184 alu.dst.write = (j == 1); 5185 5186 alu.src[0].sel = tmp0; 5187 alu.src[0].chan = 0; 5188 5189 if (signed_op) { 5190 alu.src[1].sel = tmp2; 5191 alu.src[1].chan = 1; 5192 } else { 5193 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5194 } 5195 alu.last = (j == 3); 5196 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5197 return r; 5198 } 5199 } else { 5200 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5201 alu.op = ALU_OP2_MULHI_UINT; 5202 5203 alu.dst.sel = tmp0; 5204 alu.dst.chan = 1; 5205 alu.dst.write = 1; 5206 5207 alu.src[0].sel = tmp0; 5208 alu.src[0].chan = 0; 5209 5210 if (signed_op) { 5211 alu.src[1].sel = tmp2; 5212 alu.src[1].chan = 1; 5213 } else { 5214 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5215 } 5216 5217 alu.last = 1; 5218 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5219 return r; 5220 } 5221 5222 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 5223 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5224 alu.op = ALU_OP3_CNDE_INT; 5225 alu.is_op3 = 1; 5226 5227 alu.dst.sel = tmp0; 5228 alu.dst.chan = 2; 5229 alu.dst.write = 1; 5230 5231 alu.src[0].sel = tmp0; 5232 alu.src[0].chan = 1; 5233 alu.src[1].sel = tmp0; 5234 alu.src[1].chan = 3; 5235 alu.src[2].sel = tmp0; 5236 alu.src[2].chan = 2; 5237 5238 alu.last = 1; 5239 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5240 return r; 5241 5242 /* 6. 
tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 5243 if (ctx->bc->chip_class == CAYMAN) { 5244 for (j = 0 ; j < 4; j++) { 5245 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5246 alu.op = ALU_OP2_MULHI_UINT; 5247 5248 alu.dst.sel = tmp0; 5249 alu.dst.chan = j; 5250 alu.dst.write = (j == 3); 5251 5252 alu.src[0].sel = tmp0; 5253 alu.src[0].chan = 2; 5254 5255 alu.src[1].sel = tmp0; 5256 alu.src[1].chan = 0; 5257 5258 alu.last = (j == 3); 5259 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5260 return r; 5261 } 5262 } else { 5263 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5264 alu.op = ALU_OP2_MULHI_UINT; 5265 5266 alu.dst.sel = tmp0; 5267 alu.dst.chan = 3; 5268 alu.dst.write = 1; 5269 5270 alu.src[0].sel = tmp0; 5271 alu.src[0].chan = 2; 5272 5273 alu.src[1].sel = tmp0; 5274 alu.src[1].chan = 0; 5275 5276 alu.last = 1; 5277 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5278 return r; 5279 } 5280 5281 /* 7. tmp1.x = tmp0.x - tmp0.w */ 5282 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5283 alu.op = ALU_OP2_SUB_INT; 5284 5285 alu.dst.sel = tmp1; 5286 alu.dst.chan = 0; 5287 alu.dst.write = 1; 5288 5289 alu.src[0].sel = tmp0; 5290 alu.src[0].chan = 0; 5291 alu.src[1].sel = tmp0; 5292 alu.src[1].chan = 3; 5293 5294 alu.last = 1; 5295 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5296 return r; 5297 5298 /* 8. tmp1.y = tmp0.x + tmp0.w */ 5299 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5300 alu.op = ALU_OP2_ADD_INT; 5301 5302 alu.dst.sel = tmp1; 5303 alu.dst.chan = 1; 5304 alu.dst.write = 1; 5305 5306 alu.src[0].sel = tmp0; 5307 alu.src[0].chan = 0; 5308 alu.src[1].sel = tmp0; 5309 alu.src[1].chan = 3; 5310 5311 alu.last = 1; 5312 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5313 return r; 5314 5315 /* 9. tmp0.x = (tmp0.y == 0 ? 
tmp1.y : tmp1.x) */ 5316 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5317 alu.op = ALU_OP3_CNDE_INT; 5318 alu.is_op3 = 1; 5319 5320 alu.dst.sel = tmp0; 5321 alu.dst.chan = 0; 5322 alu.dst.write = 1; 5323 5324 alu.src[0].sel = tmp0; 5325 alu.src[0].chan = 1; 5326 alu.src[1].sel = tmp1; 5327 alu.src[1].chan = 1; 5328 alu.src[2].sel = tmp1; 5329 alu.src[2].chan = 0; 5330 5331 alu.last = 1; 5332 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5333 return r; 5334 5335 /* 10. tmp0.z = hi(tmp0.x * src1) = q */ 5336 if (ctx->bc->chip_class == CAYMAN) { 5337 for (j = 0 ; j < 4; j++) { 5338 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5339 alu.op = ALU_OP2_MULHI_UINT; 5340 5341 alu.dst.sel = tmp0; 5342 alu.dst.chan = j; 5343 alu.dst.write = (j == 2); 5344 5345 alu.src[0].sel = tmp0; 5346 alu.src[0].chan = 0; 5347 5348 if (signed_op) { 5349 alu.src[1].sel = tmp2; 5350 alu.src[1].chan = 0; 5351 } else { 5352 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5353 } 5354 5355 alu.last = (j == 3); 5356 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5357 return r; 5358 } 5359 } else { 5360 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5361 alu.op = ALU_OP2_MULHI_UINT; 5362 5363 alu.dst.sel = tmp0; 5364 alu.dst.chan = 2; 5365 alu.dst.write = 1; 5366 5367 alu.src[0].sel = tmp0; 5368 alu.src[0].chan = 0; 5369 5370 if (signed_op) { 5371 alu.src[1].sel = tmp2; 5372 alu.src[1].chan = 0; 5373 } else { 5374 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5375 } 5376 5377 alu.last = 1; 5378 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5379 return r; 5380 } 5381 5382 /* 11. 
tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 5383 if (ctx->bc->chip_class == CAYMAN) { 5384 for (j = 0 ; j < 4; j++) { 5385 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5386 alu.op = ALU_OP2_MULLO_UINT; 5387 5388 alu.dst.sel = tmp0; 5389 alu.dst.chan = j; 5390 alu.dst.write = (j == 1); 5391 5392 if (signed_op) { 5393 alu.src[0].sel = tmp2; 5394 alu.src[0].chan = 1; 5395 } else { 5396 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5397 } 5398 5399 alu.src[1].sel = tmp0; 5400 alu.src[1].chan = 2; 5401 5402 alu.last = (j == 3); 5403 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5404 return r; 5405 } 5406 } else { 5407 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5408 alu.op = ALU_OP2_MULLO_UINT; 5409 5410 alu.dst.sel = tmp0; 5411 alu.dst.chan = 1; 5412 alu.dst.write = 1; 5413 5414 if (signed_op) { 5415 alu.src[0].sel = tmp2; 5416 alu.src[0].chan = 1; 5417 } else { 5418 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5419 } 5420 5421 alu.src[1].sel = tmp0; 5422 alu.src[1].chan = 2; 5423 5424 alu.last = 1; 5425 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5426 return r; 5427 } 5428 5429 /* 12. tmp0.w = src1 - tmp0.y = r */ 5430 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5431 alu.op = ALU_OP2_SUB_INT; 5432 5433 alu.dst.sel = tmp0; 5434 alu.dst.chan = 3; 5435 alu.dst.write = 1; 5436 5437 if (signed_op) { 5438 alu.src[0].sel = tmp2; 5439 alu.src[0].chan = 0; 5440 } else { 5441 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5442 } 5443 5444 alu.src[1].sel = tmp0; 5445 alu.src[1].chan = 1; 5446 5447 alu.last = 1; 5448 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5449 return r; 5450 5451 /* 13. 
tmp1.x = tmp0.w >= src2 = r >= src2 */ 5452 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5453 alu.op = ALU_OP2_SETGE_UINT; 5454 5455 alu.dst.sel = tmp1; 5456 alu.dst.chan = 0; 5457 alu.dst.write = 1; 5458 5459 alu.src[0].sel = tmp0; 5460 alu.src[0].chan = 3; 5461 if (signed_op) { 5462 alu.src[1].sel = tmp2; 5463 alu.src[1].chan = 1; 5464 } else { 5465 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5466 } 5467 5468 alu.last = 1; 5469 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5470 return r; 5471 5472 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 5473 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5474 alu.op = ALU_OP2_SETGE_UINT; 5475 5476 alu.dst.sel = tmp1; 5477 alu.dst.chan = 1; 5478 alu.dst.write = 1; 5479 5480 if (signed_op) { 5481 alu.src[0].sel = tmp2; 5482 alu.src[0].chan = 0; 5483 } else { 5484 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5485 } 5486 5487 alu.src[1].sel = tmp0; 5488 alu.src[1].chan = 1; 5489 5490 alu.last = 1; 5491 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5492 return r; 5493 5494 if (mod) { /* UMOD */ 5495 5496 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 5497 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5498 alu.op = ALU_OP2_SUB_INT; 5499 5500 alu.dst.sel = tmp1; 5501 alu.dst.chan = 2; 5502 alu.dst.write = 1; 5503 5504 alu.src[0].sel = tmp0; 5505 alu.src[0].chan = 3; 5506 5507 if (signed_op) { 5508 alu.src[1].sel = tmp2; 5509 alu.src[1].chan = 1; 5510 } else { 5511 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5512 } 5513 5514 alu.last = 1; 5515 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5516 return r; 5517 5518 /* 16. 
tmp1.w = tmp0.w + src2 = r + src2 */ 5519 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5520 alu.op = ALU_OP2_ADD_INT; 5521 5522 alu.dst.sel = tmp1; 5523 alu.dst.chan = 3; 5524 alu.dst.write = 1; 5525 5526 alu.src[0].sel = tmp0; 5527 alu.src[0].chan = 3; 5528 if (signed_op) { 5529 alu.src[1].sel = tmp2; 5530 alu.src[1].chan = 1; 5531 } else { 5532 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5533 } 5534 5535 alu.last = 1; 5536 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5537 return r; 5538 5539 } else { /* UDIV */ 5540 5541 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 5542 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5543 alu.op = ALU_OP2_ADD_INT; 5544 5545 alu.dst.sel = tmp1; 5546 alu.dst.chan = 2; 5547 alu.dst.write = 1; 5548 5549 alu.src[0].sel = tmp0; 5550 alu.src[0].chan = 2; 5551 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 5552 5553 alu.last = 1; 5554 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5555 return r; 5556 5557 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 5558 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5559 alu.op = ALU_OP2_ADD_INT; 5560 5561 alu.dst.sel = tmp1; 5562 alu.dst.chan = 3; 5563 alu.dst.write = 1; 5564 5565 alu.src[0].sel = tmp0; 5566 alu.src[0].chan = 2; 5567 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 5568 5569 alu.last = 1; 5570 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5571 return r; 5572 5573 } 5574 5575 /* 17. tmp1.x = tmp1.x & tmp1.y */ 5576 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5577 alu.op = ALU_OP2_AND_INT; 5578 5579 alu.dst.sel = tmp1; 5580 alu.dst.chan = 0; 5581 alu.dst.write = 1; 5582 5583 alu.src[0].sel = tmp1; 5584 alu.src[0].chan = 0; 5585 alu.src[1].sel = tmp1; 5586 alu.src[1].chan = 1; 5587 5588 alu.last = 1; 5589 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5590 return r; 5591 5592 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 5593 /* 18. tmp0.z = tmp1.x==0 ? 
tmp0.w : tmp1.z MOD */ 5594 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5595 alu.op = ALU_OP3_CNDE_INT; 5596 alu.is_op3 = 1; 5597 5598 alu.dst.sel = tmp0; 5599 alu.dst.chan = 2; 5600 alu.dst.write = 1; 5601 5602 alu.src[0].sel = tmp1; 5603 alu.src[0].chan = 0; 5604 alu.src[1].sel = tmp0; 5605 alu.src[1].chan = mod ? 3 : 2; 5606 alu.src[2].sel = tmp1; 5607 alu.src[2].chan = 2; 5608 5609 alu.last = 1; 5610 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5611 return r; 5612 5613 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 5614 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5615 alu.op = ALU_OP3_CNDE_INT; 5616 alu.is_op3 = 1; 5617 5618 if (signed_op) { 5619 alu.dst.sel = tmp0; 5620 alu.dst.chan = 2; 5621 alu.dst.write = 1; 5622 } else { 5623 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5624 } 5625 5626 alu.src[0].sel = tmp1; 5627 alu.src[0].chan = 1; 5628 alu.src[1].sel = tmp1; 5629 alu.src[1].chan = 3; 5630 alu.src[2].sel = tmp0; 5631 alu.src[2].chan = 2; 5632 5633 alu.last = 1; 5634 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5635 return r; 5636 5637 if (signed_op) { 5638 5639 /* fix the sign of the result */ 5640 5641 if (mod) { 5642 5643 /* tmp0.x = -tmp0.z */ 5644 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5645 alu.op = ALU_OP2_SUB_INT; 5646 5647 alu.dst.sel = tmp0; 5648 alu.dst.chan = 0; 5649 alu.dst.write = 1; 5650 5651 alu.src[0].sel = V_SQ_ALU_SRC_0; 5652 alu.src[1].sel = tmp0; 5653 alu.src[1].chan = 2; 5654 5655 alu.last = 1; 5656 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5657 return r; 5658 5659 /* sign of the remainder is the same as the sign of src0 */ 5660 /* tmp0.x = src0>=0 ? 
tmp0.z : tmp0.x */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_CNDGE_INT;
			alu.is_op3 = 1;

			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

			/* select between the computed remainder and its negation
			 * based on the sign of src0 */
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
			alu.src[1].sel = tmp0;
			alu.src[1].chan = 2;
			alu.src[2].sel = tmp0;
			alu.src[2].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		} else {

			/* tmp0.x = -tmp0.z */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[1].sel = tmp0;
			alu.src[1].chan = 2;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* fix the quotient sign (same as the sign of src0*src1) */
			/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_CNDGE_INT;
			alu.is_op3 = 1;

			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

			alu.src[0].sel = tmp2;
			alu.src[0].chan = 2;
			alu.src[1].sel = tmp0;
			alu.src[1].chan = 2;
			alu.src[2].sel = tmp0;
			alu.src[2].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}
		}
	}
	return 0;
}

/* The four TGSI integer divide/modulo opcodes all expand through the
 * shared tgsi_divmod() lowering above; judging by these callers the
 * second argument selects modulo-vs-quotient and the third selects the
 * signed variant (NOTE(review): confirm against tgsi_divmod's
 * signature, which is outside this view). */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 0);
}

static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 0);
}

static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 1);
}

static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 1);
}


/* Float-to-integer conversion: first TRUNC every written channel into
 * temp_reg, then convert with the opcode chosen by the instruction
 * table (ctx->inst_info->op, e.g. FLT_TO_INT / FLT_TO_UINT) into the
 * real destination.  Returns 0 on success or the bytecode error code. */
static int tgsi_f2i(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* pass 1: tmp = trunc(src) for each channel in the write mask */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_TRUNC;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* pass 2: dst = convert(tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		/* FLT_TO_UINT is always marked as group-last, not just on the
		 * final channel — presumably a hardware slot restriction for
		 * this op; TODO confirm against the ISA docs */
		if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* Integer absolute value: tmp = 0 - src, then dst = (src >= 0 ? src : tmp)
 * via CNDGE_INT. */
static int tgsi_iabs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* tmp = -src */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[0].sel = V_SQ_ALU_SRC_0;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (src >= 0 ?
src : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		alu.dst.write = 1;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		/* condition and "then" operand are both the original source */
		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Integer sign (ISSG): produce -1/0/1 in two conditional steps:
 *   tmp = (src >= 0 ? src : -1)   -- clamps negatives to -1
 *   dst = (tmp > 0 ? 1 : tmp)     -- clamps positives to 1, keeps 0/-1 */
static int tgsi_issg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* tmp = (src >= 0 ? src : -1) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (tmp > 0 ? 1 : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT_INT;
		alu.is_op3 = 1;
		alu.dst.write = 1;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}



/* Float sign (SSG): same two-step conditional scheme as ISSG but with
 * the float CNDGT op; all four channels are emitted unconditionally. */
static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	/* tmp = (src > 0 ? 1 : src) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.src[1].sel = V_SQ_ALU_SRC_1;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (-tmp > 0 ?
-1 : tmp) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		/* negate both the condition and the "then" constant, so a
		 * negative tmp selects -1 and anything else keeps tmp */
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		alu.src[0].neg = 1;

		alu.src[1].sel = V_SQ_ALU_SRC_1;
		alu.src[1].neg = 1;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI BFI (bitfield insert), three passes per written channel:
 *   t1 = BFM(src3, src2)   -- build the width/offset mask
 *   t2 = src1 << src2      -- align the insert value
 *   dst = BFI(t1, t2, src0) -- merge into the base value */
static int tgsi_bfi(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	t1 = ctx->temp_reg;

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* create mask tmp */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_BFM_INT;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* shift insert left */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_LSHL_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* actual bitfield insert */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_BFI_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* IMSB/UMSB: find the most significant set bit.  FFBH counts from the
 * MSB while TGSI wants an LSB-based index, so the raw result is
 * converted with 31 - t1; a "not found" result (t1 < 0) is passed
 * through unchanged by the final CNDGE_INT. */
static int tgsi_msb(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
	       ctx->inst_info->op == ALU_OP1_FFBH_UINT);

	t1 = ctx->temp_reg;

	/* bit position is indexed from lsb by TGSI, and from msb by the hardware */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t1 = FFBH_INT / FFBH_UINT */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t2 = 31 - t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[0].value = 31;
		alu.src[1].sel = t1;
		alu.src[1].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* result = t1 >= 0 ? t2 : t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		alu.src[2].sel = t1;
		alu.src[2].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* Evergreen/Cayman INTERP_* opcodes (INTERP_CENTROID / INTERP_OFFSET /
 * INTERP_SAMPLE).  For OFFSET/SAMPLE the barycentric i/j pair is first
 * adjusted with screen-space gradients (GET_GRADIENTS_H/V + two MULADD
 * passes); the attribute is then interpolated with INTERP_ZW/INTERP_XY
 * pairs into a temp and finally moved to the destination, because
 * INTERP cannot swizzle its dst. */
static int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
	unsigned location;
	int input;

	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);

	input = inst->Src[0].Register.Index;

	/* Interpolators have been marked for use already by allocate_system_value_inputs */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */
	}
	else {
		location = TGSI_INTERPOLATE_LOC_CENTROID;
	}

	k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location);
	if (k < 0)
		k = 0;
	interp_gpr = ctx->eg_interpolators[k].ij_index / 2;
	interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2);

	/* NOTE: currently offset is not perspective correct */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		int sample_gpr = -1;
		int gradientsH, gradientsV;
		struct r600_bytecode_tex tex;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]);
		}

		/* fetch d(ij)/dx and d(ij)/dy of the barycentrics */
		gradientsH = r600_get_temp(ctx);
		gradientsV = r600_get_temp(ctx);
		for (i = 0; i < 2; i++) {
			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
			tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V;
			tex.src_gpr = interp_gpr;
			tex.src_sel_x = interp_base_chan + 0;
			tex.src_sel_y = interp_base_chan + 1;
			tex.src_sel_z = 0;
			tex.src_sel_w = 0;
			tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
			tex.dst_sel_x = 0;
			tex.dst_sel_y = 1;
			tex.dst_sel_z = 7;
			tex.dst_sel_w = 7;
			tex.inst_mod = 1; // Use per pixel gradient calculation
			tex.sampler_id = 0;
			tex.resource_id = tex.sampler_id;
			r = r600_bytecode_add_tex(ctx->bc, &tex);
			if (r)
				return r;
		}

		/* ij += gradientH * offset.x (or sample position x) */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsH;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 2;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
			}
			alu.src[2].sel = interp_gpr;
			alu.src[2].chan = interp_base_chan + i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* ij += gradientV * offset.y (or sample position y) */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsV;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 3;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
			}
			alu.src[2].sel = ctx->temp_reg;
			alu.src[2].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* interpolate: ZW pair first (i 0-3), then XY (i 4-7); only the
	 * middle channels carry live results */
	tmp = r600_get_temp(ctx);
	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;

		alu.dst.sel = tmp;
		if ((i > 1 && i < 6)) {
			alu.dst.write = 1;
		}
		else {
			alu.dst.write = 0;
		}
		alu.dst.chan = i % 4;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1 - (i % 2);
		} else {
			alu.src[0].sel = interp_gpr;
			alu.src[0].chan = interp_base_chan + 1 - (i % 2);
		}
		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[1].chan = 0;

		alu.last = i % 4 == 3;
		alu.bank_swizzle_force = SQ_ALU_VEC_210;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	// INTERP can't swizzle dst
	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	for (i = 0; i <= lasti; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = tmp;
		alu.src[0].chan = ctx->src[0].swizzle[i];
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		alu.last = i == lasti;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}


/* Copy ctx->temp_reg to the instruction's destination, emitting NOPs
 * for channels outside the write mask (see the body just below). */
static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
{
	struct
r600_bytecode_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
			/* unwritten channel: pad the ALU group with a NOP */
			alu.op = ALU_OP0_NOP;
			alu.dst.chan = i;
		} else {
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = i;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Prepare one source operand of an op3 instruction.  op3 operands
 * cannot carry the abs modifier, so when the source has abs set it is
 * first materialized into the caller-provided temp register via a MOV
 * and bc_src is rewritten to read that temp instead. */
static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx,
                                 unsigned temp, int chan,
                                 struct r600_bytecode_alu_src *bc_src,
                                 const struct r600_shader_src *shader_src)
{
	struct r600_bytecode_alu alu;
	int r;

	r600_bytecode_src(bc_src, shader_src, chan);

	/* op3 operands don't support abs modifier */
	if (bc_src->abs) {
		assert(temp!=0);      /* we actually need the extra register, make sure it is allocated. */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = temp;
		alu.dst.chan = chan;
		alu.dst.write = 1;

		alu.src[0] = *bc_src;
		alu.last = true; /* always close the group here; possibly conservative ("sufficient?" per original author) */
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		memset(bc_src, 0, sizeof(*bc_src));
		bc_src->sel = temp;
		bc_src->chan = chan;
	}
	return 0;
}

/* Generic three-operand TGSI instruction: pre-allocate one temp per
 * source that carries an abs modifier (see tgsi_make_src_for_op3),
 * then emit the table opcode per written channel. */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int temp_regs[4];

	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
		temp_regs[j] = 0;
		if (ctx->src[j].abs)
			temp_regs[j] = r600_get_temp(ctx);
	}
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
			if (r)
				return r;
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Dot-product family (DP2/DP3/DPH/DP4...): the hardware reduction op
 * runs over all four channels, so unused channels are padded with 0
 * (DP2/DP3) or the w component replaced by 1 (DPH). */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		/* handle some special cases */
		switch (inst->Instruction.Opcode) {
		case TGSI_OPCODE_DP2:
			if
(i > 1) { 6408 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 6409 alu.src[0].chan = alu.src[1].chan = 0; 6410 } 6411 break; 6412 case TGSI_OPCODE_DP3: 6413 if (i > 2) { 6414 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 6415 alu.src[0].chan = alu.src[1].chan = 0; 6416 } 6417 break; 6418 case TGSI_OPCODE_DPH: 6419 if (i == 3) { 6420 alu.src[0].sel = V_SQ_ALU_SRC_1; 6421 alu.src[0].chan = 0; 6422 alu.src[0].neg = 0; 6423 } 6424 break; 6425 default: 6426 break; 6427 } 6428 if (i == 3) { 6429 alu.last = 1; 6430 } 6431 r = r600_bytecode_add_alu(ctx->bc, &alu); 6432 if (r) 6433 return r; 6434 } 6435 return 0; 6436} 6437 6438static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 6439 unsigned index) 6440{ 6441 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6442 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 6443 inst->Src[index].Register.File != TGSI_FILE_INPUT && 6444 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) || 6445 ctx->src[index].neg || ctx->src[index].abs || 6446 (inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == TGSI_PROCESSOR_GEOMETRY); 6447} 6448 6449static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 6450 unsigned index) 6451{ 6452 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6453 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 6454} 6455 6456static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading) 6457{ 6458 struct r600_bytecode_vtx vtx; 6459 struct r600_bytecode_alu alu; 6460 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6461 int src_gpr, r, i; 6462 int id = tgsi_tex_get_src_gpr(ctx, 1); 6463 6464 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 6465 if (src_requires_loading) { 6466 for (i = 0; i < 4; i++) { 6467 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6468 alu.op = ALU_OP1_MOV; 6469 
r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6470 alu.dst.sel = ctx->temp_reg; 6471 alu.dst.chan = i; 6472 if (i == 3) 6473 alu.last = 1; 6474 alu.dst.write = 1; 6475 r = r600_bytecode_add_alu(ctx->bc, &alu); 6476 if (r) 6477 return r; 6478 } 6479 src_gpr = ctx->temp_reg; 6480 } 6481 6482 memset(&vtx, 0, sizeof(vtx)); 6483 vtx.op = FETCH_OP_VFETCH; 6484 vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; 6485 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 6486 vtx.src_gpr = src_gpr; 6487 vtx.mega_fetch_count = 16; 6488 vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 6489 vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 6490 vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ 6491 vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ 6492 vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */ 6493 vtx.use_const_fields = 1; 6494 6495 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 6496 return r; 6497 6498 if (ctx->bc->chip_class >= EVERGREEN) 6499 return 0; 6500 6501 for (i = 0; i < 4; i++) { 6502 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6503 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6504 continue; 6505 6506 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6507 alu.op = ALU_OP2_AND_INT; 6508 6509 alu.dst.chan = i; 6510 alu.dst.sel = vtx.dst_gpr; 6511 alu.dst.write = 1; 6512 6513 alu.src[0].sel = vtx.dst_gpr; 6514 alu.src[0].chan = i; 6515 6516 alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL; 6517 alu.src[1].sel += (id * 2); 6518 alu.src[1].chan = i % 4; 6519 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6520 6521 if (i == lasti) 6522 alu.last = 1; 6523 r = r600_bytecode_add_alu(ctx->bc, &alu); 6524 if (r) 6525 return r; 6526 } 6527 6528 if (inst->Dst[0].Register.WriteMask & 3) { 6529 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6530 alu.op = ALU_OP2_OR_INT; 6531 6532 alu.dst.chan = 3; 6533 
alu.dst.sel = vtx.dst_gpr; 6534 alu.dst.write = 1; 6535 6536 alu.src[0].sel = vtx.dst_gpr; 6537 alu.src[0].chan = 3; 6538 6539 alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1; 6540 alu.src[1].chan = 0; 6541 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6542 6543 alu.last = 1; 6544 r = r600_bytecode_add_alu(ctx->bc, &alu); 6545 if (r) 6546 return r; 6547 } 6548 return 0; 6549} 6550 6551static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) 6552{ 6553 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6554 struct r600_bytecode_alu alu; 6555 int r; 6556 int id = tgsi_tex_get_src_gpr(ctx, 1); 6557 6558 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6559 alu.op = ALU_OP1_MOV; 6560 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 6561 if (ctx->bc->chip_class >= EVERGREEN) { 6562 /* channel 0 or 2 of each word */ 6563 alu.src[0].sel += (id / 2); 6564 alu.src[0].chan = (id % 2) * 2; 6565 } else { 6566 /* r600 we have them at channel 2 of the second dword */ 6567 alu.src[0].sel += (id * 2) + 1; 6568 alu.src[0].chan = 1; 6569 } 6570 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6571 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 6572 alu.last = 1; 6573 r = r600_bytecode_add_alu(ctx->bc, &alu); 6574 if (r) 6575 return r; 6576 return 0; 6577} 6578 6579static int tgsi_tex(struct r600_shader_ctx *ctx) 6580{ 6581 static float one_point_five = 1.5f; 6582 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6583 struct r600_bytecode_tex tex; 6584 struct r600_bytecode_alu alu; 6585 unsigned src_gpr; 6586 int r, i, j; 6587 int opcode; 6588 bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing && 6589 inst->Instruction.Opcode == TGSI_OPCODE_TXF && 6590 (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || 6591 inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); 6592 6593 bool txf_add_offsets = inst->Texture.NumOffsets && 6594 inst->Instruction.Opcode == TGSI_OPCODE_TXF && 6595 inst->Texture.Texture != 
TGSI_TEXTURE_BUFFER; 6596 6597 /* Texture fetch instructions can only use gprs as source. 6598 * Also they cannot negate the source or take the absolute value */ 6599 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 6600 inst->Instruction.Opcode != TGSI_OPCODE_TXQS && 6601 tgsi_tex_src_requires_loading(ctx, 0)) || 6602 read_compressed_msaa || txf_add_offsets; 6603 6604 boolean src_loaded = FALSE; 6605 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; 6606 int8_t offset_x = 0, offset_y = 0, offset_z = 0; 6607 boolean has_txq_cube_array_z = false; 6608 unsigned sampler_index_mode; 6609 6610 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 6611 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6612 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 6613 if (inst->Dst[0].Register.WriteMask & 4) { 6614 ctx->shader->has_txq_cube_array_z_comp = true; 6615 has_txq_cube_array_z = true; 6616 } 6617 6618 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 6619 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6620 inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 6621 inst->Instruction.Opcode == TGSI_OPCODE_TG4) 6622 sampler_src_reg = 2; 6623 6624 /* TGSI moves the sampler to src reg 3 for TXD */ 6625 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 6626 sampler_src_reg = 3; 6627 6628 sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 6629 6630 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 6631 6632 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 6633 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 6634 ctx->shader->uses_tex_buffers = true; 6635 return r600_do_buffer_txq(ctx); 6636 } 6637 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 6638 if (ctx->bc->chip_class < EVERGREEN) 6639 ctx->shader->uses_tex_buffers = true; 6640 return do_vtx_fetch_inst(ctx, src_requires_loading); 6641 } 6642 } 6643 6644 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 6645 int out_chan; 6646 /* Add perspective divide */ 6647 if (ctx->bc->chip_class == CAYMAN) { 6648 out_chan = 2; 6649 for (i = 0; i < 3; i++) { 6650 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6651 alu.op = ALU_OP1_RECIP_IEEE; 6652 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6653 6654 alu.dst.sel = ctx->temp_reg; 6655 alu.dst.chan = i; 6656 if (i == 2) 6657 alu.last = 1; 6658 if (out_chan == i) 6659 alu.dst.write = 1; 6660 r = r600_bytecode_add_alu(ctx->bc, &alu); 6661 if (r) 6662 return r; 6663 } 6664 6665 } else { 6666 out_chan = 3; 6667 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6668 alu.op = ALU_OP1_RECIP_IEEE; 6669 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6670 6671 alu.dst.sel = ctx->temp_reg; 6672 alu.dst.chan = out_chan; 6673 alu.last = 1; 6674 alu.dst.write = 1; 6675 r = r600_bytecode_add_alu(ctx->bc, &alu); 6676 if (r) 6677 return r; 6678 } 6679 6680 for (i = 0; i < 3; i++) { 6681 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6682 alu.op = ALU_OP2_MUL; 6683 alu.src[0].sel = ctx->temp_reg; 6684 alu.src[0].chan = out_chan; 6685 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6686 alu.dst.sel = ctx->temp_reg; 6687 alu.dst.chan = i; 6688 alu.dst.write = 1; 6689 r = r600_bytecode_add_alu(ctx->bc, &alu); 6690 if (r) 6691 return r; 6692 } 6693 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6694 alu.op = ALU_OP1_MOV; 6695 alu.src[0].sel = V_SQ_ALU_SRC_1; 6696 
alu.src[0].chan = 0; 6697 alu.dst.sel = ctx->temp_reg; 6698 alu.dst.chan = 3; 6699 alu.last = 1; 6700 alu.dst.write = 1; 6701 r = r600_bytecode_add_alu(ctx->bc, &alu); 6702 if (r) 6703 return r; 6704 src_loaded = TRUE; 6705 src_gpr = ctx->temp_reg; 6706 } 6707 6708 6709 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 6710 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6711 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6712 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 6713 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 6714 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 6715 6716 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 6717 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 6718 6719 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 6720 for (i = 0; i < 4; i++) { 6721 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6722 alu.op = ALU_OP2_CUBE; 6723 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 6724 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 6725 alu.dst.sel = ctx->temp_reg; 6726 alu.dst.chan = i; 6727 if (i == 3) 6728 alu.last = 1; 6729 alu.dst.write = 1; 6730 r = r600_bytecode_add_alu(ctx->bc, &alu); 6731 if (r) 6732 return r; 6733 } 6734 6735 /* tmp1.z = RCP_e(|tmp1.z|) */ 6736 if (ctx->bc->chip_class == CAYMAN) { 6737 for (i = 0; i < 3; i++) { 6738 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6739 alu.op = ALU_OP1_RECIP_IEEE; 6740 alu.src[0].sel = ctx->temp_reg; 6741 alu.src[0].chan = 2; 6742 alu.src[0].abs = 1; 6743 alu.dst.sel = ctx->temp_reg; 6744 alu.dst.chan = i; 6745 if (i == 2) 6746 alu.dst.write = 1; 6747 if (i == 2) 6748 alu.last = 1; 6749 r = r600_bytecode_add_alu(ctx->bc, &alu); 6750 if (r) 6751 return r; 6752 } 6753 } else { 6754 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6755 alu.op = ALU_OP1_RECIP_IEEE; 6756 alu.src[0].sel = ctx->temp_reg; 6757 alu.src[0].chan = 2; 6758 alu.src[0].abs = 1; 6759 alu.dst.sel = ctx->temp_reg; 6760 alu.dst.chan = 2; 6761 
alu.dst.write = 1; 6762 alu.last = 1; 6763 r = r600_bytecode_add_alu(ctx->bc, &alu); 6764 if (r) 6765 return r; 6766 } 6767 6768 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 6769 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 6770 * muladd has no writemask, have to use another temp 6771 */ 6772 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6773 alu.op = ALU_OP3_MULADD; 6774 alu.is_op3 = 1; 6775 6776 alu.src[0].sel = ctx->temp_reg; 6777 alu.src[0].chan = 0; 6778 alu.src[1].sel = ctx->temp_reg; 6779 alu.src[1].chan = 2; 6780 6781 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6782 alu.src[2].chan = 0; 6783 alu.src[2].value = *(uint32_t *)&one_point_five; 6784 6785 alu.dst.sel = ctx->temp_reg; 6786 alu.dst.chan = 0; 6787 alu.dst.write = 1; 6788 6789 r = r600_bytecode_add_alu(ctx->bc, &alu); 6790 if (r) 6791 return r; 6792 6793 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6794 alu.op = ALU_OP3_MULADD; 6795 alu.is_op3 = 1; 6796 6797 alu.src[0].sel = ctx->temp_reg; 6798 alu.src[0].chan = 1; 6799 alu.src[1].sel = ctx->temp_reg; 6800 alu.src[1].chan = 2; 6801 6802 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6803 alu.src[2].chan = 0; 6804 alu.src[2].value = *(uint32_t *)&one_point_five; 6805 6806 alu.dst.sel = ctx->temp_reg; 6807 alu.dst.chan = 1; 6808 alu.dst.write = 1; 6809 6810 alu.last = 1; 6811 r = r600_bytecode_add_alu(ctx->bc, &alu); 6812 if (r) 6813 return r; 6814 /* write initial compare value into Z component 6815 - W src 0 for shadow cube 6816 - X src 1 for shadow cube array */ 6817 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6818 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6819 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6820 alu.op = ALU_OP1_MOV; 6821 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 6822 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 6823 else 6824 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6825 alu.dst.sel = ctx->temp_reg; 6826 alu.dst.chan = 2; 6827 alu.dst.write = 1; 6828 alu.last = 1; 
6829 r = r600_bytecode_add_alu(ctx->bc, &alu); 6830 if (r) 6831 return r; 6832 } 6833 6834 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6835 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6836 if (ctx->bc->chip_class >= EVERGREEN) { 6837 int mytmp = r600_get_temp(ctx); 6838 static const float eight = 8.0f; 6839 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6840 alu.op = ALU_OP1_MOV; 6841 alu.src[0].sel = ctx->temp_reg; 6842 alu.src[0].chan = 3; 6843 alu.dst.sel = mytmp; 6844 alu.dst.chan = 0; 6845 alu.dst.write = 1; 6846 alu.last = 1; 6847 r = r600_bytecode_add_alu(ctx->bc, &alu); 6848 if (r) 6849 return r; 6850 6851 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 6852 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6853 alu.op = ALU_OP3_MULADD; 6854 alu.is_op3 = 1; 6855 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6856 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6857 alu.src[1].chan = 0; 6858 alu.src[1].value = *(uint32_t *)&eight; 6859 alu.src[2].sel = mytmp; 6860 alu.src[2].chan = 0; 6861 alu.dst.sel = ctx->temp_reg; 6862 alu.dst.chan = 3; 6863 alu.dst.write = 1; 6864 alu.last = 1; 6865 r = r600_bytecode_add_alu(ctx->bc, &alu); 6866 if (r) 6867 return r; 6868 } else if (ctx->bc->chip_class < EVERGREEN) { 6869 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6870 tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 6871 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6872 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6873 tex.src_gpr = r600_get_temp(ctx); 6874 tex.src_sel_x = 0; 6875 tex.src_sel_y = 0; 6876 tex.src_sel_z = 0; 6877 tex.src_sel_w = 0; 6878 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 6879 tex.coord_type_x = 1; 6880 tex.coord_type_y = 1; 6881 tex.coord_type_z = 1; 6882 tex.coord_type_w = 1; 6883 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6884 alu.op = ALU_OP1_MOV; 6885 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6886 alu.dst.sel = tex.src_gpr; 
6887 alu.dst.chan = 0; 6888 alu.last = 1; 6889 alu.dst.write = 1; 6890 r = r600_bytecode_add_alu(ctx->bc, &alu); 6891 if (r) 6892 return r; 6893 6894 r = r600_bytecode_add_tex(ctx->bc, &tex); 6895 if (r) 6896 return r; 6897 } 6898 6899 } 6900 6901 /* for cube forms of lod and bias we need to route things */ 6902 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 6903 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 6904 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6905 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 6906 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6907 alu.op = ALU_OP1_MOV; 6908 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6909 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 6910 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 6911 else 6912 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6913 alu.dst.sel = ctx->temp_reg; 6914 alu.dst.chan = 2; 6915 alu.last = 1; 6916 alu.dst.write = 1; 6917 r = r600_bytecode_add_alu(ctx->bc, &alu); 6918 if (r) 6919 return r; 6920 } 6921 6922 src_loaded = TRUE; 6923 src_gpr = ctx->temp_reg; 6924 } 6925 6926 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 6927 int temp_h = 0, temp_v = 0; 6928 int start_val = 0; 6929 6930 /* if we've already loaded the src (i.e. CUBE don't reload it). 
*/ 6931 if (src_loaded == TRUE) 6932 start_val = 1; 6933 else 6934 src_loaded = TRUE; 6935 for (i = start_val; i < 3; i++) { 6936 int treg = r600_get_temp(ctx); 6937 6938 if (i == 0) 6939 src_gpr = treg; 6940 else if (i == 1) 6941 temp_h = treg; 6942 else 6943 temp_v = treg; 6944 6945 for (j = 0; j < 4; j++) { 6946 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6947 alu.op = ALU_OP1_MOV; 6948 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 6949 alu.dst.sel = treg; 6950 alu.dst.chan = j; 6951 if (j == 3) 6952 alu.last = 1; 6953 alu.dst.write = 1; 6954 r = r600_bytecode_add_alu(ctx->bc, &alu); 6955 if (r) 6956 return r; 6957 } 6958 } 6959 for (i = 1; i < 3; i++) { 6960 /* set gradients h/v */ 6961 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6962 tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : 6963 FETCH_OP_SET_GRADIENTS_V; 6964 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6965 tex.sampler_index_mode = sampler_index_mode; 6966 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6967 tex.resource_index_mode = sampler_index_mode; 6968 6969 tex.src_gpr = (i == 1) ? 
temp_h : temp_v; 6970 tex.src_sel_x = 0; 6971 tex.src_sel_y = 1; 6972 tex.src_sel_z = 2; 6973 tex.src_sel_w = 3; 6974 6975 tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */ 6976 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 6977 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 6978 tex.coord_type_x = 1; 6979 tex.coord_type_y = 1; 6980 tex.coord_type_z = 1; 6981 tex.coord_type_w = 1; 6982 } 6983 r = r600_bytecode_add_tex(ctx->bc, &tex); 6984 if (r) 6985 return r; 6986 } 6987 } 6988 6989 if (src_requires_loading && !src_loaded) { 6990 for (i = 0; i < 4; i++) { 6991 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6992 alu.op = ALU_OP1_MOV; 6993 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6994 alu.dst.sel = ctx->temp_reg; 6995 alu.dst.chan = i; 6996 if (i == 3) 6997 alu.last = 1; 6998 alu.dst.write = 1; 6999 r = r600_bytecode_add_alu(ctx->bc, &alu); 7000 if (r) 7001 return r; 7002 } 7003 src_loaded = TRUE; 7004 src_gpr = ctx->temp_reg; 7005 } 7006 7007 /* get offset values */ 7008 if (inst->Texture.NumOffsets) { 7009 assert(inst->Texture.NumOffsets == 1); 7010 7011 /* The texture offset feature doesn't work with the TXF instruction 7012 * and must be emulated by adding the offset to the texture coordinates. 
*/ 7013 if (txf_add_offsets) { 7014 const struct tgsi_texture_offset *off = inst->TexOffsets; 7015 7016 switch (inst->Texture.Texture) { 7017 case TGSI_TEXTURE_3D: 7018 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7019 alu.op = ALU_OP2_ADD_INT; 7020 alu.src[0].sel = src_gpr; 7021 alu.src[0].chan = 2; 7022 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7023 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 7024 alu.dst.sel = src_gpr; 7025 alu.dst.chan = 2; 7026 alu.dst.write = 1; 7027 alu.last = 1; 7028 r = r600_bytecode_add_alu(ctx->bc, &alu); 7029 if (r) 7030 return r; 7031 /* fall through */ 7032 7033 case TGSI_TEXTURE_2D: 7034 case TGSI_TEXTURE_SHADOW2D: 7035 case TGSI_TEXTURE_RECT: 7036 case TGSI_TEXTURE_SHADOWRECT: 7037 case TGSI_TEXTURE_2D_ARRAY: 7038 case TGSI_TEXTURE_SHADOW2D_ARRAY: 7039 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7040 alu.op = ALU_OP2_ADD_INT; 7041 alu.src[0].sel = src_gpr; 7042 alu.src[0].chan = 1; 7043 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7044 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 7045 alu.dst.sel = src_gpr; 7046 alu.dst.chan = 1; 7047 alu.dst.write = 1; 7048 alu.last = 1; 7049 r = r600_bytecode_add_alu(ctx->bc, &alu); 7050 if (r) 7051 return r; 7052 /* fall through */ 7053 7054 case TGSI_TEXTURE_1D: 7055 case TGSI_TEXTURE_SHADOW1D: 7056 case TGSI_TEXTURE_1D_ARRAY: 7057 case TGSI_TEXTURE_SHADOW1D_ARRAY: 7058 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7059 alu.op = ALU_OP2_ADD_INT; 7060 alu.src[0].sel = src_gpr; 7061 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7062 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 7063 alu.dst.sel = src_gpr; 7064 alu.dst.write = 1; 7065 alu.last = 1; 7066 r = r600_bytecode_add_alu(ctx->bc, &alu); 7067 if (r) 7068 return r; 7069 break; 7070 /* texture offsets do not apply to other texture targets */ 7071 } 7072 } else { 7073 switch (inst->Texture.Texture) { 7074 case TGSI_TEXTURE_3D: 7075 offset_z = ctx->literals[4 * 
inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 7076 /* fallthrough */ 7077 case TGSI_TEXTURE_2D: 7078 case TGSI_TEXTURE_SHADOW2D: 7079 case TGSI_TEXTURE_RECT: 7080 case TGSI_TEXTURE_SHADOWRECT: 7081 case TGSI_TEXTURE_2D_ARRAY: 7082 case TGSI_TEXTURE_SHADOW2D_ARRAY: 7083 offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 7084 /* fallthrough */ 7085 case TGSI_TEXTURE_1D: 7086 case TGSI_TEXTURE_SHADOW1D: 7087 case TGSI_TEXTURE_1D_ARRAY: 7088 case TGSI_TEXTURE_SHADOW1D_ARRAY: 7089 offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 7090 } 7091 } 7092 } 7093 7094 /* Obtain the sample index for reading a compressed MSAA color texture. 7095 * To read the FMASK, we use the ldfptr instruction, which tells us 7096 * where the samples are stored. 7097 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 7098 * which is the identity mapping. Each nibble says which physical sample 7099 * should be fetched to get that sample. 7100 * 7101 * Assume src.z contains the sample index. It should be modified like this: 7102 * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 7103 * Then fetch the texel with src. 
7104 */ 7105 if (read_compressed_msaa) { 7106 unsigned sample_chan = 3; 7107 unsigned temp = r600_get_temp(ctx); 7108 assert(src_loaded); 7109 7110 /* temp.w = ldfptr() */ 7111 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7112 tex.op = FETCH_OP_LD; 7113 tex.inst_mod = 1; /* to indicate this is ldfptr */ 7114 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7115 tex.sampler_index_mode = sampler_index_mode; 7116 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7117 tex.resource_index_mode = sampler_index_mode; 7118 tex.src_gpr = src_gpr; 7119 tex.dst_gpr = temp; 7120 tex.dst_sel_x = 7; /* mask out these components */ 7121 tex.dst_sel_y = 7; 7122 tex.dst_sel_z = 7; 7123 tex.dst_sel_w = 0; /* store X */ 7124 tex.src_sel_x = 0; 7125 tex.src_sel_y = 1; 7126 tex.src_sel_z = 2; 7127 tex.src_sel_w = 3; 7128 tex.offset_x = offset_x; 7129 tex.offset_y = offset_y; 7130 tex.offset_z = offset_z; 7131 r = r600_bytecode_add_tex(ctx->bc, &tex); 7132 if (r) 7133 return r; 7134 7135 /* temp.x = sample_index*4 */ 7136 if (ctx->bc->chip_class == CAYMAN) { 7137 for (i = 0 ; i < 4; i++) { 7138 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7139 alu.op = ALU_OP2_MULLO_INT; 7140 alu.src[0].sel = src_gpr; 7141 alu.src[0].chan = sample_chan; 7142 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7143 alu.src[1].value = 4; 7144 alu.dst.sel = temp; 7145 alu.dst.chan = i; 7146 alu.dst.write = i == 0; 7147 if (i == 3) 7148 alu.last = 1; 7149 r = r600_bytecode_add_alu(ctx->bc, &alu); 7150 if (r) 7151 return r; 7152 } 7153 } else { 7154 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7155 alu.op = ALU_OP2_MULLO_INT; 7156 alu.src[0].sel = src_gpr; 7157 alu.src[0].chan = sample_chan; 7158 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7159 alu.src[1].value = 4; 7160 alu.dst.sel = temp; 7161 alu.dst.chan = 0; 7162 alu.dst.write = 1; 7163 alu.last = 1; 7164 r = r600_bytecode_add_alu(ctx->bc, &alu); 7165 if (r) 7166 return r; 7167 } 7168 7169 /* sample_index = temp.w >> temp.x */ 7170 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7171 alu.op = ALU_OP2_LSHR_INT; 7172 alu.src[0].sel = temp; 7173 alu.src[0].chan = 3; 7174 alu.src[1].sel = temp; 7175 alu.src[1].chan = 0; 7176 alu.dst.sel = src_gpr; 7177 alu.dst.chan = sample_chan; 7178 alu.dst.write = 1; 7179 alu.last = 1; 7180 r = r600_bytecode_add_alu(ctx->bc, &alu); 7181 if (r) 7182 return r; 7183 7184 /* sample_index & 0xF */ 7185 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7186 alu.op = ALU_OP2_AND_INT; 7187 alu.src[0].sel = src_gpr; 7188 alu.src[0].chan = sample_chan; 7189 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7190 alu.src[1].value = 0xF; 7191 alu.dst.sel = src_gpr; 7192 alu.dst.chan = sample_chan; 7193 alu.dst.write = 1; 7194 alu.last = 1; 7195 r = r600_bytecode_add_alu(ctx->bc, &alu); 7196 if (r) 7197 return r; 7198#if 0 7199 /* visualize the FMASK */ 7200 for (i = 0; i < 4; i++) { 7201 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7202 alu.op = ALU_OP1_INT_TO_FLT; 7203 alu.src[0].sel = src_gpr; 7204 alu.src[0].chan = sample_chan; 7205 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7206 alu.dst.chan = i; 7207 alu.dst.write = 1; 7208 alu.last = 1; 7209 r = r600_bytecode_add_alu(ctx->bc, &alu); 7210 if (r) 7211 return r; 7212 } 7213 return 0; 7214#endif 7215 } 7216 7217 /* does this shader want a num layers from TXQ for a cube array? 
*/ 7218 if (has_txq_cube_array_z) { 7219 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7220 7221 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7222 alu.op = ALU_OP1_MOV; 7223 7224 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 7225 if (ctx->bc->chip_class >= EVERGREEN) { 7226 /* channel 1 or 3 of each word */ 7227 alu.src[0].sel += (id / 2); 7228 alu.src[0].chan = ((id % 2) * 2) + 1; 7229 } else { 7230 /* r600 we have them at channel 2 of the second dword */ 7231 alu.src[0].sel += (id * 2) + 1; 7232 alu.src[0].chan = 2; 7233 } 7234 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 7235 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 7236 alu.last = 1; 7237 r = r600_bytecode_add_alu(ctx->bc, &alu); 7238 if (r) 7239 return r; 7240 /* disable writemask from texture instruction */ 7241 inst->Dst[0].Register.WriteMask &= ~4; 7242 } 7243 7244 opcode = ctx->inst_info->op; 7245 if (opcode == FETCH_OP_GATHER4 && 7246 inst->TexOffsets[0].File != TGSI_FILE_NULL && 7247 inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { 7248 opcode = FETCH_OP_GATHER4_O; 7249 7250 /* GATHER4_O/GATHER4_C_O use offset values loaded by 7251 SET_TEXTURE_OFFSETS instruction. The immediate offset values 7252 encoded in the instruction are ignored. 
*/ 7253 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7254 tex.op = FETCH_OP_SET_TEXTURE_OFFSETS; 7255 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7256 tex.sampler_index_mode = sampler_index_mode; 7257 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7258 tex.resource_index_mode = sampler_index_mode; 7259 7260 tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; 7261 tex.src_sel_x = inst->TexOffsets[0].SwizzleX; 7262 tex.src_sel_y = inst->TexOffsets[0].SwizzleY; 7263 tex.src_sel_z = inst->TexOffsets[0].SwizzleZ; 7264 tex.src_sel_w = 4; 7265 7266 tex.dst_sel_x = 7; 7267 tex.dst_sel_y = 7; 7268 tex.dst_sel_z = 7; 7269 tex.dst_sel_w = 7; 7270 7271 r = r600_bytecode_add_tex(ctx->bc, &tex); 7272 if (r) 7273 return r; 7274 } 7275 7276 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 7277 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 7278 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 7279 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7280 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 7281 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 7282 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7283 switch (opcode) { 7284 case FETCH_OP_SAMPLE: 7285 opcode = FETCH_OP_SAMPLE_C; 7286 break; 7287 case FETCH_OP_SAMPLE_L: 7288 opcode = FETCH_OP_SAMPLE_C_L; 7289 break; 7290 case FETCH_OP_SAMPLE_LB: 7291 opcode = FETCH_OP_SAMPLE_C_LB; 7292 break; 7293 case FETCH_OP_SAMPLE_G: 7294 opcode = FETCH_OP_SAMPLE_C_G; 7295 break; 7296 /* Texture gather variants */ 7297 case FETCH_OP_GATHER4: 7298 opcode = FETCH_OP_GATHER4_C; 7299 break; 7300 case FETCH_OP_GATHER4_O: 7301 opcode = FETCH_OP_GATHER4_C_O; 7302 break; 7303 } 7304 } 7305 7306 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7307 tex.op = opcode; 7308 7309 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7310 tex.sampler_index_mode = sampler_index_mode; 7311 tex.resource_id = tex.sampler_id + 
R600_MAX_CONST_BUFFERS; 7312 tex.resource_index_mode = sampler_index_mode; 7313 tex.src_gpr = src_gpr; 7314 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7315 7316 if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE || 7317 inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) { 7318 tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */ 7319 } 7320 7321 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 7322 int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]; 7323 tex.inst_mod = texture_component_select; 7324 7325 if (ctx->bc->chip_class == CAYMAN) { 7326 /* GATHER4 result order is different from TGSI TG4 */ 7327 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7; 7328 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7; 7329 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7; 7330 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7331 } else { 7332 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7333 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 7334 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7335 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7336 } 7337 } 7338 else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) { 7339 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7340 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7341 tex.dst_sel_z = 7; 7342 tex.dst_sel_w = 7; 7343 } 7344 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 7345 tex.dst_sel_x = 3; 7346 tex.dst_sel_y = 7; 7347 tex.dst_sel_z = 7; 7348 tex.dst_sel_w = 7; 7349 } 7350 else { 7351 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7352 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7353 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 
2 : 7; 7354 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7355 } 7356 7357 7358 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ || 7359 inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 7360 tex.src_sel_x = 4; 7361 tex.src_sel_y = 4; 7362 tex.src_sel_z = 4; 7363 tex.src_sel_w = 4; 7364 } else if (src_loaded) { 7365 tex.src_sel_x = 0; 7366 tex.src_sel_y = 1; 7367 tex.src_sel_z = 2; 7368 tex.src_sel_w = 3; 7369 } else { 7370 tex.src_sel_x = ctx->src[0].swizzle[0]; 7371 tex.src_sel_y = ctx->src[0].swizzle[1]; 7372 tex.src_sel_z = ctx->src[0].swizzle[2]; 7373 tex.src_sel_w = ctx->src[0].swizzle[3]; 7374 tex.src_rel = ctx->src[0].rel; 7375 } 7376 7377 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 7378 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7379 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7380 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7381 tex.src_sel_x = 1; 7382 tex.src_sel_y = 0; 7383 tex.src_sel_z = 3; 7384 tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 7385 } 7386 7387 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 7388 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 7389 tex.coord_type_x = 1; 7390 tex.coord_type_y = 1; 7391 } 7392 tex.coord_type_z = 1; 7393 tex.coord_type_w = 1; 7394 7395 tex.offset_x = offset_x; 7396 tex.offset_y = offset_y; 7397 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && 7398 (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 7399 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { 7400 tex.offset_z = 0; 7401 } 7402 else { 7403 tex.offset_z = offset_z; 7404 } 7405 7406 /* Put the depth for comparison in W. 7407 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 7408 * Some instructions expect the depth in Z. 
/* LRP: dst = src0 * src1 + (1 - src0) * src2, emitted per enabled
 * write-mask channel.  When src0 is the 0.5 constant the result is just
 * the average of src1 and src2, emitted as a single ADD with an output
 * modifier; otherwise three passes build the full expression through
 * ctx->temp_reg.  Returns 0 on success or a bytecode-emission error. */
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	unsigned i, temp_regs[2];
	int r;

	/* optimize if it's just an equal balance */
	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
		for (i = 0; i < lasti + 1; i++) {
			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD;
			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
			/* output modifier: halves the ALU result, so this
			 * ADD directly yields (src1 + src2) / 2 */
			alu.omod = 3;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.dst.chan = i;
			if (i == lasti) {
				alu.last = 1;
			}
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		return 0;
	}

	/* 1 - src0 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		/* compute 1 + (-src0) */
		r600_bytecode_src_toggle_neg(&alu.src[1]);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* (1 - src0) * src2 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_MUL;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* src0 * src1 + (1 - src0) * src2 */
	/* presumably op3 instructions cannot encode |x| source modifiers,
	 * so sources with abs are routed through a temp register by
	 * tgsi_make_src_for_op3 - TODO confirm against its definition */
	if (ctx->src[0].abs)
		temp_regs[0] = r600_get_temp(ctx);
	else
		temp_regs[0] = 0;
	if (ctx->src[1].abs)
		temp_regs[1] = r600_get_temp(ctx);
	else
		temp_regs[1] = 0;

	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_MULADD;
		alu.is_op3 = 1;
		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
		if (r)
			return r;
		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]);
		if (r)
			return r;
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
temp_regs[0] = r600_get_temp(ctx); 7542 else 7543 temp_regs[0] = 0; 7544 if (ctx->src[1].abs) 7545 temp_regs[1] = r600_get_temp(ctx); 7546 else 7547 temp_regs[1] = 0; 7548 7549 for (i = 0; i < lasti + 1; i++) { 7550 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7551 continue; 7552 7553 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7554 alu.op = ALU_OP3_MULADD; 7555 alu.is_op3 = 1; 7556 r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); 7557 if (r) 7558 return r; 7559 r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]); 7560 if (r) 7561 return r; 7562 alu.src[2].sel = ctx->temp_reg; 7563 alu.src[2].chan = i; 7564 7565 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7566 alu.dst.chan = i; 7567 if (i == lasti) { 7568 alu.last = 1; 7569 } 7570 r = r600_bytecode_add_alu(ctx->bc, &alu); 7571 if (r) 7572 return r; 7573 } 7574 return 0; 7575} 7576 7577static int tgsi_cmp(struct r600_shader_ctx *ctx) 7578{ 7579 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7580 struct r600_bytecode_alu alu; 7581 int i, r, j; 7582 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7583 int temp_regs[3]; 7584 7585 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 7586 temp_regs[j] = 0; 7587 if (ctx->src[j].abs) 7588 temp_regs[j] = r600_get_temp(ctx); 7589 } 7590 7591 for (i = 0; i < lasti + 1; i++) { 7592 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7593 continue; 7594 7595 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7596 alu.op = ALU_OP3_CNDGE; 7597 r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); 7598 if (r) 7599 return r; 7600 r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]); 7601 if (r) 7602 return r; 7603 r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]); 7604 if (r) 7605 return r; 7606 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7607 alu.dst.chan = i; 7608 alu.dst.write = 1; 7609 alu.is_op3 = 1; 
7610 if (i == lasti) 7611 alu.last = 1; 7612 r = r600_bytecode_add_alu(ctx->bc, &alu); 7613 if (r) 7614 return r; 7615 } 7616 return 0; 7617} 7618 7619static int tgsi_ucmp(struct r600_shader_ctx *ctx) 7620{ 7621 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7622 struct r600_bytecode_alu alu; 7623 int i, r; 7624 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7625 7626 for (i = 0; i < lasti + 1; i++) { 7627 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7628 continue; 7629 7630 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7631 alu.op = ALU_OP3_CNDE_INT; 7632 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7633 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 7634 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 7635 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7636 alu.dst.chan = i; 7637 alu.dst.write = 1; 7638 alu.is_op3 = 1; 7639 if (i == lasti) 7640 alu.last = 1; 7641 r = r600_bytecode_add_alu(ctx->bc, &alu); 7642 if (r) 7643 return r; 7644 } 7645 return 0; 7646} 7647 7648static int tgsi_xpd(struct r600_shader_ctx *ctx) 7649{ 7650 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7651 static const unsigned int src0_swizzle[] = {2, 0, 1}; 7652 static const unsigned int src1_swizzle[] = {1, 2, 0}; 7653 struct r600_bytecode_alu alu; 7654 uint32_t use_temp = 0; 7655 int i, r; 7656 7657 if (inst->Dst[0].Register.WriteMask != 0xf) 7658 use_temp = 1; 7659 7660 for (i = 0; i < 4; i++) { 7661 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7662 alu.op = ALU_OP2_MUL; 7663 if (i < 3) { 7664 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 7665 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 7666 } else { 7667 alu.src[0].sel = V_SQ_ALU_SRC_0; 7668 alu.src[0].chan = i; 7669 alu.src[1].sel = V_SQ_ALU_SRC_0; 7670 alu.src[1].chan = i; 7671 } 7672 7673 alu.dst.sel = ctx->temp_reg; 7674 alu.dst.chan = i; 7675 alu.dst.write = 1; 7676 7677 if (i == 3) 7678 alu.last = 
1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* pass 2: dst = src0.yzx * src1.zxy - temp */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_MULADD;

		if (i < 3) {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
			r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
		} else {
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = i;
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = i;
		}

		/* subtract the first pass's partial product */
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].neg = 1;
		alu.src[2].chan = i;

		if (use_temp)
			alu.dst.sel = ctx->temp_reg;
		else
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	if (use_temp)
		return tgsi_helper_copy(ctx, inst);
	return 0;
}

/* EXP: the legacy four-component exponential.
 *   result.x = 2^floor(src.x)
 *   result.y = src.x - floor(src.x)
 *   result.z = approx 2^src.x
 *   result.w = 1.0
 * Each component is only computed if its writemask bit is set; results
 * are staged in temp_reg and copied out by tgsi_helper_copy().
 * On Cayman the t-slot-only EXP_IEEE runs as a 3-slot vector op. */
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i;

	/* result.x = 2^floor(src); */
	if (inst->Dst[0].Register.WriteMask & 1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_FLOOR;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 0;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		if (ctx->bc->chip_class == CAYMAN) {
			/* alu is intentionally reused: only op/src/dst fields change */
			for (i = 0; i < 3; i++) {
				alu.op = ALU_OP1_EXP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 0;

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				alu.dst.write = i == 0;
				alu.last = i == 2;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			alu.op
= ALU_OP1_EXP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 0;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 0;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.y = tmp - floor(tmp); */
	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_FRACT;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

		alu.dst.sel = ctx->temp_reg;
#if 0
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;
#endif
		alu.dst.write = 1;
		alu.dst.chan = 1;

		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* result.z = RoughApprox2ToX(tmp);*/
	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
		if (ctx->bc->chip_class == CAYMAN) {
			/* t-slot-only op: replicate over 3 vector slots, keep chan 2 */
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_EXP_IEEE;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 2) {
					alu.dst.write = 1;
					alu.last = 1;
				}

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_EXP_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;
			alu.dst.chan = 2;

			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.w = 1.0;*/
	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.dst.sel = ctx->temp_reg;
alu.dst.chan = 3; 7840 alu.dst.write = 1; 7841 alu.last = 1; 7842 r = r600_bytecode_add_alu(ctx->bc, &alu); 7843 if (r) 7844 return r; 7845 } 7846 return tgsi_helper_copy(ctx, inst); 7847} 7848 7849static int tgsi_log(struct r600_shader_ctx *ctx) 7850{ 7851 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7852 struct r600_bytecode_alu alu; 7853 int r; 7854 int i; 7855 7856 /* result.x = floor(log2(|src|)); */ 7857 if (inst->Dst[0].Register.WriteMask & 1) { 7858 if (ctx->bc->chip_class == CAYMAN) { 7859 for (i = 0; i < 3; i++) { 7860 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7861 7862 alu.op = ALU_OP1_LOG_IEEE; 7863 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7864 r600_bytecode_src_set_abs(&alu.src[0]); 7865 7866 alu.dst.sel = ctx->temp_reg; 7867 alu.dst.chan = i; 7868 if (i == 0) 7869 alu.dst.write = 1; 7870 if (i == 2) 7871 alu.last = 1; 7872 r = r600_bytecode_add_alu(ctx->bc, &alu); 7873 if (r) 7874 return r; 7875 } 7876 7877 } else { 7878 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7879 7880 alu.op = ALU_OP1_LOG_IEEE; 7881 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7882 r600_bytecode_src_set_abs(&alu.src[0]); 7883 7884 alu.dst.sel = ctx->temp_reg; 7885 alu.dst.chan = 0; 7886 alu.dst.write = 1; 7887 alu.last = 1; 7888 r = r600_bytecode_add_alu(ctx->bc, &alu); 7889 if (r) 7890 return r; 7891 } 7892 7893 alu.op = ALU_OP1_FLOOR; 7894 alu.src[0].sel = ctx->temp_reg; 7895 alu.src[0].chan = 0; 7896 7897 alu.dst.sel = ctx->temp_reg; 7898 alu.dst.chan = 0; 7899 alu.dst.write = 1; 7900 alu.last = 1; 7901 7902 r = r600_bytecode_add_alu(ctx->bc, &alu); 7903 if (r) 7904 return r; 7905 } 7906 7907 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 7908 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 7909 7910 if (ctx->bc->chip_class == CAYMAN) { 7911 for (i = 0; i < 3; i++) { 7912 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7913 7914 alu.op = ALU_OP1_LOG_IEEE; 7915 r600_bytecode_src(&alu.src[0], &ctx->src[0], 
0);
				r600_bytecode_src_set_abs(&alu.src[0]);

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				/* only channel 1 is needed; chan 2 just carries .last */
				if (i == 1)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.op = ALU_OP1_LOG_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
			r600_bytecode_src_set_abs(&alu.src[0]);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 1;
			alu.dst.write = 1;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp.y = floor(log2(|src|)) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_FLOOR;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 1;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* temp.y = 2^temp.y */
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_EXP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 1;

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 1)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_EXP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 1;
			alu.dst.write = 1;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp.y = 1 / temp.y */
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_RECIP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 1;

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 1)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_RECIP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 1;
			alu.dst.write = 1;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp.y = |src| * 2^-floor(log2(|src|)) — the mantissa */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP2_MUL;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
		r600_bytecode_src_set_abs(&alu.src[0]);

		alu.src[1].sel = ctx->temp_reg;
		alu.src[1].chan = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 1;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* result.z = log2(|src|);*/
	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP1_LOG_IEEE;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
				r600_bytecode_src_set_abs(&alu.src[0]);

				alu.dst.sel = ctx->temp_reg;
				/* only channel 2 is needed */
				if (i == 2)
					alu.dst.write = 1;
				alu.dst.chan = i;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.op = ALU_OP1_LOG_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
			r600_bytecode_src_set_abs(&alu.src[0]);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;
			alu.dst.chan = 2;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.w = 1.0; */
	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* move the staged components to the real destination */
	return tgsi_helper_copy(ctx, inst);
}

/* ARL/ARR/UARL on Evergreen+: write the (converted) index value into the
 * per-address-file register and invalidate the cached AR / loop-index
 * state so it is reloaded before the next relative access.
 * ARL floors, ARR rounds, UARL moves an already-integer value. */
static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index);

	assert(inst->Dst[0].Register.Index < 3);
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		alu.op = ALU_OP1_FLT_TO_INT_FLOOR;
		break;
	case TGSI_OPCODE_ARR:
		alu.op = ALU_OP1_FLT_TO_INT;
		break;
	case TGSI_OPCODE_UARL:
		alu.op = ALU_OP1_MOV;
		break;
	default:
		assert(0);
		return -1;
	}

	for (i = 0; i <= lasti; ++i) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.last = i == lasti;
		alu.dst.sel = reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* index 0 is AR; indices 1..2 are the loop/index registers */
	if (inst->Dst[0].Register.Index > 0)
		ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0;
	else
		ctx->bc->ar_loaded = 0;

	return 0;
}
/* ARL/ARR/UARL on r600/r700: stage the converted value in ar_reg and
 * mark AR as needing a reload.  FLT_TO_INT is a trans-unit-only op on
 * these chips, so each conversion must end its instruction group. */
static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst =
&ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		/* ARL = floor then convert: FLOOR into ar_reg first... */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLOOR;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}

		/* ...then FLT_TO_INT on ar_reg in place */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.src[0].sel = ctx->bc->ar_reg;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			alu.dst.chan = i;
			alu.src[0].chan = i;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}
		break;
	case TGSI_OPCODE_ARR:
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	case TGSI_OPCODE_UARL:
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	default:
		assert(0);
		return -1;
	}

	/* force AR reload before the next relative-addressed access */
	ctx->bc->ar_loaded = 0;
	return 0;
}

/* DST: distance vector, dst = (1, src0.y*src1.y, src0.z, src1.w).
 * Implemented as a single MUL per channel, substituting the constant 1
 * for the operand that is not used by that channel. */
static int tgsi_opdst(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r = 0;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP2_MUL;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		/* src0 contributes only to .y and .z */
		if (i == 0 || i == 3) {
			alu.src[0].sel = V_SQ_ALU_SRC_1;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		/* src1 contributes only to .y and .w */
		if (i == 0 || i == 2) {
			alu.src[1].sel = V_SQ_ALU_SRC_1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
		}
		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit a predicate-setting compare of src0.x against 0 that updates the
 * execute mask — the ALU half of an IF/BREAK construct.  alu_type selects
 * the CF clause kind (e.g. ALU_PUSH_BEFORE vs plain ALU). */
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type)
{
	struct r600_bytecode_alu alu;
	int r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = opcode;
	alu.execute_mask = 1;
	alu.update_pred = 1;

	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.dst.chan = 0;

	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	alu.src[1].sel = V_SQ_ALU_SRC_0;
	alu.src[1].chan = 0;

	alu.last = 1;

	r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
	if (r)
		return r;
	return 0;
}

/* Emit `pops` stack pops, folding them into the previous ALU clause
 * (ALU_POP_AFTER / ALU_POP2_AFTER) when possible, otherwise emitting an
 * explicit POP CF instruction. */
static int pops(struct r600_shader_ctx *ctx, int pops)
{
	unsigned force_pop = ctx->bc->force_add_cf;

	if (!force_pop) {
		int alu_pop = 3;
		if (ctx->bc->cf_last) {
			if (ctx->bc->cf_last->op == CF_OP_ALU)
				alu_pop = 0;
			else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER)
				alu_pop = 1;
		}
		alu_pop += pops;
		if (alu_pop == 1) {
			ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER;
			ctx->bc->force_add_cf = 1;
		} else if (alu_pop == 2) {
			ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
			ctx->bc->force_add_cf = 1;
		} else {
			/* can't fold more than 2 pops into an ALU clause */
			force_pop = 1;
		}
	}

	if (force_pop) {
		r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
		ctx->bc->cf_last->pop_count = pops;
		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
	}

	return 0;
}

/* Recompute the worst-case hardware branch-stack depth (STACK_SIZE)
 * from the current counts of loop, WQM-push and plain-push frames,
 * applying the per-chip-family reservation quirks. */
static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
		unsigned reason)
{
	struct r600_stack_info *stack = &ctx->bc->stack;
	unsigned elements, entries;

	unsigned entry_size = stack->entry_size;

	elements = (stack->loop + stack->push_wqm ) * entry_size;
	elements += stack->push;

	switch (ctx->bc->chip_class) {
	case R600:
	case R700:
		/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
		 * the stack must be reserved to hold the current active/continue
		 * masks */
		if (reason == FC_PUSH_VPM) {
			elements += 2;
		}
		break;

	case CAYMAN:
		/* r9xx: any stack operation on empty stack consumes 2 additional
		 * elements */
		elements += 2;

		/* fallthrough */
		/* FIXME: do the two elements added above cover the cases for the
		 * r8xx+ below? */

	case EVERGREEN:
		/* r8xx+: 2 extra elements are not always required, but one extra
		 * element must be added for each of the following cases:
		 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
		 *    stack usage.
		 *    (Currently we don't use ALU_ELSE_AFTER.)
		 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
		 *    PUSH instruction executed.
		 *
		 * NOTE: it seems we also need to reserve additional element in some
		 * other cases, e.g.
when we have 4 levels of PUSH_VPM in the shader, 8365 * then STACK_SIZE should be 2 instead of 1 */ 8366 if (reason == FC_PUSH_VPM) { 8367 elements += 1; 8368 } 8369 break; 8370 8371 default: 8372 assert(0); 8373 break; 8374 } 8375 8376 /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 8377 * for all chips, so we use 4 in the final formula, not the real entry_size 8378 * for the chip */ 8379 entry_size = 4; 8380 8381 entries = (elements + (entry_size - 1)) / entry_size; 8382 8383 if (entries > stack->max_entries) 8384 stack->max_entries = entries; 8385} 8386 8387static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 8388{ 8389 switch(reason) { 8390 case FC_PUSH_VPM: 8391 --ctx->bc->stack.push; 8392 assert(ctx->bc->stack.push >= 0); 8393 break; 8394 case FC_PUSH_WQM: 8395 --ctx->bc->stack.push_wqm; 8396 assert(ctx->bc->stack.push_wqm >= 0); 8397 break; 8398 case FC_LOOP: 8399 --ctx->bc->stack.loop; 8400 assert(ctx->bc->stack.loop >= 0); 8401 break; 8402 default: 8403 assert(0); 8404 break; 8405 } 8406} 8407 8408static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) 8409{ 8410 switch (reason) { 8411 case FC_PUSH_VPM: 8412 ++ctx->bc->stack.push; 8413 break; 8414 case FC_PUSH_WQM: 8415 ++ctx->bc->stack.push_wqm; 8416 case FC_LOOP: 8417 ++ctx->bc->stack.loop; 8418 break; 8419 default: 8420 assert(0); 8421 } 8422 8423 callstack_update_max_depth(ctx, reason); 8424} 8425 8426static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 8427{ 8428 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 8429 8430 sp->mid = realloc((void *)sp->mid, 8431 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 8432 sp->mid[sp->num_mid] = ctx->bc->cf_last; 8433 sp->num_mid++; 8434} 8435 8436static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 8437{ 8438 ctx->bc->fc_sp++; 8439 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 8440 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 8441} 

/* Close the top flow-control frame, releasing its mid list. */
static void fc_poplevel(struct r600_shader_ctx *ctx)
{
	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
	free(sp->mid);
	sp->mid = NULL;
	sp->num_mid = 0;
	sp->start = NULL;
	sp->type = 0;
	ctx->bc->fc_sp--;
}

/* Dead experimental subroutine/return scaffolding, kept for reference.
 * NOTE(review): it would not compile as-is (stray `))` on the
 * add_cfinst calls), but it is compiled out by the #if 0. */
#if 0
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN));
	return 0;
}

static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif

/* Open an IF: predicate-set on src0.x with a pushed execute mask, then a
 * JUMP whose target is patched at ELSE/ENDIF time. */
static int emit_if(struct r600_shader_ctx *ctx, int opcode)
{
	int alu_type = CF_OP_ALU_PUSH_BEFORE;

	/* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
	 * LOOP_STARTxxx for nested loops may put the branch stack into a state
	 * such that ALU_PUSH_BEFORE doesn't work as expected.
 Workaround this
	 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
	if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
		r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
		alu_type = CF_OP_ALU;
	}

	emit_logic_pred(ctx, opcode, alu_type);

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);

	fc_pushlevel(ctx, FC_IF);

	callstack_push(ctx, FC_PUSH_VPM);
	return 0;
}

/* IF: float comparison — taken when src != 0.0 */
static int tgsi_if(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE);
}

/* UIF: integer comparison — taken when src != 0 */
static int tgsi_uif(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE_INT);
}

/* ELSE: emit the ELSE CF, record it as the frame's mid entry, and patch
 * the opening JUMP to land here. */
static int tgsi_else(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, ctx->bc->fc_sp);
	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
	return 0;
}

/* ENDIF: pop one stack level and patch the pending JUMP (or the ELSE's
 * address) to the instruction after the construct. */
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
	pops(ctx, 1);
	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
		R600_ERR("if/endif unbalanced in shader\n");
		return -1;
	}

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
		/* no ELSE: the opening JUMP skips past the ENDIF */
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
	} else {
		/* with ELSE: the ELSE jumps past the ENDIF */
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
	}
	fc_poplevel(ctx);

	callstack_pop(ctx, FC_PUSH_VPM);
	return 0;
}

static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
{
	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
	 * limited to 4096 iterations, like the other LOOP_* instructions.
 */
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);

	fc_pushlevel(ctx, FC_LOOP);

	/* check stack depth */
	callstack_push(ctx, FC_LOOP);
	return 0;
}

/* ENDLOOP: emit LOOP_END and back-patch all loop-related CF addresses. */
static int tgsi_endloop(struct r600_shader_ctx *ctx)
{
	int i;

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
		R600_ERR("loop/endloop in shader code are not paired.\n");
		return -EINVAL;
	}

	/* fixup loop pointers - from r600isa
	   LOOP END points to CF after LOOP START,
	   LOOP START point to CF after LOOP END
	   BRK/CONT point to LOOP END CF
	*/
	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;

	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;

	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
	}
	/* XXX add LOOPRET support */
	fc_poplevel(ctx);
	callstack_pop(ctx, FC_LOOP);
	return 0;
}

/* BREAKC: conditional break out of the innermost enclosing loop. */
static int tgsi_loop_breakc(struct r600_shader_ctx *ctx)
{
	int r;
	unsigned int fscp;

	/* find the innermost FC_LOOP frame */
	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}
	if (fscp == 0) {
		R600_ERR("BREAKC not inside loop/endloop pair\n");
		return -EINVAL;
	}

	if (ctx->bc->chip_class == EVERGREEN &&
	    ctx->bc->family != CHIP_CYPRESS &&
	    ctx->bc->family != CHIP_JUNIPER) {
		/* HW bug: ALU_BREAK does not save the active mask correctly */
		r = tgsi_uif(ctx);
		if (r)
			return r;

		r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);

		return tgsi_endif(ctx);
	} else {
		r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);
	}

	return 0;
}

/* BRK/CONT: unconditional loop break or continue; the CF instruction's
 * target is back-patched by tgsi_endloop() via the mid list. */
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
	unsigned int fscp;

	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}

	if (fscp == 0) {
		R600_ERR("Break not inside loop/endloop pair\n");
		return -EINVAL;
	}

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);

	fc_set_mid(ctx, fscp);

	return 0;
}

/* EMIT/ENDPRIM for geometry shaders: flush ring writes for EMIT_VERTEX,
 * emit the CUT/EMIT CF op tagged with the target stream, then bump the
 * ring offset. */
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
	int r;

	if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
		emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);

	r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	if (!r) {
		ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
		return emit_inc_ring_offset(ctx, stream, TRUE);
	}
	return r;
}

/* UMAD: dst = src0 * src1 + src2 (unsigned), as MULLO_UINT into temp_reg
 * followed by ADD_INT.  MULLO_UINT is t-slot-only, so on Cayman it is
 * replicated across the vector slots with only slot i writing. */
static int tgsi_umad(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	/* src0 * src1 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP2_MULLO_UINT;
				for (k = 0; k < inst->Instruction.NumSrcRegs; k++) {
					r600_bytecode_src(&alu.src[k], &ctx->src[k], i);
				}
				alu.dst.chan = j;
				alu.dst.sel = ctx->temp_reg;
				alu.dst.write = (j == i);
				if (j == 3)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.dst.chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;

			alu.op = ALU_OP2_MULLO_UINT;
			for (j = 0; j < 2; j++) {
				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
			}

			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}


	/* + src2 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ALU_OP2_ADD_INT;

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI opcode dispatch table for r600/r700 (indexed by TGSI opcode). */
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_r600_arl},
	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},

	/* XXX:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
8766 */ 8767 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 8768 8769 [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, 8770 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 8771 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 8772 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 8773 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 8774 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 8775 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 8776 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 8777 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 8778 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 8779 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 8780 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 8781 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 8782 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 8783 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 8784 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 8785 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 8786 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 8787 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 8788 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 8789 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 8790 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 8791 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 8792 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 8793 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 8794 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 8795 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 8796 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 8797 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 8798 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 8799 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 8800 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 8801 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 8802 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8803 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 
8804 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 8805 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8806 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8807 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8808 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8809 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 8810 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 8811 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 8812 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 8813 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 8814 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 8815 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 8816 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 8817 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 8818 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 8819 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 8820 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8821 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8822 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8823 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8824 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 8825 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 8826 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl}, 8827 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 8828 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 8829 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 8830 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 8831 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 8832 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 8833 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8834 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 8835 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 8836 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 8837 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8838 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 8839 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 8840 
[TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 8841 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 8842 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 8843 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 8844 [TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 8845 [TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 8846 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 8847 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 8848 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 8849 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 8850 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 8851 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 8852 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans}, 8853 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 8854 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 8855 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 8856 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 8857 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 8858 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8859 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 8860 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8861 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 8862 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 8863 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 8864 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 8865 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8866 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 8867 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8868 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8869 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 8870 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 8871 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 8872 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 8873 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 8874 [TGSI_OPCODE_FSGE] = { 
ALU_OP2_SETGE_DX10, tgsi_op2}, 8875 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 8876 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 8877 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 8878 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 8879 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 8880 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc}, 8881 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 8882 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 8883 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 8884 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, 8885 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 8886 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 8887 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 8888 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 8889 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 8890 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans}, 8891 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 8892 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, 8893 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 8894 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 8895 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 8896 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 8897 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 8898 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 8899 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 8900 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 8901 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 8902 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 8903 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans}, 8904 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 8905 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap}, 8906 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8907 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 8908 
[TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 8909 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8910 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 8911 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 8912 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 8913 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 8914 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 8915 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 8916 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 8917 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 8918 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 8919 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 8920 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 8921 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 8922 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl}, 8923 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 8924 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 8925 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 8926 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8927 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 8928 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8929 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8930 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8931 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 8932 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 8933 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 8934 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 8935 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 8936 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8937 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8938 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8939 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8940 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8941 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8942 
[TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 8943 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8944 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8945 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 8946 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 8947 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported}, 8948 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported}, 8949 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported}, 8950 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported}, 8951 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported}, 8952 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported}, 8953 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported}, 8954 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported}, 8955 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported}, 8956 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_unsupported}, 8957 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported}, 8958 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported}, 8959 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported}, 8960 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 8961}; 8962 8963static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 8964 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 8965 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 8966 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 8967 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 8968 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, 8969 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 8970 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 8971 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 8972 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 8973 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 8974 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 8975 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 8976 [TGSI_OPCODE_MIN] = { 
ALU_OP2_MIN, tgsi_op2}, 8977 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 8978 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 8979 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 8980 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 8981 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 8982 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 8983 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 8984 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 8985 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 8986 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 8987 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 8988 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 8989 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 8990 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 8991 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 8992 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 8993 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 8994 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 8995 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 8996 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 8997 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 8998 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 8999 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 9000 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 9001 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9002 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9003 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 9004 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9005 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9006 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9007 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9008 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 9009 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 9010 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 9011 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 9012 
[TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 9013 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 9014 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 9015 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 9016 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 9017 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 9018 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 9019 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9020 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9021 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9022 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9023 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 9024 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 9025 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 9026 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 9027 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 9028 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 9029 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 9030 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 9031 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 9032 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9033 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 9034 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 9035 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 9036 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9037 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 9038 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 9039 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 9040 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 9041 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 9042 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 9043 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9044 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9045 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 9046 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 9047 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 9048 
[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 9049 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 9050 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 9051 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 9052 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 9053 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 9054 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 9055 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 9056 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 9057 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9058 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 9059 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9060 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 9061 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 9062 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 9063 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 9064 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9065 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 9066 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9067 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9068 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 9069 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 9070 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 9071 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 9072 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 9073 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 9074 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 9075 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 9076 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 9077 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 9078 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 9079 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 9080 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 9081 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka 
HALT */ 9082 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 9083 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i}, 9084 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 9085 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 9086 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 9087 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 9088 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 9089 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 9090 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 9091 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_f2i}, 9092 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 9093 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 9094 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 9095 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 9096 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 9097 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 9098 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 9099 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 9100 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 9101 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 9102 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 9103 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 9104 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 9105 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9106 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 9107 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 9108 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9109 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 9110 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 9111 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 9112 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 9113 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 9114 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 9115 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 9116 [TGSI_OPCODE_SAMPLE_L] = { 0, 
tgsi_unsupported}, 9117 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 9118 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 9119 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 9120 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 9121 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 9122 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 9123 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 9124 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 9125 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9126 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 9127 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9128 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9129 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9130 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 9131 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 9132 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 9133 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 9134 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 9135 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9136 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9137 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9138 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9139 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9140 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9141 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 9142 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9143 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9144 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 9145 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 9146 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 9147 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 9148 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 9149 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 9150 
[TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 9151 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 9152 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 9153 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 9154 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 9155 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 9156 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9157 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9158 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9159 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 9160 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 9161 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 9162 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 9163 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 9164 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 9165 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 9166 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 9167 [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 9168 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 9169 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 9170 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 9171 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 9172 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 9173 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 9174 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 9175 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 9176 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 9177 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 9178 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 9179 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 9180 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 9181 
[TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 9182 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 9183}; 9184 9185static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 9186 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 9187 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 9188 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 9189 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, 9190 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, 9191 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 9192 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 9193 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 9194 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 9195 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 9196 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 9197 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 9198 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 9199 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 9200 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 9201 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 9202 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 9203 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 9204 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 9205 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 9206 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, 9207 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 9208 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 9209 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 9210 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 9211 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 9212 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 9213 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 9214 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, 9215 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, 9216 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow}, 9217 [TGSI_OPCODE_XPD] = { 
ALU_OP0_NOP, tgsi_xpd}, 9218 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 9219 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 9220 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 9221 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 9222 [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig}, 9223 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9224 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9225 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 9226 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9227 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9228 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9229 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9230 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 9231 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 9232 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 9233 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 9234 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, cayman_trig}, 9235 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 9236 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 9237 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 9238 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 9239 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 9240 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 9241 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9242 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9243 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9244 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9245 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 9246 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 9247 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 9248 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 9249 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 9250 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 9251 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 9252 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 9253 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, 
tgsi_scs}, 9254 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9255 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 9256 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 9257 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 9258 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9259 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 9260 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 9261 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 9262 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 9263 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 9264 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 9265 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9266 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9267 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 9268 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 9269 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 9270 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2}, 9271 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 9272 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 9273 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 9274 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 9275 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 9276 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 9277 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 9278 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 9279 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9280 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 9281 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9282 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 9283 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 9284 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 9285 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 9286 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9287 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 9288 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, 
tgsi_unsupported}, 9289 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9290 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 9291 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 9292 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 9293 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 9294 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 9295 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 9296 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 9297 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 9298 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 9299 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 9300 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 9301 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 9302 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 9303 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 9304 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 9305 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2}, 9306 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 9307 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 9308 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 9309 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 9310 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 9311 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 9312 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 9313 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2}, 9314 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2}, 9315 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 9316 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 9317 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 9318 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 9319 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 9320 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 9321 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr}, 9322 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, 
tgsi_op2}, 9323 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 9324 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 9325 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 9326 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 9327 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9328 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 9329 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 9330 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9331 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 9332 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 9333 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 9334 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 9335 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 9336 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 9337 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 9338 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 9339 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 9340 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 9341 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 9342 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 9343 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 9344 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 9345 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 9346 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 9347 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9348 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 9349 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9350 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9351 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9352 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 9353 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 9354 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 9355 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 9356 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 9357 
[TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9358 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9359 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9360 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9361 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9362 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9363 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 9364 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9365 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9366 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr}, 9367 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, 9368 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 9369 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 9370 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 9371 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 9372 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 9373 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 9374 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 9375 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 9376 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 9377 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 9378 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9379 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9380 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9381 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 9382 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 9383 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 9384 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 9385 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 9386 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 9387 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 9388 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 9389 [TGSI_OPCODE_DSLT] = { 
ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 9390 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 9391 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 9392 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 9393 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 9394 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 9395 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 9396 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 9397 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 9398 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 9399 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 9400 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 9401 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 9402 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 9403 [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 9404 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 9405}; 9406