/* r600_shader.c — revision 2a9639e41fdcecb489e39f739e4d42e6a78655f3 */
/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "r600_sq.h"
#include "r600_llvm.h"
#include "r600_formats.h"
#include "r600_opcodes.h"
#include "r600_shader.h"
#include "r600d.h"

#include "sb/sb_public.h"

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include <stdio.h>
#include <errno.h>

/* CAYMAN notes
Why CAYMAN got loops for lots of instructions is explained here.

-These 8xx t-slot only ops are implemented in all vector slots.
MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT
These 8xx t-slot only opcodes become vector ops, with all four
slots expecting the arguments on sources a and b.
Result is broadcast to all channels.
MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
These 8xx t-slot only opcodes become vector ops in the z, y, and
x slots.
EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
SQRT_IEEE/_64
SIN/COS
The w slot may have an independent co-issued operation, or if the
result is required to be in the w slot, the opcode above may be
issued in the w slot as well.
The compiler must issue the source argument to slots z, y, and x
*/

#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
static int r600_shader_from_tgsi(struct r600_context *rctx,
				 struct r600_pipe_shader *pipeshader,
				 union r600_shader_key key);


/* Record one GPR-backed indexable array (used for TGSI indirect
 * temporaries) on the shader, growing the array list in chunks of 64.
 * NOTE(review): the realloc() result is not checked; on OOM the old
 * pointer is overwritten and dereferenced — confirm whether surrounding
 * driver policy is abort-on-OOM. */
static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
			       int size, unsigned comp_mask) {

	if (!size)
		return;

	if (ps->num_arrays == ps->max_arrays) {
		ps->max_arrays += 64;
		ps->arrays = realloc(ps->arrays, ps->max_arrays *
				     sizeof(struct r600_shader_array));
	}

	int n = ps->num_arrays;
	++ps->num_arrays;

	ps->arrays[n].comp_mask = comp_mask;
	ps->arrays[n].gpr_start = start_gpr;
	ps->arrays[n].gpr_count = size;
}

/* Print a human-readable dump of the stream-output (transform feedback)
 * declarations to stderr, one line per output, including the component
 * mask and whether the write needs lowering (dst_offset < start_component). */
static void r600_dump_streamout(struct pipe_stream_output_info *so)
{
	unsigned i;

	fprintf(stderr, "STREAMOUT\n");
	for (i = 0; i < so->num_outputs; i++) {
		unsigned mask = ((1 << so->output[i].num_components) - 1) <<
				so->output[i].start_component;
		fprintf(stderr, "  %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
			i,
			so->output[i].stream,
			so->output[i].output_buffer,
			so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
			so->output[i].register_index,
			mask & 1 ? "x" : "",
			mask & 2 ? "y" : "",
			mask & 4 ? "z" : "",
			mask & 8 ? "w" : "",
			so->output[i].dst_offset < so->output[i].start_component ?
				" (will lower)" : "");
	}
}

/* Upload the built bytecode into a GPU buffer object (shader->bo),
 * allocating the buffer on first use; subsequent calls are no-ops.
 * Words are byte-swapped to little-endian on big-endian hosts.
 * Returns 0 on success or -ENOMEM if the buffer cannot be created. */
static int store_shader(struct pipe_context *ctx,
			struct r600_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	uint32_t *ptr, i;

	if (shader->bo == NULL) {
		shader->bo = (struct r600_resource*)
			pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
		if (shader->bo == NULL) {
			return -ENOMEM;
		}
		/* NOTE(review): map result is not NULL-checked before use —
		 * confirm r600_buffer_map_sync_with_rings cannot fail here. */
		ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
		if (R600_BIG_ENDIAN) {
			for (i = 0; i < shader->shader.bc.ndw; ++i) {
				ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
			}
		} else {
			memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
		}
		rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
	}

	return 0;
}

/* Translate a TGSI shader into r600 bytecode, optionally run it through
 * the SB optimizing backend, upload it, and build the per-stage hardware
 * state.  On any failure the partially-built shader is destroyed and a
 * negative errno-style code is returned. */
int r600_pipe_shader_create(struct pipe_context *ctx,
			    struct r600_pipe_shader *shader,
			    union r600_shader_key key)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_pipe_shader_selector *sel = shader->selector;
	int r;
	bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
	unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
	unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
	unsigned export_shader;

	shader->shader.bc.isa = rctx->isa;

	if (dump) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		tgsi_dump(sel->tokens, 0);

		if (sel->so.num_outputs) {
			r600_dump_streamout(&sel->so);
		}
	}
	r = r600_shader_from_tgsi(rctx, shader, key);
	if (r) {
		R600_ERR("translation from TGSI failed !\n");
		goto error;
	}
	if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
		/* only disable for vertex shaders in tess paths */
		if (key.vs.as_ls)
			use_sb = 0;
	}
	/* SB is not used for tessellation stages */
	use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_TESS_CTRL);
	use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_TESS_EVAL);

	/* disable SB for shaders using doubles */
	use_sb &= !shader->shader.uses_doubles;

	/* Check if the bytecode has already been built. When using the llvm
	 * backend, r600_shader_from_tgsi() will take care of building the
	 * bytecode.
	 */
	if (!shader->shader.bc.bytecode) {
		r = r600_bytecode_build(&shader->shader.bc);
		if (r) {
			R600_ERR("building bytecode failed !\n");
			goto error;
		}
	}

	if (dump && !sb_disasm) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		r600_bytecode_disasm(&shader->shader.bc);
		fprintf(stderr, "______________________________________________________________\n");
	} else if ((dump && sb_disasm) || use_sb) {
		r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
					     dump, use_sb);
		if (r) {
			R600_ERR("r600_sb_bytecode_process failed !\n");
			goto error;
		}
	}

	if (shader->gs_copy_shader) {
		if (dump) {
			// dump copy shader
			r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
						     &shader->gs_copy_shader->shader, dump, 0);
			if (r)
				goto error;
		}

		if ((r = store_shader(ctx, shader->gs_copy_shader)))
			goto error;
	}

	/* Store the shader in a buffer. */
	if ((r = store_shader(ctx, shader)))
		goto error;

	/* Build state. */
	switch (shader->shader.processor_type) {
	case TGSI_PROCESSOR_TESS_CTRL:
		evergreen_update_hs_state(ctx, shader);
		break;
	case TGSI_PROCESSOR_TESS_EVAL:
		if (key.tes.as_es)
			evergreen_update_es_state(ctx, shader);
		else
			evergreen_update_vs_state(ctx, shader);
		break;
	case TGSI_PROCESSOR_GEOMETRY:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_gs_state(ctx, shader);
			evergreen_update_vs_state(ctx, shader->gs_copy_shader);
		} else {
			r600_update_gs_state(ctx, shader);
			r600_update_vs_state(ctx, shader->gs_copy_shader);
		}
		break;
	case TGSI_PROCESSOR_VERTEX:
		export_shader = key.vs.as_es;
		if (rctx->b.chip_class >= EVERGREEN) {
			if (key.vs.as_ls)
				evergreen_update_ls_state(ctx, shader);
			else if (key.vs.as_es)
				evergreen_update_es_state(ctx, shader);
			else
				evergreen_update_vs_state(ctx, shader);
		} else {
			if (export_shader)
				r600_update_es_state(ctx, shader);
			else
				r600_update_vs_state(ctx, shader);
		}
		break;
	case TGSI_PROCESSOR_FRAGMENT:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_ps_state(ctx, shader);
		} else {
			r600_update_ps_state(ctx, shader);
		}
		break;
	default:
		r = -EINVAL;
		goto error;
	}
	return 0;

error:
	r600_pipe_shader_destroy(ctx, shader);
	return r;
}

/* Release everything owned by a pipe shader: the GPU buffer, the
 * bytecode storage and the command buffer. */
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
	r600_bytecode_clear(&shader->shader.bc);
	r600_release_command_buffer(&shader->command_buffer);
}

/*
 * tgsi -> r600 shader
 */
struct r600_shader_tgsi_instruction;

/* Decoded TGSI source operand in r600 terms. */
struct r600_shader_src {
	unsigned sel;
	unsigned swizzle[4];
	unsigned neg;
	unsigned abs;
	unsigned rel;
	unsigned kc_bank;
	boolean kc_rel; /* true if cache bank is indexed */
	uint32_t value[4]; /* literal values when sel is a literal source */
};

/* One evergreen interpolator slot and the barycentric pair assigned to it. */
struct eg_interp {
	boolean enabled;
	unsigned ij_index;
};

/* Per-translation state threaded through the TGSI -> r600 conversion. */
struct r600_shader_ctx {
	struct tgsi_shader_info info;
	struct tgsi_parse_context parse;
	const struct tgsi_token *tokens;
	unsigned type;
	unsigned file_offset[TGSI_FILE_COUNT];
	unsigned temp_reg;
	const struct r600_shader_tgsi_instruction *inst_info;
	struct r600_bytecode *bc;
	struct r600_shader *shader;
	struct r600_shader_src src[4];
	uint32_t *literals;
	uint32_t nliterals;
	uint32_t max_driver_temp_used;
	boolean use_llvm;
	/* needed for evergreen interpolation */
	struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
	/* evergreen/cayman also store sample mask in face register */
	int face_gpr;
	/* sample id is .w component stored in fixed point position register */
	int fixed_pt_position_gpr;
	int colors_used;
	boolean clip_vertex_write;
	unsigned cv_output;
	unsigned edgeflag_output;
	int fragcoord_input;
	int native_integers;
	int next_ring_offset;
	int gs_out_ring_offset;
	int gs_next_vertex;
	struct r600_shader *gs_for_vs;
	int gs_export_gpr_tregs[4];
	const struct pipe_stream_output_info *gs_stream_output_info;
	unsigned enabled_stream_buffers_mask;
	unsigned tess_input_info; /* temp with tess input offsets */
	unsigned tess_output_info; /* temp with tess input offsets */
};

/* Maps one TGSI opcode to its emit callback. */
struct r600_shader_tgsi_instruction {
	unsigned op;
	int (*process)(struct r600_shader_ctx *ctx);
};

static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
				unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
				unsigned int dst_reg);
static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
			      const struct r600_shader_src *shader_src,
			      unsigned chan);

/* Return the index of the highest channel set in a TGSI write mask
 * (0 when the mask is empty). */
static int tgsi_last_instruction(unsigned writemask)
{
	int i, lasti = 0;

	for (i = 0; i < 4; i++) {
		if (writemask & (1 << i)) {
			lasti = i;
		}
	}
	return lasti;
}

/* Reject TGSI instructions this backend cannot handle: multiple
 * destinations (except DFRACEXP), predication, and dimensioned (2-D)
 * registers other than constants and geometry-shader inputs.
 * Returns 0 when supported, -EINVAL otherwise. */
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
	int j;

	if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
		return -EINVAL;
	}
	if (i->Instruction.Predicate) {
		R600_ERR("predicate unsupported\n");
		return -EINVAL;
	}
#if 0
	if (i->Instruction.Label) {
		R600_ERR("label unsupported\n");
		return -EINVAL;
	}
#endif
	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
		if (i->Src[j].Register.Dimension) {
			switch (i->Src[j].Register.File) {
			case TGSI_FILE_CONSTANT:
				break;
			case TGSI_FILE_INPUT:
				if (ctx->type == TGSI_PROCESSOR_GEOMETRY)
					break;
				/* fallthrough: dimensioned non-GS inputs are unsupported */
			default:
				R600_ERR("unsupported src %d (dimension %d)\n", j,
					 i->Src[j].Register.Dimension);
				return -EINVAL;
			}
		}
	}
	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
		if (i->Dst[j].Register.Dimension) {
			R600_ERR("unsupported dst (dimension)\n");
			return -EINVAL;
		}
	}
	return 0;
}

/* Map a TGSI interpolation mode + location pair onto the index used for
 * ctx->eg_interpolators[] (Persp/Linear * 3 + sample/center/centroid).
 * Returns -1 for modes needing no barycentrics (e.g. constant). */
int eg_get_interpolator_index(unsigned interpolate, unsigned location)
{
	if (interpolate == TGSI_INTERPOLATE_COLOR ||
		interpolate == TGSI_INTERPOLATE_LINEAR ||
		interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
	{
		int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
		int loc;

		switch(location) {
		case TGSI_INTERPOLATE_LOC_CENTER:
			loc = 1;
			break;
		case TGSI_INTERPOLATE_LOC_CENTROID:
			loc = 2;
			break;
		case TGSI_INTERPOLATE_LOC_SAMPLE:
		default:
			loc = 0; break;
		}

		return is_linear * 3 + loc;
	}

	return -1;
}

/* Copy the ij (barycentric pair) index previously assigned to this
 * input's interpolator slot into the shader input record. */
static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
		int input)
{
	int i = eg_get_interpolator_index(
		ctx->shader->input[input].interpolate,
		ctx->shader->input[input].interpolate_location);
	assert(i >= 0);
	ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
}

/* Emit the INTERP_ZW / INTERP_XY ALU groups that interpolate one input
 * attribute on evergreen.  Eight ops are emitted (two groups of four);
 * only the ops landing in the destination channels set dst.write. */
static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;
	int gpr = 0, base_chan = 0;
	int ij_index = ctx->shader->input[input].ij_index;

	/* work out gpr and base_chan from index */
	gpr = ij_index / 2;
	base_chan = (2 * (ij_index % 2)) + 1;

	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		if (i < 4)
			alu.op = ALU_OP2_INTERP_ZW;
		else
			alu.op = ALU_OP2_INTERP_XY;

		if ((i > 1) && (i < 6)) {
			alu.dst.sel = ctx->shader->input[input].gpr;
			alu.dst.write = 1;
		}

		alu.dst.chan = i % 4;

		alu.src[0].sel = gpr;
		alu.src[0].chan = (base_chan - (i % 2));

		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;

		alu.bank_swizzle_force = SQ_ALU_VEC_210;
		if ((i % 4) == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Load a flat (non-interpolated) input with INTERP_LOAD_P0, one ALU op
 * per channel, closing the group on the fourth op. */
static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_INTERP_LOAD_P0;

		alu.dst.sel = ctx->shader->input[input].gpr;
		alu.dst.write = 1;

		alu.dst.chan = i;

		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[0].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/*
 * Special export handling in shaders
 *
 * shader export ARRAY_BASE for EXPORT_POS:
 * 60 is position
 * 61 is misc vector
 * 62, 63 are clip distance vectors
 *
 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
 * exclusive from render target index)
 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
 *
 *
 * shader export ARRAY_BASE for EXPORT_PIXEL:
 * 0-7 CB targets
 * 61 computed Z vector
 *
 * The use of the values exported in the computed Z vector are controlled
 * by DB_SHADER_CONTROL:
 * Z_EXPORT_ENABLE - Z as a float in RED
 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
 * DB_SOURCE_FORMAT - export control restrictions
 *
 */


/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
554static int r600_spi_sid(struct r600_shader_io * io) 555{ 556 int index, name = io->name; 557 558 /* These params are handled differently, they don't need 559 * semantic indices, so we'll use 0 for them. 560 */ 561 if (name == TGSI_SEMANTIC_POSITION || 562 name == TGSI_SEMANTIC_PSIZE || 563 name == TGSI_SEMANTIC_EDGEFLAG || 564 name == TGSI_SEMANTIC_FACE || 565 name == TGSI_SEMANTIC_SAMPLEMASK) 566 index = 0; 567 else { 568 if (name == TGSI_SEMANTIC_GENERIC) { 569 /* For generic params simply use sid from tgsi */ 570 index = io->sid; 571 } else { 572 /* For non-generic params - pack name and sid into 8 bits */ 573 index = 0x80 | (name<<3) | (io->sid); 574 } 575 576 /* Make sure that all really used indices have nonzero value, so 577 * we can just compare it to 0 later instead of comparing the name 578 * with different values to detect special cases. */ 579 index++; 580 } 581 582 return index; 583}; 584 585/* we need this to get a common lds index for vs/tcs/tes input/outputs */ 586int r600_get_lds_unique_index(unsigned semantic_name, unsigned index) 587{ 588 switch (semantic_name) { 589 case TGSI_SEMANTIC_POSITION: 590 return 0; 591 case TGSI_SEMANTIC_PSIZE: 592 return 1; 593 case TGSI_SEMANTIC_CLIPDIST: 594 assert(index <= 1); 595 return 2 + index; 596 case TGSI_SEMANTIC_GENERIC: 597 if (index <= 63-4) 598 return 4 + index - 9; 599 else 600 /* same explanation as in the default statement, 601 * the only user hitting this is st/nine. 602 */ 603 return 0; 604 605 /* patch indices are completely separate and thus start from 0 */ 606 case TGSI_SEMANTIC_TESSOUTER: 607 return 0; 608 case TGSI_SEMANTIC_TESSINNER: 609 return 1; 610 case TGSI_SEMANTIC_PATCH: 611 return 2 + index; 612 613 default: 614 /* Don't fail here. The result of this function is only used 615 * for LS, TCS, TES, and GS, where legacy GL semantics can't 616 * occur, but this function is called for all vertex shaders 617 * before it's known whether LS will be compiled or not. 
618 */ 619 return 0; 620 } 621} 622 623/* turn input into interpolate on EG */ 624static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 625{ 626 int r = 0; 627 628 if (ctx->shader->input[index].spi_sid) { 629 ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 630 if (ctx->shader->input[index].interpolate > 0) { 631 evergreen_interp_assign_ij_index(ctx, index); 632 if (!ctx->use_llvm) 633 r = evergreen_interp_alu(ctx, index); 634 } else { 635 if (!ctx->use_llvm) 636 r = evergreen_interp_flat(ctx, index); 637 } 638 } 639 return r; 640} 641 642static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 643{ 644 struct r600_bytecode_alu alu; 645 int i, r; 646 int gpr_front = ctx->shader->input[front].gpr; 647 int gpr_back = ctx->shader->input[back].gpr; 648 649 for (i = 0; i < 4; i++) { 650 memset(&alu, 0, sizeof(alu)); 651 alu.op = ALU_OP3_CNDGT; 652 alu.is_op3 = 1; 653 alu.dst.write = 1; 654 alu.dst.sel = gpr_front; 655 alu.src[0].sel = ctx->face_gpr; 656 alu.src[1].sel = gpr_front; 657 alu.src[2].sel = gpr_back; 658 659 alu.dst.chan = i; 660 alu.src[1].chan = i; 661 alu.src[2].chan = i; 662 alu.last = (i==3); 663 664 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 665 return r; 666 } 667 668 return 0; 669} 670 671/* execute a single slot ALU calculation */ 672static int single_alu_op2(struct r600_shader_ctx *ctx, int op, 673 int dst_sel, int dst_chan, 674 int src0_sel, unsigned src0_chan_val, 675 int src1_sel, unsigned src1_chan_val) 676{ 677 struct r600_bytecode_alu alu; 678 int r, i; 679 680 if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) { 681 for (i = 0; i < 4; i++) { 682 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 683 alu.op = op; 684 alu.src[0].sel = src0_sel; 685 if (src0_sel == V_SQ_ALU_SRC_LITERAL) 686 alu.src[0].value = src0_chan_val; 687 else 688 alu.src[0].chan = src0_chan_val; 689 alu.src[1].sel = src1_sel; 690 if (src1_sel == V_SQ_ALU_SRC_LITERAL) 691 alu.src[1].value = src1_chan_val; 
692 else 693 alu.src[1].chan = src1_chan_val; 694 alu.dst.sel = dst_sel; 695 alu.dst.chan = i; 696 alu.dst.write = i == dst_chan; 697 alu.last = (i == 3); 698 r = r600_bytecode_add_alu(ctx->bc, &alu); 699 if (r) 700 return r; 701 } 702 return 0; 703 } 704 705 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 706 alu.op = op; 707 alu.src[0].sel = src0_sel; 708 if (src0_sel == V_SQ_ALU_SRC_LITERAL) 709 alu.src[0].value = src0_chan_val; 710 else 711 alu.src[0].chan = src0_chan_val; 712 alu.src[1].sel = src1_sel; 713 if (src1_sel == V_SQ_ALU_SRC_LITERAL) 714 alu.src[1].value = src1_chan_val; 715 else 716 alu.src[1].chan = src1_chan_val; 717 alu.dst.sel = dst_sel; 718 alu.dst.chan = dst_chan; 719 alu.dst.write = 1; 720 alu.last = 1; 721 r = r600_bytecode_add_alu(ctx->bc, &alu); 722 if (r) 723 return r; 724 return 0; 725} 726 727/* execute a single slot ALU calculation */ 728static int single_alu_op3(struct r600_shader_ctx *ctx, int op, 729 int dst_sel, int dst_chan, 730 int src0_sel, unsigned src0_chan_val, 731 int src1_sel, unsigned src1_chan_val, 732 int src2_sel, unsigned src2_chan_val) 733{ 734 struct r600_bytecode_alu alu; 735 int r; 736 737 /* validate this for other ops */ 738 assert(op == ALU_OP3_MULADD_UINT24); 739 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 740 alu.op = op; 741 alu.src[0].sel = src0_sel; 742 if (src0_sel == V_SQ_ALU_SRC_LITERAL) 743 alu.src[0].value = src0_chan_val; 744 else 745 alu.src[0].chan = src0_chan_val; 746 alu.src[1].sel = src1_sel; 747 if (src1_sel == V_SQ_ALU_SRC_LITERAL) 748 alu.src[1].value = src1_chan_val; 749 else 750 alu.src[1].chan = src1_chan_val; 751 alu.src[2].sel = src2_sel; 752 if (src2_sel == V_SQ_ALU_SRC_LITERAL) 753 alu.src[2].value = src2_chan_val; 754 else 755 alu.src[2].chan = src2_chan_val; 756 alu.dst.sel = dst_sel; 757 alu.dst.chan = dst_chan; 758 alu.is_op3 = 1; 759 alu.last = 1; 760 r = r600_bytecode_add_alu(ctx->bc, &alu); 761 if (r) 762 return r; 763 return 0; 764} 765 766static inline int 
get_address_file_reg(struct r600_shader_ctx *ctx, int index) 767{ 768 return index > 0 ? ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg; 769} 770 771static int r600_get_temp(struct r600_shader_ctx *ctx) 772{ 773 return ctx->temp_reg + ctx->max_driver_temp_used++; 774} 775 776static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid) 777{ 778 int i; 779 i = ctx->shader->noutput++; 780 ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID; 781 ctx->shader->output[i].sid = 0; 782 ctx->shader->output[i].gpr = 0; 783 ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT; 784 ctx->shader->output[i].write_mask = 0x4; 785 ctx->shader->output[i].spi_sid = prim_id_sid; 786 787 return 0; 788} 789 790static int tgsi_declaration(struct r600_shader_ctx *ctx) 791{ 792 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 793 int r, i, j, count = d->Range.Last - d->Range.First + 1; 794 795 switch (d->Declaration.File) { 796 case TGSI_FILE_INPUT: 797 for (j = 0; j < count; j++) { 798 i = ctx->shader->ninput + j; 799 assert(i < Elements(ctx->shader->input)); 800 ctx->shader->input[i].name = d->Semantic.Name; 801 ctx->shader->input[i].sid = d->Semantic.Index + j; 802 ctx->shader->input[i].interpolate = d->Interp.Interpolate; 803 ctx->shader->input[i].interpolate_location = d->Interp.Location; 804 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j; 805 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 806 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 807 switch (ctx->shader->input[i].name) { 808 case TGSI_SEMANTIC_FACE: 809 if (ctx->face_gpr != -1) 810 ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */ 811 else 812 ctx->face_gpr = ctx->shader->input[i].gpr; 813 break; 814 case TGSI_SEMANTIC_COLOR: 815 ctx->colors_used++; 816 break; 817 case TGSI_SEMANTIC_POSITION: 818 ctx->fragcoord_input = i; 819 break; 820 case TGSI_SEMANTIC_PRIMID: 821 /* 
set this for now */ 822 ctx->shader->gs_prim_id_input = true; 823 ctx->shader->ps_prim_id_input = i; 824 break; 825 } 826 if (ctx->bc->chip_class >= EVERGREEN) { 827 if ((r = evergreen_interp_input(ctx, i))) 828 return r; 829 } 830 } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { 831 /* FIXME probably skip inputs if they aren't passed in the ring */ 832 ctx->shader->input[i].ring_offset = ctx->next_ring_offset; 833 ctx->next_ring_offset += 16; 834 if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID) 835 ctx->shader->gs_prim_id_input = true; 836 } 837 } 838 ctx->shader->ninput += count; 839 break; 840 case TGSI_FILE_OUTPUT: 841 for (j = 0; j < count; j++) { 842 i = ctx->shader->noutput + j; 843 assert(i < Elements(ctx->shader->output)); 844 ctx->shader->output[i].name = d->Semantic.Name; 845 ctx->shader->output[i].sid = d->Semantic.Index + j; 846 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j; 847 ctx->shader->output[i].interpolate = d->Interp.Interpolate; 848 ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 849 if (ctx->type == TGSI_PROCESSOR_VERTEX || 850 ctx->type == TGSI_PROCESSOR_GEOMETRY) { 851 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 852 switch (d->Semantic.Name) { 853 case TGSI_SEMANTIC_CLIPDIST: 854 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << 855 ((d->Semantic.Index + j) << 2); 856 break; 857 case TGSI_SEMANTIC_PSIZE: 858 ctx->shader->vs_out_misc_write = 1; 859 ctx->shader->vs_out_point_size = 1; 860 break; 861 case TGSI_SEMANTIC_EDGEFLAG: 862 ctx->shader->vs_out_misc_write = 1; 863 ctx->shader->vs_out_edgeflag = 1; 864 ctx->edgeflag_output = i; 865 break; 866 case TGSI_SEMANTIC_VIEWPORT_INDEX: 867 ctx->shader->vs_out_misc_write = 1; 868 ctx->shader->vs_out_viewport = 1; 869 break; 870 case TGSI_SEMANTIC_LAYER: 871 ctx->shader->vs_out_misc_write = 1; 872 ctx->shader->vs_out_layer = 1; 873 break; 874 case TGSI_SEMANTIC_CLIPVERTEX: 875 ctx->clip_vertex_write 
= TRUE; 876 ctx->cv_output = i; 877 break; 878 } 879 if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { 880 ctx->gs_out_ring_offset += 16; 881 } 882 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 883 switch (d->Semantic.Name) { 884 case TGSI_SEMANTIC_COLOR: 885 ctx->shader->nr_ps_max_color_exports++; 886 break; 887 } 888 } 889 } 890 ctx->shader->noutput += count; 891 break; 892 case TGSI_FILE_TEMPORARY: 893 if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 894 if (d->Array.ArrayID) { 895 r600_add_gpr_array(ctx->shader, 896 ctx->file_offset[TGSI_FILE_TEMPORARY] + 897 d->Range.First, 898 d->Range.Last - d->Range.First + 1, 0x0F); 899 } 900 } 901 break; 902 903 case TGSI_FILE_CONSTANT: 904 case TGSI_FILE_SAMPLER: 905 case TGSI_FILE_SAMPLER_VIEW: 906 case TGSI_FILE_ADDRESS: 907 break; 908 909 case TGSI_FILE_SYSTEM_VALUE: 910 if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK || 911 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID || 912 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) { 913 break; /* Already handled from allocate_system_value_inputs */ 914 } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 915 if (!ctx->native_integers) { 916 struct r600_bytecode_alu alu; 917 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 918 919 alu.op = ALU_OP1_INT_TO_FLT; 920 alu.src[0].sel = 0; 921 alu.src[0].chan = 3; 922 923 alu.dst.sel = 0; 924 alu.dst.chan = 3; 925 alu.dst.write = 1; 926 alu.last = 1; 927 928 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 929 return r; 930 } 931 break; 932 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 933 break; 934 else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID) 935 break; 936 default: 937 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 938 return -EINVAL; 939 } 940 return 0; 941} 942 943static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset) 944{ 945 struct tgsi_parse_context parse; 946 struct { 947 boolean enabled; 948 int *reg; 949 unsigned name, alternate_name; 950 
} inputs[2] = { 951 { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */ 952 953 { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */ 954 }; 955 int i, k, num_regs = 0; 956 957 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 958 return 0; 959 } 960 961 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 962 while (!tgsi_parse_end_of_tokens(&parse)) { 963 tgsi_parse_token(&parse); 964 965 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 966 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 967 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 968 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 969 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 970 { 971 int interpolate, location, k; 972 973 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 974 location = TGSI_INTERPOLATE_LOC_CENTER; 975 inputs[1].enabled = true; /* needs SAMPLEID */ 976 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 977 location = TGSI_INTERPOLATE_LOC_CENTER; 978 /* Needs sample positions, currently those are always available */ 979 } else { 980 location = TGSI_INTERPOLATE_LOC_CENTROID; 981 } 982 983 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 984 k = eg_get_interpolator_index(interpolate, location); 985 ctx->eg_interpolators[k].enabled = true; 986 } 987 } else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { 988 struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration; 989 if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 990 for (k = 0; k < Elements(inputs); k++) { 991 if (d->Semantic.Name == inputs[k].name || 992 d->Semantic.Name == inputs[k].alternate_name) { 993 inputs[k].enabled = true; 994 } 995 } 996 } 997 } 998 } 999 1000 tgsi_parse_free(&parse); 1001 1002 for (i = 0; i 
< Elements(inputs); i++) { 1003 boolean enabled = inputs[i].enabled; 1004 int *reg = inputs[i].reg; 1005 unsigned name = inputs[i].name; 1006 1007 if (enabled) { 1008 int gpr = gpr_offset + num_regs++; 1009 1010 // add to inputs, allocate a gpr 1011 k = ctx->shader->ninput ++; 1012 ctx->shader->input[k].name = name; 1013 ctx->shader->input[k].sid = 0; 1014 ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT; 1015 ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER; 1016 *reg = ctx->shader->input[k].gpr = gpr; 1017 } 1018 } 1019 1020 return gpr_offset + num_regs; 1021} 1022 1023/* 1024 * for evergreen we need to scan the shader to find the number of GPRs we need to 1025 * reserve for interpolation and system values 1026 * 1027 * we need to know if we are going to emit 1028 * any sample or centroid inputs 1029 * if perspective and linear are required 1030*/ 1031static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 1032{ 1033 int i; 1034 int num_baryc; 1035 struct tgsi_parse_context parse; 1036 1037 memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators)); 1038 1039 for (i = 0; i < ctx->info.num_inputs; i++) { 1040 int k; 1041 /* skip position/face/mask/sampleid */ 1042 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 1043 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE || 1044 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK || 1045 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID) 1046 continue; 1047 1048 k = eg_get_interpolator_index( 1049 ctx->info.input_interpolate[i], 1050 ctx->info.input_interpolate_loc[i]); 1051 if (k >= 0) 1052 ctx->eg_interpolators[k].enabled = TRUE; 1053 } 1054 1055 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 1056 return 0; 1057 } 1058 1059 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 1060 while (!tgsi_parse_end_of_tokens(&parse)) { 1061 tgsi_parse_token(&parse); 1062 1063 if 
(parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 1064 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 1065 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 1066 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 1067 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 1068 { 1069 int interpolate, location, k; 1070 1071 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 1072 location = TGSI_INTERPOLATE_LOC_CENTER; 1073 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 1074 location = TGSI_INTERPOLATE_LOC_CENTER; 1075 } else { 1076 location = TGSI_INTERPOLATE_LOC_CENTROID; 1077 } 1078 1079 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 1080 k = eg_get_interpolator_index(interpolate, location); 1081 ctx->eg_interpolators[k].enabled = true; 1082 } 1083 } 1084 } 1085 1086 tgsi_parse_free(&parse); 1087 1088 /* assign gpr to each interpolator according to priority */ 1089 num_baryc = 0; 1090 for (i = 0; i < Elements(ctx->eg_interpolators); i++) { 1091 if (ctx->eg_interpolators[i].enabled) { 1092 ctx->eg_interpolators[i].ij_index = num_baryc; 1093 num_baryc ++; 1094 } 1095 } 1096 1097 /* XXX PULL MODEL and LINE STIPPLE */ 1098 1099 num_baryc = (num_baryc + 1) >> 1; 1100 return allocate_system_value_inputs(ctx, num_baryc); 1101} 1102 1103/* sample_id_sel == NULL means fetch for current sample */ 1104static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel) 1105{ 1106 struct r600_bytecode_vtx vtx; 1107 int r, t1; 1108 1109 assert(ctx->fixed_pt_position_gpr != -1); 1110 1111 t1 = r600_get_temp(ctx); 1112 1113 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 1114 vtx.op = FETCH_OP_VFETCH; 1115 vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 1116 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1117 if (sample_id == NULL) { 1118 vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; 1119 vtx.src_sel_x = 
3; 1120 } 1121 else { 1122 struct r600_bytecode_alu alu; 1123 1124 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1125 alu.op = ALU_OP1_MOV; 1126 r600_bytecode_src(&alu.src[0], sample_id, chan_sel); 1127 alu.dst.sel = t1; 1128 alu.dst.write = 1; 1129 alu.last = 1; 1130 r = r600_bytecode_add_alu(ctx->bc, &alu); 1131 if (r) 1132 return r; 1133 1134 vtx.src_gpr = t1; 1135 vtx.src_sel_x = 0; 1136 } 1137 vtx.mega_fetch_count = 16; 1138 vtx.dst_gpr = t1; 1139 vtx.dst_sel_x = 0; 1140 vtx.dst_sel_y = 1; 1141 vtx.dst_sel_z = 2; 1142 vtx.dst_sel_w = 3; 1143 vtx.data_format = FMT_32_32_32_32_FLOAT; 1144 vtx.num_format_all = 2; 1145 vtx.format_comp_all = 1; 1146 vtx.use_const_fields = 0; 1147 vtx.offset = 1; // first element is size of buffer 1148 vtx.endian = r600_endian_swap(32); 1149 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 1150 1151 r = r600_bytecode_add_vtx(ctx->bc, &vtx); 1152 if (r) 1153 return r; 1154 1155 return t1; 1156} 1157 1158static void tgsi_src(struct r600_shader_ctx *ctx, 1159 const struct tgsi_full_src_register *tgsi_src, 1160 struct r600_shader_src *r600_src) 1161{ 1162 memset(r600_src, 0, sizeof(*r600_src)); 1163 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 1164 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 1165 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 1166 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 1167 r600_src->neg = tgsi_src->Register.Negate; 1168 r600_src->abs = tgsi_src->Register.Absolute; 1169 1170 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 1171 int index; 1172 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 1173 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 1174 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 1175 1176 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 1177 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs); 1178 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 1179 
return; 1180 } 1181 index = tgsi_src->Register.Index; 1182 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 1183 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 1184 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 1185 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) { 1186 r600_src->swizzle[0] = 2; // Z value 1187 r600_src->swizzle[1] = 2; 1188 r600_src->swizzle[2] = 2; 1189 r600_src->swizzle[3] = 2; 1190 r600_src->sel = ctx->face_gpr; 1191 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) { 1192 r600_src->swizzle[0] = 3; // W value 1193 r600_src->swizzle[1] = 3; 1194 r600_src->swizzle[2] = 3; 1195 r600_src->swizzle[3] = 3; 1196 r600_src->sel = ctx->fixed_pt_position_gpr; 1197 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) { 1198 r600_src->swizzle[0] = 0; 1199 r600_src->swizzle[1] = 1; 1200 r600_src->swizzle[2] = 4; 1201 r600_src->swizzle[3] = 4; 1202 r600_src->sel = load_sample_position(ctx, NULL, -1); 1203 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 1204 r600_src->swizzle[0] = 3; 1205 r600_src->swizzle[1] = 3; 1206 r600_src->swizzle[2] = 3; 1207 r600_src->swizzle[3] = 3; 1208 r600_src->sel = 0; 1209 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 1210 r600_src->swizzle[0] = 0; 1211 r600_src->swizzle[1] = 0; 1212 r600_src->swizzle[2] = 0; 1213 r600_src->swizzle[3] = 0; 1214 r600_src->sel = 0; 1215 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { 1216 r600_src->swizzle[0] = 3; 1217 r600_src->swizzle[1] = 3; 1218 r600_src->swizzle[2] = 3; 1219 r600_src->swizzle[3] = 3; 1220 r600_src->sel = 1; 1221 } 1222 } else { 1223 if (tgsi_src->Register.Indirect) 1224 r600_src->rel = V_SQ_REL_RELATIVE; 1225 r600_src->sel = 
tgsi_src->Register.Index; 1226 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 1227 } 1228 if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) { 1229 if (tgsi_src->Register.Dimension) { 1230 r600_src->kc_bank = tgsi_src->Dimension.Index; 1231 if (tgsi_src->Dimension.Indirect) { 1232 r600_src->kc_rel = 1; 1233 } 1234 } 1235 } 1236} 1237 1238static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 1239 unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 1240 unsigned int dst_reg) 1241{ 1242 struct r600_bytecode_vtx vtx; 1243 unsigned int ar_reg; 1244 int r; 1245 1246 if (offset) { 1247 struct r600_bytecode_alu alu; 1248 1249 memset(&alu, 0, sizeof(alu)); 1250 1251 alu.op = ALU_OP2_ADD_INT; 1252 alu.src[0].sel = ctx->bc->ar_reg; 1253 alu.src[0].chan = ar_chan; 1254 1255 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1256 alu.src[1].value = offset; 1257 1258 alu.dst.sel = dst_reg; 1259 alu.dst.chan = ar_chan; 1260 alu.dst.write = 1; 1261 alu.last = 1; 1262 1263 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1264 return r; 1265 1266 ar_reg = dst_reg; 1267 } else { 1268 ar_reg = ctx->bc->ar_reg; 1269 } 1270 1271 memset(&vtx, 0, sizeof(vtx)); 1272 vtx.buffer_id = cb_idx; 1273 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1274 vtx.src_gpr = ar_reg; 1275 vtx.src_sel_x = ar_chan; 1276 vtx.mega_fetch_count = 16; 1277 vtx.dst_gpr = dst_reg; 1278 vtx.dst_sel_x = 0; /* SEL_X */ 1279 vtx.dst_sel_y = 1; /* SEL_Y */ 1280 vtx.dst_sel_z = 2; /* SEL_Z */ 1281 vtx.dst_sel_w = 3; /* SEL_W */ 1282 vtx.data_format = FMT_32_32_32_32_FLOAT; 1283 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 1284 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 1285 vtx.endian = r600_endian_swap(32); 1286 vtx.buffer_index_mode = cb_rel; // cb_rel ? 
V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE; 1287 1288 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1289 return r; 1290 1291 return 0; 1292} 1293 1294static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1295{ 1296 struct r600_bytecode_vtx vtx; 1297 int r; 1298 unsigned index = src->Register.Index; 1299 unsigned vtx_id = src->Dimension.Index; 1300 int offset_reg = vtx_id / 3; 1301 int offset_chan = vtx_id % 3; 1302 1303 /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, 1304 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ 1305 1306 if (offset_reg == 0 && offset_chan == 2) 1307 offset_chan = 3; 1308 1309 if (src->Dimension.Indirect) { 1310 int treg[3]; 1311 int t2; 1312 struct r600_bytecode_alu alu; 1313 int r, i; 1314 1315 /* you have got to be shitting me - 1316 we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt. 1317 at least this is what fglrx seems to do. */ 1318 for (i = 0; i < 3; i++) { 1319 treg[i] = r600_get_temp(ctx); 1320 } 1321 r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F); 1322 1323 t2 = r600_get_temp(ctx); 1324 for (i = 0; i < 3; i++) { 1325 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1326 alu.op = ALU_OP1_MOV; 1327 alu.src[0].sel = 0; 1328 alu.src[0].chan = i == 2 ? 
3 : i; 1329 alu.dst.sel = treg[i]; 1330 alu.dst.chan = 0; 1331 alu.dst.write = 1; 1332 alu.last = 1; 1333 r = r600_bytecode_add_alu(ctx->bc, &alu); 1334 if (r) 1335 return r; 1336 } 1337 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1338 alu.op = ALU_OP1_MOV; 1339 alu.src[0].sel = treg[0]; 1340 alu.src[0].rel = 1; 1341 alu.dst.sel = t2; 1342 alu.dst.write = 1; 1343 alu.last = 1; 1344 r = r600_bytecode_add_alu(ctx->bc, &alu); 1345 if (r) 1346 return r; 1347 offset_reg = t2; 1348 } 1349 1350 1351 memset(&vtx, 0, sizeof(vtx)); 1352 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 1353 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1354 vtx.src_gpr = offset_reg; 1355 vtx.src_sel_x = offset_chan; 1356 vtx.offset = index * 16; /*bytes*/ 1357 vtx.mega_fetch_count = 16; 1358 vtx.dst_gpr = dst_reg; 1359 vtx.dst_sel_x = 0; /* SEL_X */ 1360 vtx.dst_sel_y = 1; /* SEL_Y */ 1361 vtx.dst_sel_z = 2; /* SEL_Z */ 1362 vtx.dst_sel_w = 3; /* SEL_W */ 1363 if (ctx->bc->chip_class >= EVERGREEN) { 1364 vtx.use_const_fields = 1; 1365 } else { 1366 vtx.data_format = FMT_32_32_32_32_FLOAT; 1367 } 1368 1369 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1370 return r; 1371 1372 return 0; 1373} 1374 1375static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) 1376{ 1377 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1378 int i; 1379 1380 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1381 struct tgsi_full_src_register *src = &inst->Src[i]; 1382 1383 if (src->Register.File == TGSI_FILE_INPUT) { 1384 if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) { 1385 /* primitive id is in R0.z */ 1386 ctx->src[i].sel = 0; 1387 ctx->src[i].swizzle[0] = 2; 1388 } 1389 } 1390 if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { 1391 int treg = r600_get_temp(ctx); 1392 1393 fetch_gs_input(ctx, src, treg); 1394 ctx->src[i].sel = treg; 1395 } 1396 } 1397 return 0; 1398} 1399 1400static int tgsi_split_constant(struct 
r600_shader_ctx *ctx) 1401{ 1402 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1403 struct r600_bytecode_alu alu; 1404 int i, j, k, nconst, r; 1405 1406 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 1407 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 1408 nconst++; 1409 } 1410 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 1411 } 1412 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 1413 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 1414 continue; 1415 } 1416 1417 if (ctx->src[i].rel) { 1418 int chan = inst->Src[i].Indirect.Swizzle; 1419 int treg = r600_get_temp(ctx); 1420 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg))) 1421 return r; 1422 1423 ctx->src[i].kc_bank = 0; 1424 ctx->src[i].kc_rel = 0; 1425 ctx->src[i].sel = treg; 1426 ctx->src[i].rel = 0; 1427 j--; 1428 } else if (j > 0) { 1429 int treg = r600_get_temp(ctx); 1430 for (k = 0; k < 4; k++) { 1431 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1432 alu.op = ALU_OP1_MOV; 1433 alu.src[0].sel = ctx->src[i].sel; 1434 alu.src[0].chan = k; 1435 alu.src[0].rel = ctx->src[i].rel; 1436 alu.src[0].kc_bank = ctx->src[i].kc_bank; 1437 alu.src[0].kc_rel = ctx->src[i].kc_rel; 1438 alu.dst.sel = treg; 1439 alu.dst.chan = k; 1440 alu.dst.write = 1; 1441 if (k == 3) 1442 alu.last = 1; 1443 r = r600_bytecode_add_alu(ctx->bc, &alu); 1444 if (r) 1445 return r; 1446 } 1447 ctx->src[i].sel = treg; 1448 ctx->src[i].rel =0; 1449 j--; 1450 } 1451 } 1452 return 0; 1453} 1454 1455/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 1456static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 1457{ 1458 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1459 struct r600_bytecode_alu alu; 1460 int i, j, k, nliteral, r; 1461 1462 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 1463 if 
(ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1464 nliteral++; 1465 } 1466 } 1467 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 1468 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1469 int treg = r600_get_temp(ctx); 1470 for (k = 0; k < 4; k++) { 1471 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1472 alu.op = ALU_OP1_MOV; 1473 alu.src[0].sel = ctx->src[i].sel; 1474 alu.src[0].chan = k; 1475 alu.src[0].value = ctx->src[i].value[k]; 1476 alu.dst.sel = treg; 1477 alu.dst.chan = k; 1478 alu.dst.write = 1; 1479 if (k == 3) 1480 alu.last = 1; 1481 r = r600_bytecode_add_alu(ctx->bc, &alu); 1482 if (r) 1483 return r; 1484 } 1485 ctx->src[i].sel = treg; 1486 j--; 1487 } 1488 } 1489 return 0; 1490} 1491 1492static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 1493{ 1494 int i, r, count = ctx->shader->ninput; 1495 1496 for (i = 0; i < count; i++) { 1497 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 1498 r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input); 1499 if (r) 1500 return r; 1501 } 1502 } 1503 return 0; 1504} 1505 1506static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so, 1507 int stream, unsigned *stream_item_size) 1508{ 1509 unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; 1510 unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; 1511 int i, j, r; 1512 1513 /* Sanity checking. */ 1514 if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) { 1515 R600_ERR("Too many stream outputs: %d\n", so->num_outputs); 1516 r = -EINVAL; 1517 goto out_err; 1518 } 1519 for (i = 0; i < so->num_outputs; i++) { 1520 if (so->output[i].output_buffer >= 4) { 1521 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", 1522 so->output[i].output_buffer); 1523 r = -EINVAL; 1524 goto out_err; 1525 } 1526 } 1527 1528 /* Initialize locations where the outputs are stored. 
*/ 1529 for (i = 0; i < so->num_outputs; i++) { 1530 1531 so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr; 1532 start_comp[i] = so->output[i].start_component; 1533 /* Lower outputs with dst_offset < start_component. 1534 * 1535 * We can only output 4D vectors with a write mask, e.g. we can 1536 * only output the W component at offset 3, etc. If we want 1537 * to store Y, Z, or W at buffer offset 0, we need to use MOV 1538 * to move it to X and output X. */ 1539 if (so->output[i].dst_offset < so->output[i].start_component) { 1540 unsigned tmp = r600_get_temp(ctx); 1541 1542 for (j = 0; j < so->output[i].num_components; j++) { 1543 struct r600_bytecode_alu alu; 1544 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1545 alu.op = ALU_OP1_MOV; 1546 alu.src[0].sel = so_gpr[i]; 1547 alu.src[0].chan = so->output[i].start_component + j; 1548 1549 alu.dst.sel = tmp; 1550 alu.dst.chan = j; 1551 alu.dst.write = 1; 1552 if (j == so->output[i].num_components - 1) 1553 alu.last = 1; 1554 r = r600_bytecode_add_alu(ctx->bc, &alu); 1555 if (r) 1556 return r; 1557 } 1558 start_comp[i] = 0; 1559 so_gpr[i] = tmp; 1560 } 1561 } 1562 1563 /* Write outputs to buffers. 
*/ 1564 for (i = 0; i < so->num_outputs; i++) { 1565 struct r600_bytecode_output output; 1566 1567 if (stream != -1 && stream != so->output[i].output_buffer) 1568 continue; 1569 1570 memset(&output, 0, sizeof(struct r600_bytecode_output)); 1571 output.gpr = so_gpr[i]; 1572 output.elem_size = so->output[i].num_components - 1; 1573 if (output.elem_size == 2) 1574 output.elem_size = 3; // 3 not supported, write 4 with junk at end 1575 output.array_base = so->output[i].dst_offset - start_comp[i]; 1576 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 1577 output.burst_count = 1; 1578 /* array_size is an upper limit for the burst_count 1579 * with MEM_STREAM instructions */ 1580 output.array_size = 0xFFF; 1581 output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i]; 1582 1583 if (ctx->bc->chip_class >= EVERGREEN) { 1584 switch (so->output[i].output_buffer) { 1585 case 0: 1586 output.op = CF_OP_MEM_STREAM0_BUF0; 1587 break; 1588 case 1: 1589 output.op = CF_OP_MEM_STREAM0_BUF1; 1590 break; 1591 case 2: 1592 output.op = CF_OP_MEM_STREAM0_BUF2; 1593 break; 1594 case 3: 1595 output.op = CF_OP_MEM_STREAM0_BUF3; 1596 break; 1597 } 1598 output.op += so->output[i].stream * 4; 1599 assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3); 1600 ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4; 1601 } else { 1602 switch (so->output[i].output_buffer) { 1603 case 0: 1604 output.op = CF_OP_MEM_STREAM0; 1605 break; 1606 case 1: 1607 output.op = CF_OP_MEM_STREAM1; 1608 break; 1609 case 2: 1610 output.op = CF_OP_MEM_STREAM2; 1611 break; 1612 case 3: 1613 output.op = CF_OP_MEM_STREAM3; 1614 break; 1615 } 1616 ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer; 1617 } 1618 r = r600_bytecode_add_output(ctx->bc, &output); 1619 if (r) 1620 goto out_err; 1621 } 1622 return 0; 1623out_err: 1624 return r; 1625} 1626 1627static void convert_edgeflag_to_int(struct 
r600_shader_ctx *ctx) 1628{ 1629 struct r600_bytecode_alu alu; 1630 unsigned reg; 1631 1632 if (!ctx->shader->vs_out_edgeflag) 1633 return; 1634 1635 reg = ctx->shader->output[ctx->edgeflag_output].gpr; 1636 1637 /* clamp(x, 0, 1) */ 1638 memset(&alu, 0, sizeof(alu)); 1639 alu.op = ALU_OP1_MOV; 1640 alu.src[0].sel = reg; 1641 alu.dst.sel = reg; 1642 alu.dst.write = 1; 1643 alu.dst.clamp = 1; 1644 alu.last = 1; 1645 r600_bytecode_add_alu(ctx->bc, &alu); 1646 1647 memset(&alu, 0, sizeof(alu)); 1648 alu.op = ALU_OP1_FLT_TO_INT; 1649 alu.src[0].sel = reg; 1650 alu.dst.sel = reg; 1651 alu.dst.write = 1; 1652 alu.last = 1; 1653 r600_bytecode_add_alu(ctx->bc, &alu); 1654} 1655 1656static int generate_gs_copy_shader(struct r600_context *rctx, 1657 struct r600_pipe_shader *gs, 1658 struct pipe_stream_output_info *so) 1659{ 1660 struct r600_shader_ctx ctx = {}; 1661 struct r600_shader *gs_shader = &gs->shader; 1662 struct r600_pipe_shader *cshader; 1663 int ocnt = gs_shader->noutput; 1664 struct r600_bytecode_alu alu; 1665 struct r600_bytecode_vtx vtx; 1666 struct r600_bytecode_output output; 1667 struct r600_bytecode_cf *cf_jump, *cf_pop, 1668 *last_exp_pos = NULL, *last_exp_param = NULL; 1669 int i, j, next_clip_pos = 61, next_param = 0; 1670 int ring; 1671 1672 cshader = calloc(1, sizeof(struct r600_pipe_shader)); 1673 if (!cshader) 1674 return 0; 1675 1676 memcpy(cshader->shader.output, gs_shader->output, ocnt * 1677 sizeof(struct r600_shader_io)); 1678 1679 cshader->shader.noutput = ocnt; 1680 1681 ctx.shader = &cshader->shader; 1682 ctx.bc = &ctx.shader->bc; 1683 ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX; 1684 1685 r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family, 1686 rctx->screen->has_compressed_msaa_texturing); 1687 1688 ctx.bc->isa = rctx->isa; 1689 1690 cf_jump = NULL; 1691 memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes)); 1692 1693 /* R0.x = R0.x & 0x3fffffff */ 1694 memset(&alu, 0, sizeof(alu)); 1695 alu.op = 
ALU_OP2_AND_INT; 1696 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1697 alu.src[1].value = 0x3fffffff; 1698 alu.dst.write = 1; 1699 r600_bytecode_add_alu(ctx.bc, &alu); 1700 1701 /* R0.y = R0.x >> 30 */ 1702 memset(&alu, 0, sizeof(alu)); 1703 alu.op = ALU_OP2_LSHR_INT; 1704 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1705 alu.src[1].value = 0x1e; 1706 alu.dst.chan = 1; 1707 alu.dst.write = 1; 1708 alu.last = 1; 1709 r600_bytecode_add_alu(ctx.bc, &alu); 1710 1711 /* fetch vertex data from GSVS ring */ 1712 for (i = 0; i < ocnt; ++i) { 1713 struct r600_shader_io *out = &ctx.shader->output[i]; 1714 1715 out->gpr = i + 1; 1716 out->ring_offset = i * 16; 1717 1718 memset(&vtx, 0, sizeof(vtx)); 1719 vtx.op = FETCH_OP_VFETCH; 1720 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 1721 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1722 vtx.mega_fetch_count = 16; 1723 vtx.offset = out->ring_offset; 1724 vtx.dst_gpr = out->gpr; 1725 vtx.src_gpr = 0; 1726 vtx.dst_sel_x = 0; 1727 vtx.dst_sel_y = 1; 1728 vtx.dst_sel_z = 2; 1729 vtx.dst_sel_w = 3; 1730 if (rctx->b.chip_class >= EVERGREEN) { 1731 vtx.use_const_fields = 1; 1732 } else { 1733 vtx.data_format = FMT_32_32_32_32_FLOAT; 1734 } 1735 1736 r600_bytecode_add_vtx(ctx.bc, &vtx); 1737 } 1738 ctx.temp_reg = i + 1; 1739 for (ring = 3; ring >= 0; --ring) { 1740 bool enabled = false; 1741 for (i = 0; i < so->num_outputs; i++) { 1742 if (so->output[i].stream == ring) { 1743 enabled = true; 1744 break; 1745 } 1746 } 1747 if (ring != 0 && !enabled) { 1748 cshader->shader.ring_item_sizes[ring] = 0; 1749 continue; 1750 } 1751 1752 if (cf_jump) { 1753 // Patch up jump label 1754 r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 1755 cf_pop = ctx.bc->cf_last; 1756 1757 cf_jump->cf_addr = cf_pop->id + 2; 1758 cf_jump->pop_count = 1; 1759 cf_pop->cf_addr = cf_pop->id + 2; 1760 cf_pop->pop_count = 1; 1761 } 1762 1763 /* PRED_SETE_INT __, R0.y, ring */ 1764 memset(&alu, 0, sizeof(alu)); 1765 alu.op = ALU_OP2_PRED_SETE_INT; 1766 alu.src[0].chan = 1; 1767 
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1768 alu.src[1].value = ring; 1769 alu.execute_mask = 1; 1770 alu.update_pred = 1; 1771 alu.last = 1; 1772 r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE); 1773 1774 r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP); 1775 cf_jump = ctx.bc->cf_last; 1776 1777 if (enabled) 1778 emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]); 1779 cshader->shader.ring_item_sizes[ring] = ocnt * 16; 1780 } 1781 1782 /* bc adds nops - copy it */ 1783 if (ctx.bc->chip_class == R600) { 1784 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1785 alu.op = ALU_OP0_NOP; 1786 alu.last = 1; 1787 r600_bytecode_add_alu(ctx.bc, &alu); 1788 1789 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 1790 } 1791 1792 /* export vertex data */ 1793 /* XXX factor out common code with r600_shader_from_tgsi ? */ 1794 for (i = 0; i < ocnt; ++i) { 1795 struct r600_shader_io *out = &ctx.shader->output[i]; 1796 bool instream0 = true; 1797 if (out->name == TGSI_SEMANTIC_CLIPVERTEX) 1798 continue; 1799 1800 for (j = 0; j < so->num_outputs; j++) { 1801 if (so->output[j].register_index == i) { 1802 if (so->output[j].stream == 0) 1803 break; 1804 if (so->output[j].stream > 0) 1805 instream0 = false; 1806 } 1807 } 1808 if (!instream0) 1809 continue; 1810 memset(&output, 0, sizeof(output)); 1811 output.gpr = out->gpr; 1812 output.elem_size = 3; 1813 output.swizzle_x = 0; 1814 output.swizzle_y = 1; 1815 output.swizzle_z = 2; 1816 output.swizzle_w = 3; 1817 output.burst_count = 1; 1818 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1819 output.op = CF_OP_EXPORT; 1820 switch (out->name) { 1821 case TGSI_SEMANTIC_POSITION: 1822 output.array_base = 60; 1823 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1824 break; 1825 1826 case TGSI_SEMANTIC_PSIZE: 1827 output.array_base = 61; 1828 if (next_clip_pos == 61) 1829 next_clip_pos = 62; 1830 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1831 output.swizzle_y = 7; 1832 
output.swizzle_z = 7; 1833 output.swizzle_w = 7; 1834 ctx.shader->vs_out_misc_write = 1; 1835 ctx.shader->vs_out_point_size = 1; 1836 break; 1837 case TGSI_SEMANTIC_LAYER: 1838 if (out->spi_sid) { 1839 /* duplicate it as PARAM to pass to the pixel shader */ 1840 output.array_base = next_param++; 1841 r600_bytecode_add_output(ctx.bc, &output); 1842 last_exp_param = ctx.bc->cf_last; 1843 } 1844 output.array_base = 61; 1845 if (next_clip_pos == 61) 1846 next_clip_pos = 62; 1847 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1848 output.swizzle_x = 7; 1849 output.swizzle_y = 7; 1850 output.swizzle_z = 0; 1851 output.swizzle_w = 7; 1852 ctx.shader->vs_out_misc_write = 1; 1853 ctx.shader->vs_out_layer = 1; 1854 break; 1855 case TGSI_SEMANTIC_VIEWPORT_INDEX: 1856 if (out->spi_sid) { 1857 /* duplicate it as PARAM to pass to the pixel shader */ 1858 output.array_base = next_param++; 1859 r600_bytecode_add_output(ctx.bc, &output); 1860 last_exp_param = ctx.bc->cf_last; 1861 } 1862 output.array_base = 61; 1863 if (next_clip_pos == 61) 1864 next_clip_pos = 62; 1865 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1866 ctx.shader->vs_out_misc_write = 1; 1867 ctx.shader->vs_out_viewport = 1; 1868 output.swizzle_x = 7; 1869 output.swizzle_y = 7; 1870 output.swizzle_z = 7; 1871 output.swizzle_w = 0; 1872 break; 1873 case TGSI_SEMANTIC_CLIPDIST: 1874 /* spi_sid is 0 for clipdistance outputs that were generated 1875 * for clipvertex - we don't need to pass them to PS */ 1876 ctx.shader->clip_dist_write = gs->shader.clip_dist_write; 1877 if (out->spi_sid) { 1878 /* duplicate it as PARAM to pass to the pixel shader */ 1879 output.array_base = next_param++; 1880 r600_bytecode_add_output(ctx.bc, &output); 1881 last_exp_param = ctx.bc->cf_last; 1882 } 1883 output.array_base = next_clip_pos++; 1884 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1885 break; 1886 case TGSI_SEMANTIC_FOG: 1887 output.swizzle_y = 4; /* 0 */ 1888 output.swizzle_z = 4; /* 0 */ 1889 
output.swizzle_w = 5; /* 1 */ 1890 break; 1891 default: 1892 output.array_base = next_param++; 1893 break; 1894 } 1895 r600_bytecode_add_output(ctx.bc, &output); 1896 if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) 1897 last_exp_param = ctx.bc->cf_last; 1898 else 1899 last_exp_pos = ctx.bc->cf_last; 1900 } 1901 1902 if (!last_exp_pos) { 1903 memset(&output, 0, sizeof(output)); 1904 output.gpr = 0; 1905 output.elem_size = 3; 1906 output.swizzle_x = 7; 1907 output.swizzle_y = 7; 1908 output.swizzle_z = 7; 1909 output.swizzle_w = 7; 1910 output.burst_count = 1; 1911 output.type = 2; 1912 output.op = CF_OP_EXPORT; 1913 output.array_base = 60; 1914 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1915 r600_bytecode_add_output(ctx.bc, &output); 1916 last_exp_pos = ctx.bc->cf_last; 1917 } 1918 1919 if (!last_exp_param) { 1920 memset(&output, 0, sizeof(output)); 1921 output.gpr = 0; 1922 output.elem_size = 3; 1923 output.swizzle_x = 7; 1924 output.swizzle_y = 7; 1925 output.swizzle_z = 7; 1926 output.swizzle_w = 7; 1927 output.burst_count = 1; 1928 output.type = 2; 1929 output.op = CF_OP_EXPORT; 1930 output.array_base = next_param++; 1931 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1932 r600_bytecode_add_output(ctx.bc, &output); 1933 last_exp_param = ctx.bc->cf_last; 1934 } 1935 1936 last_exp_pos->op = CF_OP_EXPORT_DONE; 1937 last_exp_param->op = CF_OP_EXPORT_DONE; 1938 1939 r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 1940 cf_pop = ctx.bc->cf_last; 1941 1942 cf_jump->cf_addr = cf_pop->id + 2; 1943 cf_jump->pop_count = 1; 1944 cf_pop->cf_addr = cf_pop->id + 2; 1945 cf_pop->pop_count = 1; 1946 1947 if (ctx.bc->chip_class == CAYMAN) 1948 cm_bytecode_add_cf_end(ctx.bc); 1949 else { 1950 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 1951 ctx.bc->cf_last->end_of_program = 1; 1952 } 1953 1954 gs->gs_copy_shader = cshader; 1955 cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 1956 1957 ctx.bc->nstack = 1; 1958 1959 return 
r600_bytecode_build(ctx.bc); 1960} 1961 1962static int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind) 1963{ 1964 if (ind) { 1965 struct r600_bytecode_alu alu; 1966 int r; 1967 1968 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1969 alu.op = ALU_OP2_ADD_INT; 1970 alu.src[0].sel = ctx->gs_export_gpr_tregs[idx]; 1971 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1972 alu.src[1].value = ctx->gs_out_ring_offset >> 4; 1973 alu.dst.sel = ctx->gs_export_gpr_tregs[idx]; 1974 alu.dst.write = 1; 1975 alu.last = 1; 1976 r = r600_bytecode_add_alu(ctx->bc, &alu); 1977 if (r) 1978 return r; 1979 } 1980 return 0; 1981} 1982 1983static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind) 1984{ 1985 struct r600_bytecode_output output; 1986 int i, k, ring_offset; 1987 int effective_stream = stream == -1 ? 0 : stream; 1988 int idx = 0; 1989 1990 for (i = 0; i < ctx->shader->noutput; i++) { 1991 if (ctx->gs_for_vs) { 1992 /* for ES we need to lookup corresponding ring offset expected by GS 1993 * (map this output to GS input by name and sid) */ 1994 /* FIXME precompute offsets */ 1995 ring_offset = -1; 1996 for(k = 0; k < ctx->gs_for_vs->ninput; ++k) { 1997 struct r600_shader_io *in = &ctx->gs_for_vs->input[k]; 1998 struct r600_shader_io *out = &ctx->shader->output[i]; 1999 if (in->name == out->name && in->sid == out->sid) 2000 ring_offset = in->ring_offset; 2001 } 2002 2003 if (ring_offset == -1) 2004 continue; 2005 } else { 2006 ring_offset = idx * 16; 2007 idx++; 2008 } 2009 2010 if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) 2011 continue; 2012 /* next_ring_offset after parsing input decls contains total size of 2013 * single vertex data, gs_next_vertex - current vertex index */ 2014 if (!ind) 2015 ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex; 2016 2017 memset(&output, 0, sizeof(struct r600_bytecode_output)); 2018 output.gpr = ctx->shader->output[i].gpr; 
2019 output.elem_size = 3; 2020 output.comp_mask = 0xF; 2021 output.burst_count = 1; 2022 2023 if (ind) 2024 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 2025 else 2026 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 2027 2028 switch (stream) { 2029 default: 2030 case 0: 2031 output.op = CF_OP_MEM_RING; break; 2032 case 1: 2033 output.op = CF_OP_MEM_RING1; break; 2034 case 2: 2035 output.op = CF_OP_MEM_RING2; break; 2036 case 3: 2037 output.op = CF_OP_MEM_RING3; break; 2038 } 2039 2040 if (ind) { 2041 output.array_base = ring_offset >> 2; /* in dwords */ 2042 output.array_size = 0xfff; 2043 output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream]; 2044 } else 2045 output.array_base = ring_offset >> 2; /* in dwords */ 2046 r600_bytecode_add_output(ctx->bc, &output); 2047 } 2048 2049 ++ctx->gs_next_vertex; 2050 return 0; 2051} 2052 2053 2054static int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx) 2055{ 2056 int r; 2057 struct r600_bytecode_vtx vtx; 2058 int temp_val = ctx->temp_reg; 2059 /* need to store the TCS output somewhere */ 2060 r = single_alu_op2(ctx, ALU_OP1_MOV, 2061 temp_val, 0, 2062 V_SQ_ALU_SRC_LITERAL, 0, 2063 0, 0); 2064 if (r) 2065 return r; 2066 2067 /* used by VS/TCS */ 2068 if (ctx->tess_input_info) { 2069 /* fetch tcs input values into resv space */ 2070 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 2071 vtx.op = FETCH_OP_VFETCH; 2072 vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER; 2073 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 2074 vtx.mega_fetch_count = 16; 2075 vtx.data_format = FMT_32_32_32_32; 2076 vtx.num_format_all = 2; 2077 vtx.format_comp_all = 1; 2078 vtx.use_const_fields = 0; 2079 vtx.endian = r600_endian_swap(32); 2080 vtx.srf_mode_all = 1; 2081 vtx.offset = 0; 2082 vtx.dst_gpr = ctx->tess_input_info; 2083 vtx.dst_sel_x = 0; 2084 vtx.dst_sel_y = 1; 2085 vtx.dst_sel_z = 2; 2086 vtx.dst_sel_w = 3; 2087 vtx.src_gpr = temp_val; 2088 vtx.src_sel_x = 0; 2089 2090 r = 
r600_bytecode_add_vtx(ctx->bc, &vtx);
		if (r)
			return r;
	}

	/* used by TCS/TES */
	if (ctx->tess_output_info) {
		/* fetch tcs output values into resv space */
		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.mega_fetch_count = 16;
		vtx.data_format = FMT_32_32_32_32;
		vtx.num_format_all = 2;
		vtx.format_comp_all = 1;
		vtx.use_const_fields = 0;
		vtx.endian = r600_endian_swap(32);
		vtx.srf_mode_all = 1;
		/* output info lives 16 bytes after the input info */
		vtx.offset = 16;
		vtx.dst_gpr = ctx->tess_output_info;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		vtx.src_gpr = temp_val;
		vtx.src_sel_x = 0;

		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
		if (r)
			return r;
	}
	return 0;
}

/* Translate a TGSI shader into R600-family bytecode.
 *
 * Main compilation entry point: scans the TGSI tokens, assigns GPR file
 * offsets, processes declarations/immediates, runs the per-opcode
 * instruction emitters (or the optional LLVM backend), then emits the
 * export/ring-write epilogue and the program terminator.
 *
 * rctx       - current driver context
 * pipeshader - destination shader object (bytecode lands in its ->shader.bc)
 * key        - shader variant key (processor-type specific union)
 * Returns 0 on success or a negative errno on failure.
 */
static int r600_shader_from_tgsi(struct r600_context *rctx,
				 struct r600_pipe_shader *pipeshader,
				 union r600_shader_key key)
{
	struct r600_screen *rscreen = rctx->screen;
	struct r600_shader *shader = &pipeshader->shader;
	struct tgsi_token *tokens = pipeshader->selector->tokens;
	struct pipe_stream_output_info so = pipeshader->selector->so;
	struct tgsi_full_immediate *immediate;
	struct r600_shader_ctx ctx;
	struct r600_bytecode_output output[32];
	unsigned output_done, noutput;
	unsigned opcode;
	int i, j, k, r = 0;
	int next_param_base = 0, next_clip_base;
	int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
	/* Declarations used by llvm code */
	bool use_llvm = false;
	bool indirect_gprs;
	bool ring_outputs = false;
	bool pos_emitted = false;

#ifdef R600_USE_LLVM
	use_llvm = rscreen->b.debug_flags & DBG_LLVM;
#endif
	ctx.bc = &shader->bc;
	ctx.shader = shader;
	ctx.native_integers = true;


	r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
			   rscreen->has_compressed_msaa_texturing);
	ctx.tokens = tokens;
	tgsi_scan_shader(tokens, &ctx.info);
	shader->indirect_files = ctx.info.indirect_files;

	shader->uses_doubles = ctx.info.uses_doubles;

	/* constant/sampler indirection is handled via kcache/index regs,
	 * only input/output/temp indirection needs GPR arrays */
	indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER));
	tgsi_parse_init(&ctx.parse, tokens);
	ctx.type = ctx.info.processor;
	shader->processor_type = ctx.type;
	ctx.bc->type = shader->processor_type;

	/* per-stage variant flags; ring_outputs means results go to GS rings
	 * instead of position/param exports */
	switch (ctx.type) {
	case TGSI_PROCESSOR_VERTEX:
		shader->vs_as_gs_a = key.vs.as_gs_a;
		shader->vs_as_es = key.vs.as_es;
		shader->vs_as_ls = key.vs.as_ls;
		if (shader->vs_as_es)
			ring_outputs = true;
		break;
	case TGSI_PROCESSOR_GEOMETRY:
		ring_outputs = true;
		break;
	case TGSI_PROCESSOR_TESS_CTRL:
		shader->tcs_prim_mode = key.tcs.prim_mode;
		break;
	case TGSI_PROCESSOR_TESS_EVAL:
		shader->tes_as_es = key.tes.as_es;
		if (shader->tes_as_es)
			ring_outputs = true;
		break;
	case TGSI_PROCESSOR_FRAGMENT:
		shader->two_side = key.ps.color_two_side;
		break;
	default:
		break;
	}

	/* ES stages must match their outputs against the currently bound GS
	 * inputs (see emit_gs_ring_writes) */
	if (shader->vs_as_es || shader->tes_as_es) {
		ctx.gs_for_vs = &rctx->gs_shader->current->shader;
	} else {
		ctx.gs_for_vs = NULL;
	}

	ctx.next_ring_offset = 0;
	ctx.gs_out_ring_offset = 0;
	ctx.gs_next_vertex = 0;
	ctx.gs_stream_output_info = &so;

	ctx.face_gpr = -1;
	ctx.fixed_pt_position_gpr = -1;
	ctx.fragcoord_input = -1;
	ctx.colors_used = 0;
	ctx.clip_vertex_write = 0;

	shader->nr_ps_color_exports = 0;
	shader->nr_ps_max_color_exports = 0;


	/* register allocations */
	/* Values [0,127] correspond to GPR[0..127].
	 * Values [128,159] correspond to constant buffer bank 0
	 * Values [160,191] correspond to constant buffer bank 1
	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
	 * Values [256,287] correspond to constant buffer bank 2 (EG)
	 * Values [288,319] correspond to constant buffer bank 3 (EG)
	 * Other special values are shown in the list below.
	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
	 * 248	SQ_ALU_SRC_0: special constant 0.0.
	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
	 * 254	SQ_ALU_SRC_PV: previous vector result.
	 * 255	SQ_ALU_SRC_PS: previous scalar result.
	 */
	for (i = 0; i < TGSI_FILE_COUNT; i++) {
		ctx.file_offset[i] = 0;
	}

#ifdef R600_USE_LLVM
	if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) {
		fprintf(stderr, "Warning: R600 LLVM backend does not support "
			"indirect adressing.  Falling back to TGSI "
			"backend.\n");
		use_llvm = 0;
	}
#endif
	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
		/* GPR0 holds the vertex index; the fetch shader call fills the
		 * input GPRs starting at 1 */
		ctx.file_offset[TGSI_FILE_INPUT] = 1;
		if (!use_llvm) {
			r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
		}
	}
	if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
		if (ctx.bc->chip_class >= EVERGREEN)
			ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
		else
			ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
	}
	if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
		/* FIXME 1 would be enough in some cases (3 or less input vertices) */
		ctx.file_offset[TGSI_FILE_INPUT] = 2;
	}
	ctx.use_llvm = use_llvm;

	/* outputs overlap inputs in the LLVM path (LLVM does its own
	 * allocation); otherwise they follow the inputs */
	if (use_llvm) {
		ctx.file_offset[TGSI_FILE_OUTPUT] =
			ctx.file_offset[TGSI_FILE_INPUT];
	} else {
		ctx.file_offset[TGSI_FILE_OUTPUT] =
			ctx.file_offset[TGSI_FILE_INPUT] +
			ctx.info.file_max[TGSI_FILE_INPUT] + 1;
	}
	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;

	/* Outside the GPR range. This will be translated to one of the
	 * kcache banks later. */
	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;

	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
	/* reserved scratch GPRs after the temporaries: AR, two index regs,
	 * then stage-specific registers */
	ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
			ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
	ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1;
	ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2;

	if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) {
		ctx.tess_input_info = ctx.bc->ar_reg + 3;
		ctx.tess_output_info = ctx.bc->ar_reg + 4;
		ctx.temp_reg = ctx.bc->ar_reg + 5;
	} else if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) {
		ctx.tess_input_info = 0;
		ctx.tess_output_info = ctx.bc->ar_reg + 3;
		ctx.temp_reg = ctx.bc->ar_reg + 4;
	} else if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
		/* one ring-offset treg per GS stream */
		ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3;
		ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4;
		ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5;
		ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6;
		ctx.temp_reg = ctx.bc->ar_reg + 7;
	} else {
		ctx.temp_reg = ctx.bc->ar_reg + 3;
	}

	shader->max_arrays = 0;
	shader->num_arrays = 0;
	if (indirect_gprs) {

		if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) {
			r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT],
					   ctx.file_offset[TGSI_FILE_OUTPUT] -
					   ctx.file_offset[TGSI_FILE_INPUT],
					   0x0F);
		}
		if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
			r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT],
					   ctx.file_offset[TGSI_FILE_TEMPORARY] -
					   ctx.file_offset[TGSI_FILE_OUTPUT],
					   0x0F);
		}
	}

	ctx.nliterals = 0;
	ctx.literals = NULL;

	shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
	shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
	shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT];

	if (shader->vs_as_gs_a)
		vs_add_primid_output(&ctx, key.vs.prim_id_out);

	if (ctx.type == TGSI_PROCESSOR_TESS_EVAL)
		r600_fetch_tess_io_info(&ctx);

	/* first pass: collect immediates and process declarations */
	while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
		tgsi_parse_token(&ctx.parse);
		switch (ctx.parse.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_IMMEDIATE:
			immediate = &ctx.parse.FullToken.FullImmediate;
			/* 16 bytes = 4 dword channels per immediate.
			 * NOTE(review): on realloc failure the old buffer is
			 * leaked because ctx.literals is overwritten before
			 * the NULL check. */
			ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16);
			if(ctx.literals == NULL) {
				r = -ENOMEM;
				goto out_err;
			}
			ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint;
			ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint;
			ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint;
			ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint;
			ctx.nliterals++;
			break;
		case TGSI_TOKEN_TYPE_DECLARATION:
			r = tgsi_declaration(&ctx);
			if (r)
				goto out_err;
			break;
		case TGSI_TOKEN_TYPE_INSTRUCTION:
		case TGSI_TOKEN_TYPE_PROPERTY:
			/* instructions are handled in the second pass below */
			break;
		default:
			R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type);
			r = -EINVAL;
			goto out_err;
		}
	}

	shader->ring_item_sizes[0] = ctx.next_ring_offset;
	shader->ring_item_sizes[1] = 0;
	shader->ring_item_sizes[2] = 0;
	shader->ring_item_sizes[3] = 0;

	/* Process two side if needed */
	if (shader->two_side && ctx.colors_used) {
		int i, count = ctx.shader->ninput;
		unsigned next_lds_loc = ctx.shader->nlds;

		/* additional inputs will be allocated right after the existing inputs,
		 * we won't need them after the color selection, so we don't need to
		 * reserve these gprs for the rest of the shader code and to adjust
		 * output offsets etc. */
		int gpr = ctx.file_offset[TGSI_FILE_INPUT] +
			  ctx.info.file_max[TGSI_FILE_INPUT] + 1;

		/* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */
		if (ctx.face_gpr == -1) {
			i = ctx.shader->ninput++;
			ctx.shader->input[i].name = TGSI_SEMANTIC_FACE;
			ctx.shader->input[i].spi_sid = 0;
			ctx.shader->input[i].gpr = gpr++;
			ctx.face_gpr = ctx.shader->input[i].gpr;
		}

		/* clone each COLOR input as a BCOLOR input for back faces */
		for (i = 0; i < count; i++) {
			if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) {
				int ni = ctx.shader->ninput++;
				memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io));
				ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR;
				ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]);
				ctx.shader->input[ni].gpr = gpr++;
				// TGSI to LLVM needs to know the lds position of inputs.
				// Non LLVM path computes it later (in process_twoside_color)
				ctx.shader->input[ni].lds_pos = next_lds_loc++;
				ctx.shader->input[i].back_color_input = ni;
				if (ctx.bc->chip_class >= EVERGREEN) {
					if ((r = evergreen_interp_input(&ctx, ni)))
						return r;
				}
			}
		}
	}

/* LLVM backend setup */
#ifdef R600_USE_LLVM
	if (use_llvm) {
		struct radeon_llvm_context radeon_llvm_ctx;
		LLVMModuleRef mod;
		bool dump = r600_can_dump_shader(&rscreen->b, tokens);
		boolean use_kill = false;

		memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
		radeon_llvm_ctx.type = ctx.type;
		radeon_llvm_ctx.two_side = shader->two_side;
		radeon_llvm_ctx.face_gpr = ctx.face_gpr;
		radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1;
		radeon_llvm_ctx.r600_inputs = ctx.shader->input;
		radeon_llvm_ctx.r600_outputs = ctx.shader->output;
		radeon_llvm_ctx.color_buffer_count = max_color_exports;
		radeon_llvm_ctx.chip_class = ctx.bc->chip_class;
		radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN);
		radeon_llvm_ctx.stream_outputs = &so;
		radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one;
		radeon_llvm_ctx.has_compressed_msaa_texturing =
			ctx.bc->has_compressed_msaa_texturing;
		mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
		ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp;
		ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers;

		/* on LLVM failure fall back to the TGSI path below */
		if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) {
			radeon_llvm_dispose(&radeon_llvm_ctx);
			use_llvm = 0;
			fprintf(stderr, "R600 LLVM backend failed to compile "
				"shader.  Falling back to TGSI\n");
		} else {
			ctx.file_offset[TGSI_FILE_OUTPUT] =
				ctx.file_offset[TGSI_FILE_INPUT];
		}
		if (use_kill)
			ctx.shader->uses_kill = use_kill;
		radeon_llvm_dispose(&radeon_llvm_ctx);
	}
#endif
/* End of LLVM backend setup */

	if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
		shader->nr_ps_max_color_exports = 8;

	if (!use_llvm) {
		/* fragcoord.w needs 1/w - emit RECIP_IEEE on channel 3
		 * (Cayman has no scalar unit, so loop over all vector slots) */
		if (ctx.fragcoord_input >= 0) {
			if (ctx.bc->chip_class == CAYMAN) {
				for (j = 0 ; j < 4; j++) {
					struct r600_bytecode_alu alu;
					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
					alu.op = ALU_OP1_RECIP_IEEE;
					alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
					alu.src[0].chan = 3;

					alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
					alu.dst.chan = j;
					alu.dst.write = (j == 3);
					alu.last = 1;
					if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
						return r;
				}
			} else {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_RECIP_IEEE;
				alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
				alu.src[0].chan = 3;

				alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
				alu.dst.chan = 3;
				alu.dst.write = 1;
				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
					return r;
			}
		}

		if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
			struct r600_bytecode_alu alu;
			int r;

			/* GS thread with no output workaround - emit a cut at start of GS */
			if (ctx.bc->chip_class == R600)
				r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX);

			/* zero-init the ring write offsets for all four streams */
			for (j = 0; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_MOV;
				alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
				alu.src[0].value = 0;
				alu.dst.sel = ctx.gs_export_gpr_tregs[j];
				alu.dst.write = 1;
				alu.last = 1;
				r = r600_bytecode_add_alu(ctx.bc, &alu);
				if (r)
					return r;
			}
		}

		if (ctx.type == TGSI_PROCESSOR_TESS_CTRL)
			r600_fetch_tess_io_info(&ctx);

		if (shader->two_side && ctx.colors_used) {
			if ((r = process_twoside_color_inputs(&ctx)))
				return r;
		}

		/* second pass: translate each TGSI instruction via the
		 * chip-specific opcode table */
		tgsi_parse_init(&ctx.parse, tokens);
		while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
			tgsi_parse_token(&ctx.parse);
			switch (ctx.parse.FullToken.Token.Type) {
			case TGSI_TOKEN_TYPE_INSTRUCTION:
				r = tgsi_is_supported(&ctx);
				if (r)
					goto out_err;
				ctx.max_driver_temp_used = 0;
				/* reserve first tmp for everyone */
				r600_get_temp(&ctx);

				opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
				if ((r = tgsi_split_constant(&ctx)))
					goto out_err;
				if ((r = tgsi_split_literal_constant(&ctx)))
					goto out_err;
				if (ctx.type == TGSI_PROCESSOR_GEOMETRY)
					if ((r = tgsi_split_gs_inputs(&ctx)))
						goto out_err;
				if (ctx.bc->chip_class == CAYMAN)
					ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
				else if (ctx.bc->chip_class >= EVERGREEN)
					ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
				else
					ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
				r = ctx.inst_info->process(&ctx);
				if (r)
					goto out_err;
				break;
			default:
				break;
			}
		}
	}

	/* Reset the temporary register counter. */
	ctx.max_driver_temp_used = 0;

	noutput = shader->noutput;

	/* lower a CLIPVERTEX write into two CLIPDIST outputs by DOT4-ing the
	 * clip vertex with the 8 user clip planes from the const buffer */
	if (!ring_outputs && ctx.clip_vertex_write) {
		unsigned clipdist_temp[2];

		clipdist_temp[0] = r600_get_temp(&ctx);
		clipdist_temp[1] = r600_get_temp(&ctx);

		/* need to convert a clipvertex write into clipdistance writes and not export
		   the clip vertex anymore */

		memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io));
		shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
		shader->output[noutput].gpr = clipdist_temp[0];
		noutput++;
		shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST;
		shader->output[noutput].gpr = clipdist_temp[1];
		noutput++;

		/* reset spi_sid for clipvertex output to avoid confusing spi */
		shader->output[ctx.cv_output].spi_sid = 0;

		shader->clip_dist_write = 0xFF;

		for (i = 0; i < 8; i++) {
			int oreg = i >> 2;
			int ochan = i & 3;

			for (j = 0; j < 4; j++) {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_DOT4;
				alu.src[0].sel = shader->output[ctx.cv_output].gpr;
				alu.src[0].chan = j;

				/* clip plane i lives at const 512+i in the
				 * buffer-info constant buffer */
				alu.src[1].sel = 512 + i;
				alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
				alu.src[1].chan = j;

				alu.dst.sel = clipdist_temp[oreg];
				alu.dst.chan = j;
				alu.dst.write = (j == ochan);
				if (j == 3)
					alu.last = 1;
				if (!use_llvm)
					r = r600_bytecode_add_alu(ctx.bc, &alu);
				if (r)
					return r;
			}
		}
	}

	/* Add stream outputs. */
	if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX &&
	    so.num_outputs && !use_llvm)
		emit_streamout(&ctx, &so, -1, NULL);

	pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;
	convert_edgeflag_to_int(&ctx);

	if (ring_outputs) {
		/* ES path: write outputs to the ESGS ring instead of exports */
		if (shader->vs_as_es || shader->tes_as_es) {
			ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx);
			ctx.gs_export_gpr_tregs[1] = -1;
			ctx.gs_export_gpr_tregs[2] = -1;
			ctx.gs_export_gpr_tregs[3] = -1;

			emit_gs_ring_writes(&ctx, &so, -1, FALSE);
		}
	} else {
		/* Export output */
		next_clip_base = shader->vs_out_misc_write ? 62 : 61;

		for (i = 0, j = 0; i < noutput; i++, j++) {
			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
			output[j].gpr = shader->output[i].gpr;
			output[j].elem_size = 3;
			output[j].swizzle_x = 0;
			output[j].swizzle_y = 1;
			output[j].swizzle_z = 2;
			output[j].swizzle_w = 3;
			output[j].burst_count = 1;
			/* -1 marks "no type chosen yet"; defaulted to PARAM
			 * after the switch */
			output[j].type = -1;
			output[j].op = CF_OP_EXPORT;
			switch (ctx.type) {
			case TGSI_PROCESSOR_VERTEX:
				switch (shader->output[i].name) {
				case TGSI_SEMANTIC_POSITION:
					output[j].array_base = 60;
					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
					pos_emitted = true;
					break;

				case TGSI_SEMANTIC_PSIZE:
					/* misc vector (pos 61): psize in x,
					 * swizzle 7 masks the other channels */
					output[j].array_base = 61;
					output[j].swizzle_y = 7;
					output[j].swizzle_z = 7;
					output[j].swizzle_w = 7;
					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
					pos_emitted = true;
					break;
				case TGSI_SEMANTIC_EDGEFLAG:
					/* misc vector: edgeflag goes in y */
					output[j].array_base = 61;
					output[j].swizzle_x = 7;
					output[j].swizzle_y = 0;
					output[j].swizzle_z = 7;
					output[j].swizzle_w = 7;
					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
					pos_emitted = true;
					break;
				case TGSI_SEMANTIC_LAYER:
					/* spi_sid is 0 for outputs that are
					 * not consumed by PS */
					if (shader->output[i].spi_sid) {
						output[j].array_base = next_param_base++;
						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
						j++;
						memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
					}
					/* misc vector: layer goes in z */
					output[j].array_base = 61;
					output[j].swizzle_x = 7;
					output[j].swizzle_y = 7;
					output[j].swizzle_z = 0;
					output[j].swizzle_w = 7;
					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
					pos_emitted = true;
					break;
				case TGSI_SEMANTIC_VIEWPORT_INDEX:
					/* spi_sid is 0 for outputs that are
					 * not consumed by PS */
					if (shader->output[i].spi_sid) {
						output[j].array_base = next_param_base++;
						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
						j++;
						memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
					}
					/* misc vector: viewport index goes in w */
					output[j].array_base = 61;
					output[j].swizzle_x = 7;
					output[j].swizzle_y = 7;
					output[j].swizzle_z = 7;
					output[j].swizzle_w = 0;
					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
					pos_emitted = true;
					break;
				case TGSI_SEMANTIC_CLIPVERTEX:
					/* already lowered to CLIPDIST above -
					 * drop this export slot */
					j--;
					break;
				case TGSI_SEMANTIC_CLIPDIST:
					output[j].array_base = next_clip_base++;
					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
					pos_emitted = true;
					/* spi_sid is 0 for clipdistance outputs that were generated
					 * for clipvertex - we don't need to pass them to PS */
					if (shader->output[i].spi_sid) {
						j++;
						/* duplicate it as PARAM to pass to the pixel shader */
						memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output));
						output[j].array_base = next_param_base++;
						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
					}
					break;
				case TGSI_SEMANTIC_FOG:
					output[j].swizzle_y = 4; /* 0 */
					output[j].swizzle_z = 4; /* 0 */
					output[j].swizzle_w = 5; /* 1 */
					break;
				case TGSI_SEMANTIC_PRIMID:
					output[j].swizzle_x = 2;
					output[j].swizzle_y = 4; /* 0 */
					output[j].swizzle_z = 4; /* 0 */
					output[j].swizzle_w = 4; /* 0 */
					break;
				}

				break;
			case TGSI_PROCESSOR_FRAGMENT:
				if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
					/* never export more colors than the number of CBs */
					if (shader->output[i].sid >= max_color_exports) {
						/* skip export */
						j--;
						continue;
					}
					/* swizzle 5 = constant 1.0 (alpha-to-one) */
					output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
					output[j].array_base = shader->output[i].sid;
					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
					shader->nr_ps_color_exports++;
					/* replicate color 0 to every bound CB
					 * when FS_COLOR0_WRITES_ALL_CBUFS */
					if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) {
						for (k = 1; k < max_color_exports; k++) {
							j++;
							memset(&output[j], 0, sizeof(struct r600_bytecode_output));
							output[j].gpr = shader->output[i].gpr;
							output[j].elem_size = 3;
							output[j].swizzle_x = 0;
							output[j].swizzle_y = 1;
							output[j].swizzle_z = 2;
							output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3;
							output[j].burst_count = 1;
							output[j].array_base = k;
							output[j].op = CF_OP_EXPORT;
							output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
							shader->nr_ps_color_exports++;
						}
					}
				} else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
					/* depth export: z in channel x of pos 61 */
					output[j].array_base = 61;
					output[j].swizzle_x = 2;
					output[j].swizzle_y = 7;
					output[j].swizzle_z = output[j].swizzle_w = 7;
					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
				} else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) {
					output[j].array_base = 61;
					output[j].swizzle_x = 7;
					output[j].swizzle_y = 1;
					output[j].swizzle_z = output[j].swizzle_w = 7;
					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
				} else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) {
					output[j].array_base = 61;
					output[j].swizzle_x = 7;
					output[j].swizzle_y = 7;
					output[j].swizzle_z = 0;
					output[j].swizzle_w = 7;
					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
				} else {
					R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
					r = -EINVAL;
					goto out_err;
				}
				break;
			default:
				R600_ERR("unsupported processor type %d\n", ctx.type);
				r = -EINVAL;
				goto out_err;
			}

			if (output[j].type==-1) {
				output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
				output[j].array_base = next_param_base++;
			}
		}

		/* add fake position export */
		if (ctx.type == TGSI_PROCESSOR_VERTEX && pos_emitted == false) {
			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
			output[j].gpr = 0;
			output[j].elem_size = 3;
			output[j].swizzle_x = 7;
			output[j].swizzle_y = 7;
			output[j].swizzle_z = 7;
			output[j].swizzle_w = 7;
			output[j].burst_count = 1;
			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output[j].array_base = 60;
			output[j].op = CF_OP_EXPORT;
			j++;
		}

		/* add fake param output for vertex shader if no param is exported */
		if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) {
			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
			output[j].gpr = 0;
			output[j].elem_size = 3;
			output[j].swizzle_x = 7;
			output[j].swizzle_y = 7;
			output[j].swizzle_z = 7;
			output[j].swizzle_w = 7;
			output[j].burst_count = 1;
			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
			output[j].array_base = 0;
			output[j].op = CF_OP_EXPORT;
			j++;
		}

		/* add fake pixel export */
		if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) {
			memset(&output[j], 0, sizeof(struct r600_bytecode_output));
			output[j].gpr = 0;
			output[j].elem_size = 3;
			output[j].swizzle_x = 7;
			output[j].swizzle_y = 7;
			output[j].swizzle_z = 7;
			output[j].swizzle_w = 7;
			output[j].burst_count = 1;
			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
			output[j].array_base = 0;
			output[j].op = CF_OP_EXPORT;
			j++;
			shader->nr_ps_color_exports++;
		}

		noutput = j;

		/* set export done on last export of each type */
		for (i = noutput - 1, output_done = 0; i >= 0; i--) {
			if (!(output_done & (1 << output[i].type))) {
				output_done |= (1 << output[i].type);
				output[i].op = CF_OP_EXPORT_DONE;
			}
		}
		/* add output to bytecode */
		if (!use_llvm) {
			for (i = 0; i < noutput; i++) {
				r = r600_bytecode_add_output(ctx.bc, &output[i]);
				if (r)
					goto out_err;
			}
		}
	}

	/* add program end */
	if (!use_llvm) {
		if (ctx.bc->chip_class == CAYMAN)
			cm_bytecode_add_cf_end(ctx.bc);
		else {
			const struct cf_op_info *last = NULL;

			if (ctx.bc->cf_last)
				last = r600_isa_cf(ctx.bc->cf_last->op);

			/* alu clause instructions don't have EOP bit, so add NOP */
			if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS)
				r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);

			ctx.bc->cf_last->end_of_program = 1;
		}
	}

	/* check GPR limit - we have 124 = 128 - 4
	 * (4 are reserved as alu clause temporary registers) */
	if (ctx.bc->ngpr > 124) {
		R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
		r = -ENOMEM;
		goto out_err;
	}

	/* a GS always needs a companion copy shader to move ring data to
	 * real exports */
	if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
		if ((r = generate_gs_copy_shader(rctx, pipeshader, &so)))
			return r;
	}

	free(ctx.literals);
	tgsi_parse_free(&ctx.parse);
	return 0;
out_err:
	free(ctx.literals);
	tgsi_parse_free(&ctx.parse);
	return r;
}

/* Fallback handler for TGSI opcodes the backend does not implement:
 * logs the opcode name and fails the compile. */
static int tgsi_unsupported(struct r600_shader_ctx *ctx)
{
	const unsigned tgsi_opcode =
		ctx->parse.FullToken.FullInstruction.Instruction.Opcode;
	R600_ERR("%s tgsi opcode unsupported\n",
		 tgsi_get_opcode_name(tgsi_opcode));
	return -EINVAL;
}

/* TGSI END needs no bytecode - the epilogue is emitted by the caller. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	return 0;
}

/* Copy a resolved shader source operand into an ALU source slot,
 * applying the per-channel swizzle from the TGSI source. */
static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
			const struct r600_shader_src *shader_src,
			unsigned chan)
{
	bc_src->sel = shader_src->sel;
	bc_src->chan = shader_src->swizzle[chan];
	bc_src->neg = shader_src->neg;
	bc_src->abs = shader_src->abs;
	bc_src->rel = shader_src->rel;
	/* literal value is looked up by the already-swizzled channel */
	bc_src->value = shader_src->value[bc_src->chan];
	bc_src->kc_bank = shader_src->kc_bank;
	bc_src->kc_rel = shader_src->kc_rel;
}

/* Force |src|: abs overrides any negate modifier. */
static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
{
	bc_src->abs = 1;
	bc_src->neg = 0;
}

/* Flip the sign modifier on an ALU source. */
static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
{
	bc_src->neg = !bc_src->neg;
}

/* Translate a TGSI destination register into an ALU destination:
 * applies the register-file GPR offset, selects the channel, and sets
 * relative addressing / clamp (saturate) as requested. */
static void tgsi_dst(struct r600_shader_ctx
*ctx,
		     const struct tgsi_full_dst_register *tgsi_dst,
		     unsigned swizzle,
		     struct r600_bytecode_alu_dst *r600_dst)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;

	r600_dst->sel = tgsi_dst->Register.Index;
	/* remap the TGSI register index into the flat GPR space */
	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
	r600_dst->chan = swizzle;
	r600_dst->write = 1;
	if (tgsi_dst->Register.Indirect)
		r600_dst->rel = V_SQ_REL_RELATIVE;
	if (inst->Instruction.Saturate) {
		r600_dst->clamp = 1;
	}
}

/* Shared emitter for two-operand double-precision ops.  A double occupies a
 * channel pair (xy or zw), so the TGSI writemask is widened to full pairs.
 * singledest - op produces one result per pair (e.g. compare/convert): only
 *              one channel of each pair is written; y/z results are staged
 *              in temp_reg and moved afterwards (use_tmp-1 = source chan).
 * swap       - emit src[1] before src[0] (for reversed-operand opcodes).
 * Returns 0 on success or the first bytecode-emission error. */
static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	struct r600_bytecode_alu alu;
	int i, j, r, lasti = tgsi_last_instruction(write_mask);
	int use_tmp = 0;

	if (singledest) {
		/* widen the single-channel mask to its channel pair */
		switch (write_mask) {
		case 0x1:
			write_mask = 0x3;
			break;
		case 0x2:
			use_tmp = 1;
			write_mask = 0x3;
			break;
		case 0x4:
			write_mask = 0xc;
			break;
		case 0x8:
			write_mask = 0xc;
			use_tmp = 3;
			break;
		}
	}

	lasti = tgsi_last_instruction(write_mask);
	for (i = 0; i <= lasti; i++) {

		if (!(write_mask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		if (singledest) {
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			if (use_tmp) {
				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				alu.dst.write = 1;
			}
			/* only the low channel of each pair carries the result */
			if (i == 1 || i == 3)
				alu.dst.write = 0;
		} else
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ctx->inst_info->op;
		if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		} else if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				/* fp64_switch pairs up the LSW/MSW channels */
				r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
			}
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i));
			r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i));
		}

		/* handle some special cases */
		if (i == 1 || i == 3) {
			switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
			case TGSI_OPCODE_SUB:
				/* SUB is ADD with negated second operand;
				 * the sign lives in the high (MSW) channel */
				r600_bytecode_src_toggle_neg(&alu.src[1]);
				break;
			case TGSI_OPCODE_DABS:
				r600_bytecode_src_set_abs(&alu.src[0]);
				break;
			default:
				break;
			}
		}
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	if (use_tmp) {
		write_mask = inst->Dst[0].Register.WriteMask;

		/* move result from temp to dst */
		for (i = 0; i <= lasti; i++) {
			if (!(write_mask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = use_tmp - 1;
			alu.last = (i == lasti);

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}

/* Two-operand double op with a double result: validates that the writemask
 * covers whole channel pairs, then defers to the shared emitter. */
static int tgsi_op2_64(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	/* confirm writemasking */
	if ((write_mask & 0x3) != 0x3 &&
	    (write_mask & 0xc) != 0xc) {
		fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask);
		return -1;
	}
	return tgsi_op2_64_params(ctx, false, false);
}

/* Double op producing a single (non-double) result per pair. */
static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_64_params(ctx, true, false);
}

/* Same as above with the two source operands swapped. */
static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_64_params(ctx, true, true);
}

/* Three-operand double op: emits one op3 ALU instruction per channel over
 * the full xyzw vector; unwritten channels are redirected to a scratch temp
 * since op3 instructions always write.  Returns 0 or the emission error. */
static int tgsi_op3_64(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;
	int lasti = 3;
	int tmp = r600_get_temp(ctx);

	for (i = 0; i < lasti + 1; i++) {

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			/* sources use chan 1 (MSW) except the final slot */
			r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1);
		}

		if (inst->Dst[0].Register.WriteMask & (1 << i))
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		else
			alu.dst.sel = tmp; /* discard masked-out channels */

		alu.dst.chan = i;
		alu.is_op3 = 1;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Generic two-operand (32-bit) op emitter: one ALU instruction per written
 * channel.  swap reverses the two sources; trans_only ops must stage results
 * in temp_reg when more than one component is written (results are moved to
 * the real destination afterwards, outside this view). */
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int i, j, r, lasti = tgsi_last_instruction(write_mask);
	/* use temp register if trans_only and more than one dst component */
	int use_tmp = trans_only && (write_mask ^ (1 << lasti));

	for (i = 0; i <= lasti; i++) {
		if (!(write_mask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		if (use_tmp) {
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.dst.write = 1;
		} else
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ctx->inst_info->op;
		if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
			}
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		}
		/* handle some special cases */
switch (inst->Instruction.Opcode) { 3157 case TGSI_OPCODE_SUB: 3158 r600_bytecode_src_toggle_neg(&alu.src[1]); 3159 break; 3160 case TGSI_OPCODE_ABS: 3161 r600_bytecode_src_set_abs(&alu.src[0]); 3162 break; 3163 default: 3164 break; 3165 } 3166 if (i == lasti || trans_only) { 3167 alu.last = 1; 3168 } 3169 r = r600_bytecode_add_alu(ctx->bc, &alu); 3170 if (r) 3171 return r; 3172 } 3173 3174 if (use_tmp) { 3175 /* move result from temp to dst */ 3176 for (i = 0; i <= lasti; i++) { 3177 if (!(write_mask & (1 << i))) 3178 continue; 3179 3180 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3181 alu.op = ALU_OP1_MOV; 3182 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3183 alu.src[0].sel = ctx->temp_reg; 3184 alu.src[0].chan = i; 3185 alu.last = (i == lasti); 3186 3187 r = r600_bytecode_add_alu(ctx->bc, &alu); 3188 if (r) 3189 return r; 3190 } 3191 } 3192 return 0; 3193} 3194 3195static int tgsi_op2(struct r600_shader_ctx *ctx) 3196{ 3197 return tgsi_op2_s(ctx, 0, 0); 3198} 3199 3200static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 3201{ 3202 return tgsi_op2_s(ctx, 1, 0); 3203} 3204 3205static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 3206{ 3207 return tgsi_op2_s(ctx, 0, 1); 3208} 3209 3210static int tgsi_ineg(struct r600_shader_ctx *ctx) 3211{ 3212 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3213 struct r600_bytecode_alu alu; 3214 int i, r; 3215 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3216 3217 for (i = 0; i < lasti + 1; i++) { 3218 3219 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3220 continue; 3221 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3222 alu.op = ctx->inst_info->op; 3223 3224 alu.src[0].sel = V_SQ_ALU_SRC_0; 3225 3226 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3227 3228 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3229 3230 if (i == lasti) { 3231 alu.last = 1; 3232 } 3233 r = r600_bytecode_add_alu(ctx->bc, &alu); 3234 if (r) 3235 return r; 3236 } 3237 return 0; 3238 3239} 
3240 3241static int tgsi_dneg(struct r600_shader_ctx *ctx) 3242{ 3243 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3244 struct r600_bytecode_alu alu; 3245 int i, r; 3246 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3247 3248 for (i = 0; i < lasti + 1; i++) { 3249 3250 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3251 continue; 3252 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3253 alu.op = ALU_OP1_MOV; 3254 3255 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3256 3257 if (i == 1 || i == 3) 3258 r600_bytecode_src_toggle_neg(&alu.src[0]); 3259 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3260 3261 if (i == lasti) { 3262 alu.last = 1; 3263 } 3264 r = r600_bytecode_add_alu(ctx->bc, &alu); 3265 if (r) 3266 return r; 3267 } 3268 return 0; 3269 3270} 3271 3272static int tgsi_dfracexp(struct r600_shader_ctx *ctx) 3273{ 3274 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3275 struct r600_bytecode_alu alu; 3276 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3277 int i, j, r; 3278 int firsti = write_mask == 0xc ? 
2 : 0; 3279 3280 for (i = 0; i <= 3; i++) { 3281 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3282 alu.op = ctx->inst_info->op; 3283 3284 alu.dst.sel = ctx->temp_reg; 3285 alu.dst.chan = i; 3286 alu.dst.write = 1; 3287 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3288 r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 3289 } 3290 3291 if (i == 3) 3292 alu.last = 1; 3293 3294 r = r600_bytecode_add_alu(ctx->bc, &alu); 3295 if (r) 3296 return r; 3297 } 3298 3299 /* MOV first two channels to writemask dst0 */ 3300 for (i = 0; i <= 1; i++) { 3301 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3302 alu.op = ALU_OP1_MOV; 3303 alu.src[0].chan = i + 2; 3304 alu.src[0].sel = ctx->temp_reg; 3305 3306 tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst); 3307 alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1; 3308 alu.last = 1; 3309 r = r600_bytecode_add_alu(ctx->bc, &alu); 3310 if (r) 3311 return r; 3312 } 3313 3314 for (i = 0; i <= 3; i++) { 3315 if (inst->Dst[1].Register.WriteMask & (1 << i)) { 3316 /* MOV third channels to writemask dst1 */ 3317 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3318 alu.op = ALU_OP1_MOV; 3319 alu.src[0].chan = 1; 3320 alu.src[0].sel = ctx->temp_reg; 3321 3322 tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst); 3323 alu.last = 1; 3324 r = r600_bytecode_add_alu(ctx->bc, &alu); 3325 if (r) 3326 return r; 3327 break; 3328 } 3329 } 3330 return 0; 3331} 3332 3333 3334static int egcm_int_to_double(struct r600_shader_ctx *ctx) 3335{ 3336 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3337 struct r600_bytecode_alu alu; 3338 int i, r; 3339 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3340 3341 assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D || 3342 inst->Instruction.Opcode == TGSI_OPCODE_U2D); 3343 3344 for (i = 0; i <= (lasti+1)/2; i++) { 3345 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3346 alu.op = ctx->inst_info->op; 3347 3348 
r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3349 alu.dst.sel = ctx->temp_reg; 3350 alu.dst.chan = i; 3351 alu.dst.write = 1; 3352 alu.last = 1; 3353 3354 r = r600_bytecode_add_alu(ctx->bc, &alu); 3355 if (r) 3356 return r; 3357 } 3358 3359 for (i = 0; i <= lasti; i++) { 3360 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3361 alu.op = ALU_OP1_FLT32_TO_FLT64; 3362 3363 alu.src[0].chan = i/2; 3364 if (i%2 == 0) 3365 alu.src[0].sel = ctx->temp_reg; 3366 else { 3367 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3368 alu.src[0].value = 0x0; 3369 } 3370 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3371 alu.last = i == lasti; 3372 3373 r = r600_bytecode_add_alu(ctx->bc, &alu); 3374 if (r) 3375 return r; 3376 } 3377 3378 return 0; 3379} 3380 3381static int egcm_double_to_int(struct r600_shader_ctx *ctx) 3382{ 3383 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3384 struct r600_bytecode_alu alu; 3385 int i, r; 3386 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3387 3388 assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I || 3389 inst->Instruction.Opcode == TGSI_OPCODE_D2U); 3390 3391 for (i = 0; i <= lasti; i++) { 3392 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3393 alu.op = ALU_OP1_FLT64_TO_FLT32; 3394 3395 r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i)); 3396 alu.dst.chan = i; 3397 alu.dst.sel = ctx->temp_reg; 3398 alu.dst.write = i%2 == 0; 3399 alu.last = i == lasti; 3400 3401 r = r600_bytecode_add_alu(ctx->bc, &alu); 3402 if (r) 3403 return r; 3404 } 3405 3406 for (i = 0; i <= (lasti+1)/2; i++) { 3407 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3408 alu.op = ctx->inst_info->op; 3409 3410 alu.src[0].chan = i*2; 3411 alu.src[0].sel = ctx->temp_reg; 3412 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 3413 alu.last = 1; 3414 3415 r = r600_bytecode_add_alu(ctx->bc, &alu); 3416 if (r) 3417 return r; 3418 } 3419 3420 return 0; 3421} 3422 3423static int cayman_emit_double_instr(struct r600_shader_ctx 
*ctx) 3424{ 3425 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3426 int i, r; 3427 struct r600_bytecode_alu alu; 3428 int last_slot = 3; 3429 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3430 int t1 = ctx->temp_reg; 3431 3432 /* these have to write the result to X/Y by the looks of it */ 3433 for (i = 0 ; i < last_slot; i++) { 3434 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3435 alu.op = ctx->inst_info->op; 3436 3437 /* should only be one src regs */ 3438 assert (inst->Instruction.NumSrcRegs == 1); 3439 3440 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 3441 r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); 3442 3443 /* RSQ should take the absolute value of src */ 3444 if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ || 3445 ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) { 3446 r600_bytecode_src_set_abs(&alu.src[1]); 3447 } 3448 alu.dst.sel = t1; 3449 alu.dst.chan = i; 3450 alu.dst.write = (i == 0 || i == 1); 3451 3452 if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1) 3453 alu.last = 1; 3454 r = r600_bytecode_add_alu(ctx->bc, &alu); 3455 if (r) 3456 return r; 3457 } 3458 3459 for (i = 0 ; i <= lasti; i++) { 3460 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3461 continue; 3462 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3463 alu.op = ALU_OP1_MOV; 3464 alu.src[0].sel = t1; 3465 alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1; 3466 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3467 alu.dst.write = 1; 3468 if (i == lasti) 3469 alu.last = 1; 3470 r = r600_bytecode_add_alu(ctx->bc, &alu); 3471 if (r) 3472 return r; 3473 } 3474 return 0; 3475} 3476 3477static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 3478{ 3479 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3480 int i, j, r; 3481 struct r600_bytecode_alu alu; 3482 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 3483 3484 for (i = 0 ; i < last_slot; i++) { 3485 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3486 alu.op = ctx->inst_info->op; 3487 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3488 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 3489 3490 /* RSQ should take the absolute value of src */ 3491 if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) { 3492 r600_bytecode_src_set_abs(&alu.src[j]); 3493 } 3494 } 3495 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3496 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 3497 3498 if (i == last_slot - 1) 3499 alu.last = 1; 3500 r = r600_bytecode_add_alu(ctx->bc, &alu); 3501 if (r) 3502 return r; 3503 } 3504 return 0; 3505} 3506 3507static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 3508{ 3509 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3510 int i, j, k, r; 3511 struct r600_bytecode_alu alu; 3512 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3513 int t1 = ctx->temp_reg; 3514 3515 for (k = 0; k <= lasti; k++) { 3516 if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 3517 continue; 3518 3519 for (i = 0 ; i < 4; i++) { 3520 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3521 alu.op = ctx->inst_info->op; 3522 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3523 r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 3524 } 3525 alu.dst.sel = t1; 3526 alu.dst.chan = i; 3527 alu.dst.write = (i == k); 3528 if (i == 3) 3529 alu.last = 1; 3530 r = r600_bytecode_add_alu(ctx->bc, &alu); 3531 if (r) 3532 return r; 3533 } 3534 } 3535 3536 for (i = 0 ; i <= lasti; i++) { 3537 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3538 continue; 3539 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3540 alu.op = ALU_OP1_MOV; 3541 alu.src[0].sel = t1; 3542 alu.src[0].chan = i; 3543 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3544 alu.dst.write = 1; 3545 if (i == lasti) 3546 alu.last = 1; 3547 r = r600_bytecode_add_alu(ctx->bc, &alu); 3548 if (r) 3549 return 
r; 3550 } 3551 3552 return 0; 3553} 3554 3555 3556static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) 3557{ 3558 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3559 int i, j, k, r; 3560 struct r600_bytecode_alu alu; 3561 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3562 int t1 = ctx->temp_reg; 3563 3564 for (k = 0; k < 2; k++) { 3565 if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2)))) 3566 continue; 3567 3568 for (i = 0; i < 4; i++) { 3569 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3570 alu.op = ctx->inst_info->op; 3571 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3572 r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));; 3573 } 3574 alu.dst.sel = t1; 3575 alu.dst.chan = i; 3576 alu.dst.write = 1; 3577 if (i == 3) 3578 alu.last = 1; 3579 r = r600_bytecode_add_alu(ctx->bc, &alu); 3580 if (r) 3581 return r; 3582 } 3583 } 3584 3585 for (i = 0; i <= lasti; i++) { 3586 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3587 continue; 3588 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3589 alu.op = ALU_OP1_MOV; 3590 alu.src[0].sel = t1; 3591 alu.src[0].chan = i; 3592 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3593 alu.dst.write = 1; 3594 if (i == lasti) 3595 alu.last = 1; 3596 r = r600_bytecode_add_alu(ctx->bc, &alu); 3597 if (r) 3598 return r; 3599 } 3600 3601 return 0; 3602} 3603 3604/* 3605 * r600 - trunc to -PI..PI range 3606 * r700 - normalize by dividing by 2PI 3607 * see fdo bug 27901 3608 */ 3609static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 3610{ 3611 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 3612 static float double_pi = 3.1415926535 * 2; 3613 static float neg_pi = -3.1415926535; 3614 3615 int r; 3616 struct r600_bytecode_alu alu; 3617 3618 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3619 alu.op = ALU_OP3_MULADD; 3620 alu.is_op3 = 1; 3621 3622 alu.dst.chan = 0; 3623 alu.dst.sel = ctx->temp_reg; 3624 alu.dst.write = 1; 3625 
3626 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3627 3628 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 3629 alu.src[1].chan = 0; 3630 alu.src[1].value = *(uint32_t *)&half_inv_pi; 3631 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 3632 alu.src[2].chan = 0; 3633 alu.last = 1; 3634 r = r600_bytecode_add_alu(ctx->bc, &alu); 3635 if (r) 3636 return r; 3637 3638 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3639 alu.op = ALU_OP1_FRACT; 3640 3641 alu.dst.chan = 0; 3642 alu.dst.sel = ctx->temp_reg; 3643 alu.dst.write = 1; 3644 3645 alu.src[0].sel = ctx->temp_reg; 3646 alu.src[0].chan = 0; 3647 alu.last = 1; 3648 r = r600_bytecode_add_alu(ctx->bc, &alu); 3649 if (r) 3650 return r; 3651 3652 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3653 alu.op = ALU_OP3_MULADD; 3654 alu.is_op3 = 1; 3655 3656 alu.dst.chan = 0; 3657 alu.dst.sel = ctx->temp_reg; 3658 alu.dst.write = 1; 3659 3660 alu.src[0].sel = ctx->temp_reg; 3661 alu.src[0].chan = 0; 3662 3663 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 3664 alu.src[1].chan = 0; 3665 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 3666 alu.src[2].chan = 0; 3667 3668 if (ctx->bc->chip_class == R600) { 3669 alu.src[1].value = *(uint32_t *)&double_pi; 3670 alu.src[2].value = *(uint32_t *)&neg_pi; 3671 } else { 3672 alu.src[1].sel = V_SQ_ALU_SRC_1; 3673 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 3674 alu.src[2].neg = 1; 3675 } 3676 3677 alu.last = 1; 3678 r = r600_bytecode_add_alu(ctx->bc, &alu); 3679 if (r) 3680 return r; 3681 return 0; 3682} 3683 3684static int cayman_trig(struct r600_shader_ctx *ctx) 3685{ 3686 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3687 struct r600_bytecode_alu alu; 3688 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 3689 int i, r; 3690 3691 r = tgsi_setup_trig(ctx); 3692 if (r) 3693 return r; 3694 3695 3696 for (i = 0; i < last_slot; i++) { 3697 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3698 alu.op = ctx->inst_info->op; 3699 alu.dst.chan = i; 3700 3701 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3702 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 3703 3704 alu.src[0].sel = ctx->temp_reg; 3705 alu.src[0].chan = 0; 3706 if (i == last_slot - 1) 3707 alu.last = 1; 3708 r = r600_bytecode_add_alu(ctx->bc, &alu); 3709 if (r) 3710 return r; 3711 } 3712 return 0; 3713} 3714 3715static int tgsi_trig(struct r600_shader_ctx *ctx) 3716{ 3717 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3718 struct r600_bytecode_alu alu; 3719 int i, r; 3720 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3721 3722 r = tgsi_setup_trig(ctx); 3723 if (r) 3724 return r; 3725 3726 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3727 alu.op = ctx->inst_info->op; 3728 alu.dst.chan = 0; 3729 alu.dst.sel = ctx->temp_reg; 3730 alu.dst.write = 1; 3731 3732 alu.src[0].sel = ctx->temp_reg; 3733 alu.src[0].chan = 0; 3734 alu.last = 1; 3735 r = r600_bytecode_add_alu(ctx->bc, &alu); 3736 if (r) 3737 return r; 3738 3739 /* replicate result */ 3740 for (i = 0; i < lasti + 1; i++) { 3741 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3742 continue; 3743 3744 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3745 alu.op = ALU_OP1_MOV; 3746 3747 alu.src[0].sel = ctx->temp_reg; 3748 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3749 if (i == lasti) 3750 alu.last = 1; 3751 r = r600_bytecode_add_alu(ctx->bc, &alu); 3752 if (r) 3753 return r; 3754 } 3755 return 0; 3756} 3757 3758static int tgsi_scs(struct r600_shader_ctx *ctx) 3759{ 3760 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3761 struct r600_bytecode_alu alu; 3762 int i, r; 3763 3764 /* We'll only need the trig stuff if we are going to write to the 3765 * X or Y 
components of the destination vector. 3766 */ 3767 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 3768 r = tgsi_setup_trig(ctx); 3769 if (r) 3770 return r; 3771 } 3772 3773 /* dst.x = COS */ 3774 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3775 if (ctx->bc->chip_class == CAYMAN) { 3776 for (i = 0 ; i < 3; i++) { 3777 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3778 alu.op = ALU_OP1_COS; 3779 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3780 3781 if (i == 0) 3782 alu.dst.write = 1; 3783 else 3784 alu.dst.write = 0; 3785 alu.src[0].sel = ctx->temp_reg; 3786 alu.src[0].chan = 0; 3787 if (i == 2) 3788 alu.last = 1; 3789 r = r600_bytecode_add_alu(ctx->bc, &alu); 3790 if (r) 3791 return r; 3792 } 3793 } else { 3794 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3795 alu.op = ALU_OP1_COS; 3796 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 3797 3798 alu.src[0].sel = ctx->temp_reg; 3799 alu.src[0].chan = 0; 3800 alu.last = 1; 3801 r = r600_bytecode_add_alu(ctx->bc, &alu); 3802 if (r) 3803 return r; 3804 } 3805 } 3806 3807 /* dst.y = SIN */ 3808 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3809 if (ctx->bc->chip_class == CAYMAN) { 3810 for (i = 0 ; i < 3; i++) { 3811 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3812 alu.op = ALU_OP1_SIN; 3813 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3814 if (i == 1) 3815 alu.dst.write = 1; 3816 else 3817 alu.dst.write = 0; 3818 alu.src[0].sel = ctx->temp_reg; 3819 alu.src[0].chan = 0; 3820 if (i == 2) 3821 alu.last = 1; 3822 r = r600_bytecode_add_alu(ctx->bc, &alu); 3823 if (r) 3824 return r; 3825 } 3826 } else { 3827 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3828 alu.op = ALU_OP1_SIN; 3829 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 3830 3831 alu.src[0].sel = ctx->temp_reg; 3832 alu.src[0].chan = 0; 3833 alu.last = 1; 3834 r = r600_bytecode_add_alu(ctx->bc, &alu); 3835 if (r) 3836 return r; 3837 } 3838 } 3839 3840 /* dst.z = 0.0; */ 3841 if (inst->Dst[0].Register.WriteMask 
& TGSI_WRITEMASK_Z) { 3842 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3843 3844 alu.op = ALU_OP1_MOV; 3845 3846 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 3847 3848 alu.src[0].sel = V_SQ_ALU_SRC_0; 3849 alu.src[0].chan = 0; 3850 3851 alu.last = 1; 3852 3853 r = r600_bytecode_add_alu(ctx->bc, &alu); 3854 if (r) 3855 return r; 3856 } 3857 3858 /* dst.w = 1.0; */ 3859 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3860 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3861 3862 alu.op = ALU_OP1_MOV; 3863 3864 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 3865 3866 alu.src[0].sel = V_SQ_ALU_SRC_1; 3867 alu.src[0].chan = 0; 3868 3869 alu.last = 1; 3870 3871 r = r600_bytecode_add_alu(ctx->bc, &alu); 3872 if (r) 3873 return r; 3874 } 3875 3876 return 0; 3877} 3878 3879static int tgsi_kill(struct r600_shader_ctx *ctx) 3880{ 3881 const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3882 struct r600_bytecode_alu alu; 3883 int i, r; 3884 3885 for (i = 0; i < 4; i++) { 3886 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3887 alu.op = ctx->inst_info->op; 3888 3889 alu.dst.chan = i; 3890 3891 alu.src[0].sel = V_SQ_ALU_SRC_0; 3892 3893 if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) { 3894 alu.src[1].sel = V_SQ_ALU_SRC_1; 3895 alu.src[1].neg = 1; 3896 } else { 3897 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3898 } 3899 if (i == 3) { 3900 alu.last = 1; 3901 } 3902 r = r600_bytecode_add_alu(ctx->bc, &alu); 3903 if (r) 3904 return r; 3905 } 3906 3907 /* kill must be last in ALU */ 3908 ctx->bc->force_add_cf = 1; 3909 ctx->shader->uses_kill = TRUE; 3910 return 0; 3911} 3912 3913static int tgsi_lit(struct r600_shader_ctx *ctx) 3914{ 3915 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3916 struct r600_bytecode_alu alu; 3917 int r; 3918 3919 /* tmp.x = max(src.y, 0.0) */ 3920 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3921 alu.op = ALU_OP2_MAX; 3922 r600_bytecode_src(&alu.src[0], &ctx->src[0], 
1); 3923 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 3924 alu.src[1].chan = 1; 3925 3926 alu.dst.sel = ctx->temp_reg; 3927 alu.dst.chan = 0; 3928 alu.dst.write = 1; 3929 3930 alu.last = 1; 3931 r = r600_bytecode_add_alu(ctx->bc, &alu); 3932 if (r) 3933 return r; 3934 3935 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 3936 { 3937 int chan; 3938 int sel; 3939 int i; 3940 3941 if (ctx->bc->chip_class == CAYMAN) { 3942 for (i = 0; i < 3; i++) { 3943 /* tmp.z = log(tmp.x) */ 3944 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3945 alu.op = ALU_OP1_LOG_CLAMPED; 3946 alu.src[0].sel = ctx->temp_reg; 3947 alu.src[0].chan = 0; 3948 alu.dst.sel = ctx->temp_reg; 3949 alu.dst.chan = i; 3950 if (i == 2) { 3951 alu.dst.write = 1; 3952 alu.last = 1; 3953 } else 3954 alu.dst.write = 0; 3955 3956 r = r600_bytecode_add_alu(ctx->bc, &alu); 3957 if (r) 3958 return r; 3959 } 3960 } else { 3961 /* tmp.z = log(tmp.x) */ 3962 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3963 alu.op = ALU_OP1_LOG_CLAMPED; 3964 alu.src[0].sel = ctx->temp_reg; 3965 alu.src[0].chan = 0; 3966 alu.dst.sel = ctx->temp_reg; 3967 alu.dst.chan = 2; 3968 alu.dst.write = 1; 3969 alu.last = 1; 3970 r = r600_bytecode_add_alu(ctx->bc, &alu); 3971 if (r) 3972 return r; 3973 } 3974 3975 chan = alu.dst.chan; 3976 sel = alu.dst.sel; 3977 3978 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 3979 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3980 alu.op = ALU_OP3_MUL_LIT; 3981 alu.src[0].sel = sel; 3982 alu.src[0].chan = chan; 3983 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 3984 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 3985 alu.dst.sel = ctx->temp_reg; 3986 alu.dst.chan = 0; 3987 alu.dst.write = 1; 3988 alu.is_op3 = 1; 3989 alu.last = 1; 3990 r = r600_bytecode_add_alu(ctx->bc, &alu); 3991 if (r) 3992 return r; 3993 3994 if (ctx->bc->chip_class == CAYMAN) { 3995 for (i = 0; i < 3; i++) { 3996 /* dst.z = exp(tmp.x) */ 3997 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3998 alu.op = 
ALU_OP1_EXP_IEEE; 3999 alu.src[0].sel = ctx->temp_reg; 4000 alu.src[0].chan = 0; 4001 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4002 if (i == 2) { 4003 alu.dst.write = 1; 4004 alu.last = 1; 4005 } else 4006 alu.dst.write = 0; 4007 r = r600_bytecode_add_alu(ctx->bc, &alu); 4008 if (r) 4009 return r; 4010 } 4011 } else { 4012 /* dst.z = exp(tmp.x) */ 4013 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4014 alu.op = ALU_OP1_EXP_IEEE; 4015 alu.src[0].sel = ctx->temp_reg; 4016 alu.src[0].chan = 0; 4017 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4018 alu.last = 1; 4019 r = r600_bytecode_add_alu(ctx->bc, &alu); 4020 if (r) 4021 return r; 4022 } 4023 } 4024 4025 /* dst.x, <- 1.0 */ 4026 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4027 alu.op = ALU_OP1_MOV; 4028 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 4029 alu.src[0].chan = 0; 4030 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4031 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 4032 r = r600_bytecode_add_alu(ctx->bc, &alu); 4033 if (r) 4034 return r; 4035 4036 /* dst.y = max(src.x, 0.0) */ 4037 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4038 alu.op = ALU_OP2_MAX; 4039 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4040 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 4041 alu.src[1].chan = 0; 4042 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4043 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 4044 r = r600_bytecode_add_alu(ctx->bc, &alu); 4045 if (r) 4046 return r; 4047 4048 /* dst.w, <- 1.0 */ 4049 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4050 alu.op = ALU_OP1_MOV; 4051 alu.src[0].sel = V_SQ_ALU_SRC_1; 4052 alu.src[0].chan = 0; 4053 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4054 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 4055 alu.last = 1; 4056 r = r600_bytecode_add_alu(ctx->bc, &alu); 4057 if (r) 4058 return r; 4059 4060 return 0; 4061} 4062 4063static int tgsi_rsq(struct r600_shader_ctx *ctx) 4064{ 4065 struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction; 4066 struct r600_bytecode_alu alu; 4067 int i, r; 4068 4069 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4070 4071 /* XXX: 4072 * For state trackers other than OpenGL, we'll want to use 4073 * _RECIPSQRT_IEEE instead. 4074 */ 4075 alu.op = ALU_OP1_RECIPSQRT_CLAMPED; 4076 4077 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4078 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4079 r600_bytecode_src_set_abs(&alu.src[i]); 4080 } 4081 alu.dst.sel = ctx->temp_reg; 4082 alu.dst.write = 1; 4083 alu.last = 1; 4084 r = r600_bytecode_add_alu(ctx->bc, &alu); 4085 if (r) 4086 return r; 4087 /* replicate result */ 4088 return tgsi_helper_tempx_replicate(ctx); 4089} 4090 4091static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 4092{ 4093 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4094 struct r600_bytecode_alu alu; 4095 int i, r; 4096 4097 for (i = 0; i < 4; i++) { 4098 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4099 alu.src[0].sel = ctx->temp_reg; 4100 alu.op = ALU_OP1_MOV; 4101 alu.dst.chan = i; 4102 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4103 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4104 if (i == 3) 4105 alu.last = 1; 4106 r = r600_bytecode_add_alu(ctx->bc, &alu); 4107 if (r) 4108 return r; 4109 } 4110 return 0; 4111} 4112 4113static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 4114{ 4115 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4116 struct r600_bytecode_alu alu; 4117 int i, r; 4118 4119 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4120 alu.op = ctx->inst_info->op; 4121 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4122 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4123 } 4124 alu.dst.sel = ctx->temp_reg; 4125 alu.dst.write = 1; 4126 alu.last = 1; 4127 r = r600_bytecode_add_alu(ctx->bc, &alu); 4128 if (r) 4129 return r; 4130 /* replicate result */ 4131 return 
tgsi_helper_tempx_replicate(ctx); 4132} 4133 4134static int cayman_pow(struct r600_shader_ctx *ctx) 4135{ 4136 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4137 int i, r; 4138 struct r600_bytecode_alu alu; 4139 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4140 4141 for (i = 0; i < 3; i++) { 4142 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4143 alu.op = ALU_OP1_LOG_IEEE; 4144 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4145 alu.dst.sel = ctx->temp_reg; 4146 alu.dst.chan = i; 4147 alu.dst.write = 1; 4148 if (i == 2) 4149 alu.last = 1; 4150 r = r600_bytecode_add_alu(ctx->bc, &alu); 4151 if (r) 4152 return r; 4153 } 4154 4155 /* b * LOG2(a) */ 4156 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4157 alu.op = ALU_OP2_MUL; 4158 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 4159 alu.src[1].sel = ctx->temp_reg; 4160 alu.dst.sel = ctx->temp_reg; 4161 alu.dst.write = 1; 4162 alu.last = 1; 4163 r = r600_bytecode_add_alu(ctx->bc, &alu); 4164 if (r) 4165 return r; 4166 4167 for (i = 0; i < last_slot; i++) { 4168 /* POW(a,b) = EXP2(b * LOG2(a))*/ 4169 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4170 alu.op = ALU_OP1_EXP_IEEE; 4171 alu.src[0].sel = ctx->temp_reg; 4172 4173 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4174 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4175 if (i == last_slot - 1) 4176 alu.last = 1; 4177 r = r600_bytecode_add_alu(ctx->bc, &alu); 4178 if (r) 4179 return r; 4180 } 4181 return 0; 4182} 4183 4184static int tgsi_pow(struct r600_shader_ctx *ctx) 4185{ 4186 struct r600_bytecode_alu alu; 4187 int r; 4188 4189 /* LOG2(a) */ 4190 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4191 alu.op = ALU_OP1_LOG_IEEE; 4192 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4193 alu.dst.sel = ctx->temp_reg; 4194 alu.dst.write = 1; 4195 alu.last = 1; 4196 r = r600_bytecode_add_alu(ctx->bc, &alu); 4197 if (r) 4198 return r; 4199 /* b * LOG2(a) */ 4200 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu));
	alu.op = ALU_OP2_MUL;
	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
	alu.src[1].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	/* POW(a,b) = EXP2(b * LOG2(a))*/
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_EXP_IEEE;
	alu.src[0].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return tgsi_helper_tempx_replicate(ctx);
}

/* Integer division/modulo via a 32-bit fixed-point reciprocal.
 * mod:       0 = emit quotient (DIV), 1 = emit remainder (MOD)
 * signed_op: 0 = unsigned, 1 = signed (operands are abs'd, the sign of the
 *            result is fixed up at the end)
 * The per-channel algorithm is spelled out step by step below; each numbered
 * comment before an ALU group refers back to this plan. */
static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, j;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int tmp0 = ctx->temp_reg;
	int tmp1 = r600_get_temp(ctx);
	int tmp2 = r600_get_temp(ctx);
	int tmp3 = r600_get_temp(ctx);
	/* Unsigned path:
	 *
	 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
	 *
	 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
	 * 2. tmp0.z = lo (tmp0.x * src2)
	 * 3. tmp0.w = -tmp0.z
	 * 4. tmp0.y = hi (tmp0.x * src2)
	 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
	 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
	 * 7. tmp1.x = tmp0.x - tmp0.w
	 * 8. tmp1.y = tmp0.x + tmp0.w
	 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
	 * 10. tmp0.z = hi(tmp0.x * src1) = q
	 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
	 *
	 * 12. tmp0.w = src1 - tmp0.y = r
	 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
	 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
	 *
	 * if DIV
	 *
	 * 15. tmp1.z = tmp0.z + 1 = q + 1
	 * 16. tmp1.w = tmp0.z - 1 = q - 1
	 *
	 * else MOD
	 *
	 * 15. tmp1.z = tmp0.w - src2 = r - src2
	 * 16. tmp1.w = tmp0.w + src2 = r + src2
	 *
	 * endif
	 *
	 * 17. tmp1.x = tmp1.x & tmp1.y
	 *
	 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
	 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
	 *
	 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
	 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
	 *
	 * NOTE(review): step 20 (src2==0 -> MAX_UINT) from the plan above is not
	 * actually emitted by the code below.
	 *
	 * Signed path:
	 *
	 * Same as unsigned, using abs values of the operands,
	 * and fixing the sign of the result in the end.
	 */

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		if (signed_op) {

			/* tmp2.x = -src0 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp2;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			alu.src[0].sel = V_SQ_ALU_SRC_0;

			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.y = -src1 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp2;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			alu.src[0].sel = V_SQ_ALU_SRC_0;

			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.z sign bit is set if src0 and src2 signs are different */
			/* it will be a sign of the quotient */
			if (!mod) {

				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_XOR_INT;

				alu.dst.sel = tmp2;
				alu.dst.chan = 2;
				alu.dst.write = 1;

				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}

			/* tmp2.x = |src0| */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_CNDGE_INT;
			alu.is_op3 = 1;

			alu.dst.sel = tmp2;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
			alu.src[2].sel = tmp2;
			alu.src[2].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.y = |src1| */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_CNDGE_INT;
			alu.is_op3 = 1;

			alu.dst.sel = tmp2;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			alu.src[2].sel = tmp2;
			alu.src[2].chan = 1;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		}

		/* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
		if (ctx->bc->chip_class == CAYMAN) {
			/* Cayman has no scalar RECIP_UINT; go through float:
			 * tmp0.x = f2u(2^32 * recip(u2f(src2))). */
			/* tmp3.x = u2f(src2) */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_UINT_TO_FLT;

			alu.dst.sel = tmp3;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp0.x = recip(tmp3.x) */
			for (j = 0 ; j < 3; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_RECIP_IEEE;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 0);

				alu.src[0].sel = tmp3;
				alu.src[0].chan = 0;

				if (j == 2)
					alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}

			/* tmp3.x = tmp0.x * 2^32 (0x4f800000 = 4294967296.0f) */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MUL;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
			alu.src[1].value = 0x4f800000;

			alu.dst.sel = tmp3;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;

			/* tmp0.x = f2u(tmp3.x) */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_FLT_TO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			alu.src[0].sel = tmp3;
			alu.src[0].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_RECIP_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 2. tmp0.z = lo (tmp0.x * src2) */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULLO_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 2);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;
				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
				}

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULLO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;
			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 3. tmp0.w = -tmp0.z */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp0;
		alu.dst.chan = 3;
		alu.dst.write = 1;

		alu.src[0].sel = V_SQ_ALU_SRC_0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 4. tmp0.y = hi (tmp0.x * src2) */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 1);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;

				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
				}
				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 2;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;
		alu.src[2].sel = tmp0;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 3);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 2;

				alu.src[1].sel = tmp0;
				alu.src[1].chan = 0;

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;

			alu.src[1].sel = tmp0;
			alu.src[1].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 7. tmp1.x = tmp0.x - tmp0.w */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 8. tmp1.y = tmp0.x + tmp0.w */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 1;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 1;
		alu.src[2].sel = tmp1;
		alu.src[2].chan = 0;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 10. tmp0.z = hi(tmp0.x * src1) = q */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 2);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;

				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 0;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
				}

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 0;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULLO_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 1);

				if (signed_op) {
					alu.src[0].sel = tmp2;
					alu.src[0].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
				}

				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULLO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.src[1].sel = tmp0;
			alu.src[1].chan = 2;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 12. tmp0.w = src1 - tmp0.y = r */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp0;
		alu.dst.chan = 3;
		alu.dst.write = 1;

		if (signed_op) {
			alu.src[0].sel = tmp2;
			alu.src[0].chan = 0;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		alu.src[1].sel = tmp0;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SETGE_UINT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 3;
		if (signed_op) {
			alu.src[1].sel = tmp2;
			alu.src[1].chan = 1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
		}

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SETGE_UINT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 1;
		alu.dst.write = 1;

		if (signed_op) {
			alu.src[0].sel = tmp2;
			alu.src[0].chan = 0;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		alu.src[1].sel = tmp0;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		if (mod) { /* UMOD */

			/* 15. tmp1.z = tmp0.w - src2 = r - src2 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 3;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* 16. tmp1.w = tmp0.w + src2 = r + src2 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 3;
			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		} else { /* UDIV */

			/* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;
			alu.src[1].sel = V_SQ_ALU_SRC_1_INT;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* 16. tmp1.w = tmp0.z - 1 = q - 1 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;
			alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		}

		/* 17. tmp1.x = tmp1.x & tmp1.y */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_AND_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
		/* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 2;
		alu.dst.write = 1;

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = mod ? 3 : 2;
		alu.src[2].sel = tmp1;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		/* signed case still needs the sign fixup below, so keep the
		 * result in tmp0.z instead of writing the destination. */
		if (signed_op) {
			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;
		} else {
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		}

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 3;
		alu.src[2].sel = tmp0;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		if (signed_op) {

			/* fix the sign of the result */

			if (mod) {

				/* tmp0.x = -tmp0.z */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_SUB_INT;

				alu.dst.sel = tmp0;
				alu.dst.chan = 0;
				alu.dst.write = 1;

				alu.src[0].sel = V_SQ_ALU_SRC_0;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

				/* sign of the remainder is the same as the sign of src0 */
				/* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

			} else {

				/* tmp0.x = -tmp0.z */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_SUB_INT;

				alu.dst.sel = tmp0;
				alu.dst.chan = 0;
				alu.dst.write = 1;

				alu.src[0].sel = V_SQ_ALU_SRC_0;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

				/* fix the quotient sign (same as the sign of src0*src1) */
				/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				alu.src[0].sel = tmp2;
				alu.src[0].chan = 2;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
	}
	return 0;
}

/* Thin TGSI opcode wrappers over tgsi_divmod(ctx, mod, signed_op). */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 0);
}

static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 0);
}

static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 1);
}

static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 1);
}


/* Float -> integer conversion: TRUNC into temp_reg first, then the
 * conversion op from ctx->inst_info (FLT_TO_INT / FLT_TO_UINT). */
static int tgsi_f2i(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst =
&ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* Pass 1: TRUNC the source into temp_reg for each written channel. */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_TRUNC;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* Pass 2: convert; FLT_TO_UINT always terminates the ALU group. */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* Integer absolute value: tmp = 0 - src, then CNDGE_INT picks src when
 * src >= 0 and the negation otherwise. */
static int tgsi_iabs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* tmp = -src */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[0].sel = V_SQ_ALU_SRC_0;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (src >= 0 ?
src : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		alu.dst.write = 1;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Integer sign (ISSG): result is -1, 0 or 1 depending on the sign of src,
 * built from two conditional moves. */
static int tgsi_issg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* tmp = (src >= 0 ? src : -1) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (tmp > 0 ?
1 : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT_INT;
		alu.is_op3 = 1;
		alu.dst.write = 1;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}



/* Float sign (SSG): result is -1.0, 0.0 or 1.0, built from two CNDGT ops. */
static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	/* tmp = (src > 0 ? 1 : src) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.src[1].sel = V_SQ_ALU_SRC_1;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (-tmp > 0 ?
-1 : tmp) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		/* -tmp via the source negate modifier */
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		alu.src[0].neg = 1;

		/* -1 = negated inline constant 1.0 */
		alu.src[1].sel = V_SQ_ALU_SRC_1;
		alu.src[1].neg = 1;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Bitfield insert (BFI): build the mask with BFM from width (src3) and
 * offset (src2), shift the insert value (src1) into place, then BFI it
 * into the base value (src0). */
static int tgsi_bfi(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	t1 = ctx->temp_reg;

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* create mask tmp */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_BFM_INT;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* shift insert left */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_LSHL_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4;
i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* actual bitfield insert */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_BFI_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* IMSB/UMSB: find the most significant bit via FFBH, then convert the
 * hardware's from-msb index to TGSI's from-lsb index (31 - n), keeping
 * the "not found" value (negative) unchanged. */
static int tgsi_msb(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
		ctx->inst_info->op == ALU_OP1_FFBH_UINT);

	t1 = ctx->temp_reg;

	/* bit position is indexed from lsb by TGSI, and from msb by the hardware */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t1 = FFBH_INT / FFBH_UINT */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t2 = 31 - t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[0].value = 31;
		alu.src[1].sel = t1;
		alu.src[1].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* result = t1 >= 0 ? t2 : t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		alu.src[2].sel = t1;
		alu.src[2].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* INTERP_CENTROID / INTERP_OFFSET / INTERP_SAMPLE for Evergreen/Cayman.
 * For OFFSET/SAMPLE the center barycentrics are adjusted with the H/V
 * gradients (fetched via GET_GRADIENTS_H/V) before the INTERP_XY/ZW pair
 * evaluates the attribute. */
static int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
	unsigned location;
	int input;

	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);

	input = inst->Src[0].Register.Index;

	/* Interpolators have been marked for use already by allocate_system_value_inputs */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */
	}
	else {
		location = TGSI_INTERPOLATE_LOC_CENTROID;
	}

	k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location);
	if (k < 0)
		k = 0;
	interp_gpr = ctx->eg_interpolators[k].ij_index / 2;
	interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2);

	/* NOTE: currently offset is not perspective correct */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		int sample_gpr = -1;
		int gradientsH, gradientsV;
		struct r600_bytecode_tex tex;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]);
		}

		gradientsH = r600_get_temp(ctx);
		gradientsV = r600_get_temp(ctx);
		for (i = 0; i < 2; i++) {
			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
			tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V;
			tex.src_gpr = interp_gpr;
			tex.src_sel_x = interp_base_chan + 0;
			tex.src_sel_y = interp_base_chan + 1;
			tex.src_sel_z = 0;
			tex.src_sel_w = 0;
			tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
			tex.dst_sel_x = 0;
			tex.dst_sel_y = 1;
			tex.dst_sel_z = 7;
			tex.dst_sel_w = 7;
			tex.inst_mod = 1; // Use per pixel gradient calculation
			tex.sampler_id = 0;
			tex.resource_id = tex.sampler_id;
			r = r600_bytecode_add_tex(ctx->bc, &tex);
			if (r)
				return r;
		}

		/* ij = ij + dH * offset.x (or sample pos x) */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsH;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 2;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
			}
			alu.src[2].sel = interp_gpr;
			alu.src[2].chan = interp_base_chan + i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* ij = ij + dV * offset.y (or sample pos y) */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsV;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 3;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
			}
			alu.src[2].sel = ctx->temp_reg;
			alu.src[2].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	tmp = r600_get_temp(ctx);
	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;

		alu.dst.sel = tmp;
		if ((i > 1 && i < 6)) {
			alu.dst.write = 1;
		}
		else {
			alu.dst.write = 0;
		}
		alu.dst.chan = i % 4;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
			inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1 - (i % 2);
		} else {
			alu.src[0].sel = interp_gpr;
			alu.src[0].chan = interp_base_chan + 1 - (i % 2);
		}
		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[1].chan = 0;

		alu.last = i % 4 == 3;
		alu.bank_swizzle_force = SQ_ALU_VEC_210;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	// INTERP can't swizzle dst
	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	for (i = 0; i <= lasti; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = tmp;
		alu.src[0].chan = ctx->src[0].swizzle[i];
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		alu.last = i == lasti;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}


/* Copy temp_reg to the destination, emitting NOPs for unwritten channels
 * so the ALU group stays four slots wide. */
static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
{
	struct
r600_bytecode_alu alu; 5642 int i, r; 5643 5644 for (i = 0; i < 4; i++) { 5645 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5646 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 5647 alu.op = ALU_OP0_NOP; 5648 alu.dst.chan = i; 5649 } else { 5650 alu.op = ALU_OP1_MOV; 5651 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5652 alu.src[0].sel = ctx->temp_reg; 5653 alu.src[0].chan = i; 5654 } 5655 if (i == 3) { 5656 alu.last = 1; 5657 } 5658 r = r600_bytecode_add_alu(ctx->bc, &alu); 5659 if (r) 5660 return r; 5661 } 5662 return 0; 5663} 5664 5665static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, 5666 unsigned temp, int chan, 5667 struct r600_bytecode_alu_src *bc_src, 5668 const struct r600_shader_src *shader_src) 5669{ 5670 struct r600_bytecode_alu alu; 5671 int r; 5672 5673 r600_bytecode_src(bc_src, shader_src, chan); 5674 5675 /* op3 operands don't support abs modifier */ 5676 if (bc_src->abs) { 5677 assert(temp!=0); /* we actually need the extra register, make sure it is allocated. */ 5678 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5679 alu.op = ALU_OP1_MOV; 5680 alu.dst.sel = temp; 5681 alu.dst.chan = chan; 5682 alu.dst.write = 1; 5683 5684 alu.src[0] = *bc_src; 5685 alu.last = true; // sufficient? 
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* Redirect the operand at the freshly written temp. */
		memset(bc_src, 0, sizeof(*bc_src));
		bc_src->sel = temp;
		bc_src->chan = chan;
	}
	return 0;
}

/* Generic translation for three-operand (op3) TGSI instructions: emit
 * ctx->inst_info->op once per enabled destination channel.  Sources go
 * through tgsi_make_src_for_op3() so abs modifiers (unsupported on op3
 * operands) are pre-resolved into per-source temps allocated up front. */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int temp_regs[4];

	/* Reserve a temp per source only if that source needs abs. */
	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
		temp_regs[j] = 0;
		if (ctx->src[j].abs)
			temp_regs[j] = r600_get_temp(ctx);
	}
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
			if (r)
				return r;
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Translate the TGSI dot-product family (DP2/DP3/DP4/DPH) using the
 * hardware's 4-slot dot-product ALU op.  The op always consumes four
 * channel pairs, so unused channels are forced to constant 0 (DP2/DP3)
 * and DPH's fourth src0 channel is forced to constant 1. */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		/* handle some special cases */
		switch (inst->Instruction.Opcode) {
		case TGSI_OPCODE_DP2:
			/* zero out the z/w contributions */
			if (i > 1) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DP3:
			/* zero out the w contribution */
			if (i > 2) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DPH:
			/* DPH: treat src0.w as 1.0 (homogeneous coordinate) */
			if (i == 3) {
				alu.src[0].sel = V_SQ_ALU_SRC_1;
				alu.src[0].chan = 0;
				alu.src[0].neg = 0;
			}
			break;
		default:
			break;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Texture fetch sources must be plain GPRs with no modifiers: anything
 * that is not a temp/input/output register, or carries neg/abs, must be
 * copied to a temp first.  GS inputs always require loading (they are
 * fetched indirectly). */
static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
						unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	return	(inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
		inst->Src[index].Register.File != TGSI_FILE_INPUT &&
		inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
		ctx->src[index].neg || ctx->src[index].abs ||
		(inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == TGSI_PROCESSOR_GEOMETRY);
}

/* Resolve a TGSI source register to its flat GPR index (per-file base
 * offset + register index). */
static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
					unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
}

/* Emit a buffer-texture TXF as a vertex fetch (VFETCH) instruction.
 * Src reg 1 supplies the buffer resource id.  On pre-Evergreen the
 * fetched components are additionally masked/patched via the buffer-info
 * constant buffer (see the tail of this function). */
static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
{
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_alu alu;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int src_gpr, r, i;
	int id = tgsi_tex_get_src_gpr(ctx, 1);

	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
	if (src_requires_loading) {
		/* Copy the (possibly modified) coordinates into a plain temp
		 * GPR, since VFETCH cannot apply source modifiers. */
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		src_gpr = ctx->temp_reg;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.op = FETCH_OP_VFETCH;
	vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = src_gpr;
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	/* dst_sel 7 masks a component out of the fetch result. */
	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;		/* SEL_Y */
	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;		/* SEL_Z */
	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;		/* SEL_W */
	vtx.use_const_fields = 1;

	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
		return r;

	/* Evergreen+ fetch hardware handles the format directly; the rest of
	 * this function is the pre-Evergreen component fixup path. */
	if (ctx->bc->chip_class >= EVERGREEN)
		return 0;

	/* On r600/r700, AND each fetched component with a per-buffer mask
	 * from the buffer-info constant buffer (two constants per buffer,
	 * hence id * 2). */
	for (i = 0; i < 4; i++) {
		int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_AND_INT;

		alu.dst.chan = i;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = i;

		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
		alu.src[1].sel += (id * 2);
		alu.src[1].chan = i % 4;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* NOTE(review): the mask tests the X/Y write-mask bits but the OR
	 * patches channel 3 (W, presumably forcing alpha for formats without
	 * it) — confirm this pairing against the buffer-info constant layout. */
	if (inst->Dst[0].Register.WriteMask & 3) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_OR_INT;

		alu.dst.chan = 3;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = 3;

		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
		alu.src[1].chan = 0;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TXQ on a buffer texture: the buffer size is not queryable from the
 * fetch hardware, so MOV it from the buffer-info constant buffer into
 * destination channel 0.  The constant layout differs per generation
 * (Evergreen packs two buffers per constant vector). */
static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int id = tgsi_tex_get_src_gpr(ctx, 1);

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_MOV;
	alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
	if (ctx->bc->chip_class >= EVERGREEN) {
		/* channel 0 or 2 of each word */
		alu.src[0].sel += (id / 2);
		alu.src[0].chan = (id % 2) * 2;
	} else {
		/* r600: size lives in the second dword per buffer; note the
		 * code reads chan 1 while the original comment said channel 2
		 * — verify against the buffer-info layout. */
		alu.src[0].sel += (id * 2) + 1;
		alu.src[0].chan = 1;
	}
	alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

static int tgsi_tex(struct r600_shader_ctx *ctx)
{
	static float one_point_five = 1.5f;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_tex tex;
	struct r600_bytecode_alu alu;
	unsigned src_gpr;
	int r, i, j;
	int opcode;
	/* Compressed MSAA colour reads need an extra FMASK (ldfptr) lookup to
	 * map the logical sample index to a physical one. */
	bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing &&
				    inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
				    (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
				     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);

	/* TXF ignores the hardware offset fields, so offsets must be folded
	 * into the integer coordinates instead. */
	bool txf_add_offsets = inst->Texture.NumOffsets &&
			     inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
			     inst->Texture.Texture !=
TGSI_TEXTURE_BUFFER; 5944 5945 /* Texture fetch instructions can only use gprs as source. 5946 * Also they cannot negate the source or take the absolute value */ 5947 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 5948 inst->Instruction.Opcode != TGSI_OPCODE_TXQS && 5949 tgsi_tex_src_requires_loading(ctx, 0)) || 5950 read_compressed_msaa || txf_add_offsets; 5951 5952 boolean src_loaded = FALSE; 5953 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; 5954 int8_t offset_x = 0, offset_y = 0, offset_z = 0; 5955 boolean has_txq_cube_array_z = false; 5956 unsigned sampler_index_mode; 5957 5958 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 5959 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 5960 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 5961 if (inst->Dst[0].Register.WriteMask & 4) { 5962 ctx->shader->has_txq_cube_array_z_comp = true; 5963 has_txq_cube_array_z = true; 5964 } 5965 5966 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 5967 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 5968 inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 5969 inst->Instruction.Opcode == TGSI_OPCODE_TG4) 5970 sampler_src_reg = 2; 5971 5972 /* TGSI moves the sampler to src reg 3 for TXD */ 5973 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 5974 sampler_src_reg = 3; 5975 5976 sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 5977 5978 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 5979 5980 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 5981 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 5982 ctx->shader->uses_tex_buffers = true; 5983 return r600_do_buffer_txq(ctx); 5984 } 5985 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 5986 if (ctx->bc->chip_class < EVERGREEN) 5987 ctx->shader->uses_tex_buffers = true; 5988 return do_vtx_fetch_inst(ctx, src_requires_loading); 5989 } 5990 } 5991 5992 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 5993 int out_chan; 5994 /* Add perspective divide */ 5995 if (ctx->bc->chip_class == CAYMAN) { 5996 out_chan = 2; 5997 for (i = 0; i < 3; i++) { 5998 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5999 alu.op = ALU_OP1_RECIP_IEEE; 6000 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6001 6002 alu.dst.sel = ctx->temp_reg; 6003 alu.dst.chan = i; 6004 if (i == 2) 6005 alu.last = 1; 6006 if (out_chan == i) 6007 alu.dst.write = 1; 6008 r = r600_bytecode_add_alu(ctx->bc, &alu); 6009 if (r) 6010 return r; 6011 } 6012 6013 } else { 6014 out_chan = 3; 6015 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6016 alu.op = ALU_OP1_RECIP_IEEE; 6017 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6018 6019 alu.dst.sel = ctx->temp_reg; 6020 alu.dst.chan = out_chan; 6021 alu.last = 1; 6022 alu.dst.write = 1; 6023 r = r600_bytecode_add_alu(ctx->bc, &alu); 6024 if (r) 6025 return r; 6026 } 6027 6028 for (i = 0; i < 3; i++) { 6029 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6030 alu.op = ALU_OP2_MUL; 6031 alu.src[0].sel = ctx->temp_reg; 6032 alu.src[0].chan = out_chan; 6033 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6034 alu.dst.sel = ctx->temp_reg; 6035 alu.dst.chan = i; 6036 alu.dst.write = 1; 6037 r = r600_bytecode_add_alu(ctx->bc, &alu); 6038 if (r) 6039 return r; 6040 } 6041 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6042 alu.op = ALU_OP1_MOV; 6043 alu.src[0].sel = V_SQ_ALU_SRC_1; 6044 
alu.src[0].chan = 0; 6045 alu.dst.sel = ctx->temp_reg; 6046 alu.dst.chan = 3; 6047 alu.last = 1; 6048 alu.dst.write = 1; 6049 r = r600_bytecode_add_alu(ctx->bc, &alu); 6050 if (r) 6051 return r; 6052 src_loaded = TRUE; 6053 src_gpr = ctx->temp_reg; 6054 } 6055 6056 6057 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 6058 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6059 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6060 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 6061 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 6062 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 6063 6064 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 6065 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 6066 6067 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 6068 for (i = 0; i < 4; i++) { 6069 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6070 alu.op = ALU_OP2_CUBE; 6071 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 6072 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 6073 alu.dst.sel = ctx->temp_reg; 6074 alu.dst.chan = i; 6075 if (i == 3) 6076 alu.last = 1; 6077 alu.dst.write = 1; 6078 r = r600_bytecode_add_alu(ctx->bc, &alu); 6079 if (r) 6080 return r; 6081 } 6082 6083 /* tmp1.z = RCP_e(|tmp1.z|) */ 6084 if (ctx->bc->chip_class == CAYMAN) { 6085 for (i = 0; i < 3; i++) { 6086 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6087 alu.op = ALU_OP1_RECIP_IEEE; 6088 alu.src[0].sel = ctx->temp_reg; 6089 alu.src[0].chan = 2; 6090 alu.src[0].abs = 1; 6091 alu.dst.sel = ctx->temp_reg; 6092 alu.dst.chan = i; 6093 if (i == 2) 6094 alu.dst.write = 1; 6095 if (i == 2) 6096 alu.last = 1; 6097 r = r600_bytecode_add_alu(ctx->bc, &alu); 6098 if (r) 6099 return r; 6100 } 6101 } else { 6102 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6103 alu.op = ALU_OP1_RECIP_IEEE; 6104 alu.src[0].sel = ctx->temp_reg; 6105 alu.src[0].chan = 2; 6106 alu.src[0].abs = 1; 6107 alu.dst.sel = ctx->temp_reg; 6108 alu.dst.chan = 2; 6109 
alu.dst.write = 1; 6110 alu.last = 1; 6111 r = r600_bytecode_add_alu(ctx->bc, &alu); 6112 if (r) 6113 return r; 6114 } 6115 6116 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 6117 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 6118 * muladd has no writemask, have to use another temp 6119 */ 6120 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6121 alu.op = ALU_OP3_MULADD; 6122 alu.is_op3 = 1; 6123 6124 alu.src[0].sel = ctx->temp_reg; 6125 alu.src[0].chan = 0; 6126 alu.src[1].sel = ctx->temp_reg; 6127 alu.src[1].chan = 2; 6128 6129 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6130 alu.src[2].chan = 0; 6131 alu.src[2].value = *(uint32_t *)&one_point_five; 6132 6133 alu.dst.sel = ctx->temp_reg; 6134 alu.dst.chan = 0; 6135 alu.dst.write = 1; 6136 6137 r = r600_bytecode_add_alu(ctx->bc, &alu); 6138 if (r) 6139 return r; 6140 6141 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6142 alu.op = ALU_OP3_MULADD; 6143 alu.is_op3 = 1; 6144 6145 alu.src[0].sel = ctx->temp_reg; 6146 alu.src[0].chan = 1; 6147 alu.src[1].sel = ctx->temp_reg; 6148 alu.src[1].chan = 2; 6149 6150 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6151 alu.src[2].chan = 0; 6152 alu.src[2].value = *(uint32_t *)&one_point_five; 6153 6154 alu.dst.sel = ctx->temp_reg; 6155 alu.dst.chan = 1; 6156 alu.dst.write = 1; 6157 6158 alu.last = 1; 6159 r = r600_bytecode_add_alu(ctx->bc, &alu); 6160 if (r) 6161 return r; 6162 /* write initial compare value into Z component 6163 - W src 0 for shadow cube 6164 - X src 1 for shadow cube array */ 6165 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6166 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6167 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6168 alu.op = ALU_OP1_MOV; 6169 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 6170 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 6171 else 6172 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6173 alu.dst.sel = ctx->temp_reg; 6174 alu.dst.chan = 2; 6175 alu.dst.write = 1; 6176 alu.last = 1; 
6177 r = r600_bytecode_add_alu(ctx->bc, &alu); 6178 if (r) 6179 return r; 6180 } 6181 6182 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6183 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6184 if (ctx->bc->chip_class >= EVERGREEN) { 6185 int mytmp = r600_get_temp(ctx); 6186 static const float eight = 8.0f; 6187 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6188 alu.op = ALU_OP1_MOV; 6189 alu.src[0].sel = ctx->temp_reg; 6190 alu.src[0].chan = 3; 6191 alu.dst.sel = mytmp; 6192 alu.dst.chan = 0; 6193 alu.dst.write = 1; 6194 alu.last = 1; 6195 r = r600_bytecode_add_alu(ctx->bc, &alu); 6196 if (r) 6197 return r; 6198 6199 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 6200 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6201 alu.op = ALU_OP3_MULADD; 6202 alu.is_op3 = 1; 6203 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6204 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6205 alu.src[1].chan = 0; 6206 alu.src[1].value = *(uint32_t *)&eight; 6207 alu.src[2].sel = mytmp; 6208 alu.src[2].chan = 0; 6209 alu.dst.sel = ctx->temp_reg; 6210 alu.dst.chan = 3; 6211 alu.dst.write = 1; 6212 alu.last = 1; 6213 r = r600_bytecode_add_alu(ctx->bc, &alu); 6214 if (r) 6215 return r; 6216 } else if (ctx->bc->chip_class < EVERGREEN) { 6217 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6218 tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 6219 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6220 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6221 tex.src_gpr = r600_get_temp(ctx); 6222 tex.src_sel_x = 0; 6223 tex.src_sel_y = 0; 6224 tex.src_sel_z = 0; 6225 tex.src_sel_w = 0; 6226 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 6227 tex.coord_type_x = 1; 6228 tex.coord_type_y = 1; 6229 tex.coord_type_z = 1; 6230 tex.coord_type_w = 1; 6231 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6232 alu.op = ALU_OP1_MOV; 6233 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6234 alu.dst.sel = tex.src_gpr; 
6235 alu.dst.chan = 0; 6236 alu.last = 1; 6237 alu.dst.write = 1; 6238 r = r600_bytecode_add_alu(ctx->bc, &alu); 6239 if (r) 6240 return r; 6241 6242 r = r600_bytecode_add_tex(ctx->bc, &tex); 6243 if (r) 6244 return r; 6245 } 6246 6247 } 6248 6249 /* for cube forms of lod and bias we need to route things */ 6250 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 6251 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 6252 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6253 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 6254 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6255 alu.op = ALU_OP1_MOV; 6256 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6257 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 6258 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 6259 else 6260 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6261 alu.dst.sel = ctx->temp_reg; 6262 alu.dst.chan = 2; 6263 alu.last = 1; 6264 alu.dst.write = 1; 6265 r = r600_bytecode_add_alu(ctx->bc, &alu); 6266 if (r) 6267 return r; 6268 } 6269 6270 src_loaded = TRUE; 6271 src_gpr = ctx->temp_reg; 6272 } 6273 6274 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 6275 int temp_h = 0, temp_v = 0; 6276 int start_val = 0; 6277 6278 /* if we've already loaded the src (i.e. CUBE don't reload it). 
*/ 6279 if (src_loaded == TRUE) 6280 start_val = 1; 6281 else 6282 src_loaded = TRUE; 6283 for (i = start_val; i < 3; i++) { 6284 int treg = r600_get_temp(ctx); 6285 6286 if (i == 0) 6287 src_gpr = treg; 6288 else if (i == 1) 6289 temp_h = treg; 6290 else 6291 temp_v = treg; 6292 6293 for (j = 0; j < 4; j++) { 6294 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6295 alu.op = ALU_OP1_MOV; 6296 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 6297 alu.dst.sel = treg; 6298 alu.dst.chan = j; 6299 if (j == 3) 6300 alu.last = 1; 6301 alu.dst.write = 1; 6302 r = r600_bytecode_add_alu(ctx->bc, &alu); 6303 if (r) 6304 return r; 6305 } 6306 } 6307 for (i = 1; i < 3; i++) { 6308 /* set gradients h/v */ 6309 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6310 tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : 6311 FETCH_OP_SET_GRADIENTS_V; 6312 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6313 tex.sampler_index_mode = sampler_index_mode; 6314 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6315 tex.resource_index_mode = sampler_index_mode; 6316 6317 tex.src_gpr = (i == 1) ? 
temp_h : temp_v; 6318 tex.src_sel_x = 0; 6319 tex.src_sel_y = 1; 6320 tex.src_sel_z = 2; 6321 tex.src_sel_w = 3; 6322 6323 tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */ 6324 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 6325 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 6326 tex.coord_type_x = 1; 6327 tex.coord_type_y = 1; 6328 tex.coord_type_z = 1; 6329 tex.coord_type_w = 1; 6330 } 6331 r = r600_bytecode_add_tex(ctx->bc, &tex); 6332 if (r) 6333 return r; 6334 } 6335 } 6336 6337 if (src_requires_loading && !src_loaded) { 6338 for (i = 0; i < 4; i++) { 6339 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6340 alu.op = ALU_OP1_MOV; 6341 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6342 alu.dst.sel = ctx->temp_reg; 6343 alu.dst.chan = i; 6344 if (i == 3) 6345 alu.last = 1; 6346 alu.dst.write = 1; 6347 r = r600_bytecode_add_alu(ctx->bc, &alu); 6348 if (r) 6349 return r; 6350 } 6351 src_loaded = TRUE; 6352 src_gpr = ctx->temp_reg; 6353 } 6354 6355 /* get offset values */ 6356 if (inst->Texture.NumOffsets) { 6357 assert(inst->Texture.NumOffsets == 1); 6358 6359 /* The texture offset feature doesn't work with the TXF instruction 6360 * and must be emulated by adding the offset to the texture coordinates. 
*/ 6361 if (txf_add_offsets) { 6362 const struct tgsi_texture_offset *off = inst->TexOffsets; 6363 6364 switch (inst->Texture.Texture) { 6365 case TGSI_TEXTURE_3D: 6366 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6367 alu.op = ALU_OP2_ADD_INT; 6368 alu.src[0].sel = src_gpr; 6369 alu.src[0].chan = 2; 6370 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6371 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 6372 alu.dst.sel = src_gpr; 6373 alu.dst.chan = 2; 6374 alu.dst.write = 1; 6375 alu.last = 1; 6376 r = r600_bytecode_add_alu(ctx->bc, &alu); 6377 if (r) 6378 return r; 6379 /* fall through */ 6380 6381 case TGSI_TEXTURE_2D: 6382 case TGSI_TEXTURE_SHADOW2D: 6383 case TGSI_TEXTURE_RECT: 6384 case TGSI_TEXTURE_SHADOWRECT: 6385 case TGSI_TEXTURE_2D_ARRAY: 6386 case TGSI_TEXTURE_SHADOW2D_ARRAY: 6387 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6388 alu.op = ALU_OP2_ADD_INT; 6389 alu.src[0].sel = src_gpr; 6390 alu.src[0].chan = 1; 6391 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6392 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 6393 alu.dst.sel = src_gpr; 6394 alu.dst.chan = 1; 6395 alu.dst.write = 1; 6396 alu.last = 1; 6397 r = r600_bytecode_add_alu(ctx->bc, &alu); 6398 if (r) 6399 return r; 6400 /* fall through */ 6401 6402 case TGSI_TEXTURE_1D: 6403 case TGSI_TEXTURE_SHADOW1D: 6404 case TGSI_TEXTURE_1D_ARRAY: 6405 case TGSI_TEXTURE_SHADOW1D_ARRAY: 6406 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6407 alu.op = ALU_OP2_ADD_INT; 6408 alu.src[0].sel = src_gpr; 6409 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6410 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 6411 alu.dst.sel = src_gpr; 6412 alu.dst.write = 1; 6413 alu.last = 1; 6414 r = r600_bytecode_add_alu(ctx->bc, &alu); 6415 if (r) 6416 return r; 6417 break; 6418 /* texture offsets do not apply to other texture targets */ 6419 } 6420 } else { 6421 switch (inst->Texture.Texture) { 6422 case TGSI_TEXTURE_3D: 6423 offset_z = ctx->literals[4 * 
inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 6424 /* fallthrough */ 6425 case TGSI_TEXTURE_2D: 6426 case TGSI_TEXTURE_SHADOW2D: 6427 case TGSI_TEXTURE_RECT: 6428 case TGSI_TEXTURE_SHADOWRECT: 6429 case TGSI_TEXTURE_2D_ARRAY: 6430 case TGSI_TEXTURE_SHADOW2D_ARRAY: 6431 offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 6432 /* fallthrough */ 6433 case TGSI_TEXTURE_1D: 6434 case TGSI_TEXTURE_SHADOW1D: 6435 case TGSI_TEXTURE_1D_ARRAY: 6436 case TGSI_TEXTURE_SHADOW1D_ARRAY: 6437 offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 6438 } 6439 } 6440 } 6441 6442 /* Obtain the sample index for reading a compressed MSAA color texture. 6443 * To read the FMASK, we use the ldfptr instruction, which tells us 6444 * where the samples are stored. 6445 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 6446 * which is the identity mapping. Each nibble says which physical sample 6447 * should be fetched to get that sample. 6448 * 6449 * Assume src.z contains the sample index. It should be modified like this: 6450 * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 6451 * Then fetch the texel with src. 
6452 */ 6453 if (read_compressed_msaa) { 6454 unsigned sample_chan = 3; 6455 unsigned temp = r600_get_temp(ctx); 6456 assert(src_loaded); 6457 6458 /* temp.w = ldfptr() */ 6459 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6460 tex.op = FETCH_OP_LD; 6461 tex.inst_mod = 1; /* to indicate this is ldfptr */ 6462 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6463 tex.sampler_index_mode = sampler_index_mode; 6464 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6465 tex.resource_index_mode = sampler_index_mode; 6466 tex.src_gpr = src_gpr; 6467 tex.dst_gpr = temp; 6468 tex.dst_sel_x = 7; /* mask out these components */ 6469 tex.dst_sel_y = 7; 6470 tex.dst_sel_z = 7; 6471 tex.dst_sel_w = 0; /* store X */ 6472 tex.src_sel_x = 0; 6473 tex.src_sel_y = 1; 6474 tex.src_sel_z = 2; 6475 tex.src_sel_w = 3; 6476 tex.offset_x = offset_x; 6477 tex.offset_y = offset_y; 6478 tex.offset_z = offset_z; 6479 r = r600_bytecode_add_tex(ctx->bc, &tex); 6480 if (r) 6481 return r; 6482 6483 /* temp.x = sample_index*4 */ 6484 if (ctx->bc->chip_class == CAYMAN) { 6485 for (i = 0 ; i < 4; i++) { 6486 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6487 alu.op = ALU_OP2_MULLO_INT; 6488 alu.src[0].sel = src_gpr; 6489 alu.src[0].chan = sample_chan; 6490 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6491 alu.src[1].value = 4; 6492 alu.dst.sel = temp; 6493 alu.dst.chan = i; 6494 alu.dst.write = i == 0; 6495 if (i == 3) 6496 alu.last = 1; 6497 r = r600_bytecode_add_alu(ctx->bc, &alu); 6498 if (r) 6499 return r; 6500 } 6501 } else { 6502 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6503 alu.op = ALU_OP2_MULLO_INT; 6504 alu.src[0].sel = src_gpr; 6505 alu.src[0].chan = sample_chan; 6506 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6507 alu.src[1].value = 4; 6508 alu.dst.sel = temp; 6509 alu.dst.chan = 0; 6510 alu.dst.write = 1; 6511 alu.last = 1; 6512 r = r600_bytecode_add_alu(ctx->bc, &alu); 6513 if (r) 6514 return r; 6515 } 6516 6517 /* sample_index = temp.w >> temp.x */ 6518 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6519 alu.op = ALU_OP2_LSHR_INT; 6520 alu.src[0].sel = temp; 6521 alu.src[0].chan = 3; 6522 alu.src[1].sel = temp; 6523 alu.src[1].chan = 0; 6524 alu.dst.sel = src_gpr; 6525 alu.dst.chan = sample_chan; 6526 alu.dst.write = 1; 6527 alu.last = 1; 6528 r = r600_bytecode_add_alu(ctx->bc, &alu); 6529 if (r) 6530 return r; 6531 6532 /* sample_index & 0xF */ 6533 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6534 alu.op = ALU_OP2_AND_INT; 6535 alu.src[0].sel = src_gpr; 6536 alu.src[0].chan = sample_chan; 6537 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6538 alu.src[1].value = 0xF; 6539 alu.dst.sel = src_gpr; 6540 alu.dst.chan = sample_chan; 6541 alu.dst.write = 1; 6542 alu.last = 1; 6543 r = r600_bytecode_add_alu(ctx->bc, &alu); 6544 if (r) 6545 return r; 6546#if 0 6547 /* visualize the FMASK */ 6548 for (i = 0; i < 4; i++) { 6549 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6550 alu.op = ALU_OP1_INT_TO_FLT; 6551 alu.src[0].sel = src_gpr; 6552 alu.src[0].chan = sample_chan; 6553 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 6554 alu.dst.chan = i; 6555 alu.dst.write = 1; 6556 alu.last = 1; 6557 r = r600_bytecode_add_alu(ctx->bc, &alu); 6558 if (r) 6559 return r; 6560 } 6561 return 0; 6562#endif 6563 } 6564 6565 /* does this shader want a num layers from TXQ for a cube array? 
*/ 6566 if (has_txq_cube_array_z) { 6567 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6568 6569 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6570 alu.op = ALU_OP1_MOV; 6571 6572 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 6573 if (ctx->bc->chip_class >= EVERGREEN) { 6574 /* channel 1 or 3 of each word */ 6575 alu.src[0].sel += (id / 2); 6576 alu.src[0].chan = ((id % 2) * 2) + 1; 6577 } else { 6578 /* r600 we have them at channel 2 of the second dword */ 6579 alu.src[0].sel += (id * 2) + 1; 6580 alu.src[0].chan = 2; 6581 } 6582 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6583 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 6584 alu.last = 1; 6585 r = r600_bytecode_add_alu(ctx->bc, &alu); 6586 if (r) 6587 return r; 6588 /* disable writemask from texture instruction */ 6589 inst->Dst[0].Register.WriteMask &= ~4; 6590 } 6591 6592 opcode = ctx->inst_info->op; 6593 if (opcode == FETCH_OP_GATHER4 && 6594 inst->TexOffsets[0].File != TGSI_FILE_NULL && 6595 inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { 6596 opcode = FETCH_OP_GATHER4_O; 6597 6598 /* GATHER4_O/GATHER4_C_O use offset values loaded by 6599 SET_TEXTURE_OFFSETS instruction. The immediate offset values 6600 encoded in the instruction are ignored. 
*/ 6601 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6602 tex.op = FETCH_OP_SET_TEXTURE_OFFSETS; 6603 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6604 tex.sampler_index_mode = sampler_index_mode; 6605 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6606 tex.resource_index_mode = sampler_index_mode; 6607 6608 tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; 6609 tex.src_sel_x = inst->TexOffsets[0].SwizzleX; 6610 tex.src_sel_y = inst->TexOffsets[0].SwizzleY; 6611 tex.src_sel_z = inst->TexOffsets[0].SwizzleZ; 6612 tex.src_sel_w = 4; 6613 6614 tex.dst_sel_x = 7; 6615 tex.dst_sel_y = 7; 6616 tex.dst_sel_z = 7; 6617 tex.dst_sel_w = 7; 6618 6619 r = r600_bytecode_add_tex(ctx->bc, &tex); 6620 if (r) 6621 return r; 6622 } 6623 6624 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 6625 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 6626 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 6627 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6628 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 6629 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 6630 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6631 switch (opcode) { 6632 case FETCH_OP_SAMPLE: 6633 opcode = FETCH_OP_SAMPLE_C; 6634 break; 6635 case FETCH_OP_SAMPLE_L: 6636 opcode = FETCH_OP_SAMPLE_C_L; 6637 break; 6638 case FETCH_OP_SAMPLE_LB: 6639 opcode = FETCH_OP_SAMPLE_C_LB; 6640 break; 6641 case FETCH_OP_SAMPLE_G: 6642 opcode = FETCH_OP_SAMPLE_C_G; 6643 break; 6644 /* Texture gather variants */ 6645 case FETCH_OP_GATHER4: 6646 opcode = FETCH_OP_GATHER4_C; 6647 break; 6648 case FETCH_OP_GATHER4_O: 6649 opcode = FETCH_OP_GATHER4_C_O; 6650 break; 6651 } 6652 } 6653 6654 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6655 tex.op = opcode; 6656 6657 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6658 tex.sampler_index_mode = sampler_index_mode; 6659 tex.resource_id = tex.sampler_id + 
R600_MAX_CONST_BUFFERS; 6660 tex.resource_index_mode = sampler_index_mode; 6661 tex.src_gpr = src_gpr; 6662 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 6663 6664 if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE || 6665 inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) { 6666 tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */ 6667 } 6668 6669 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 6670 int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]; 6671 tex.inst_mod = texture_component_select; 6672 6673 if (ctx->bc->chip_class == CAYMAN) { 6674 /* GATHER4 result order is different from TGSI TG4 */ 6675 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7; 6676 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7; 6677 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7; 6678 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 6679 } else { 6680 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 6681 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 6682 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 6683 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 6684 } 6685 } 6686 else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) { 6687 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 6688 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 6689 tex.dst_sel_z = 7; 6690 tex.dst_sel_w = 7; 6691 } 6692 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 6693 tex.dst_sel_x = 3; 6694 tex.dst_sel_y = 7; 6695 tex.dst_sel_z = 7; 6696 tex.dst_sel_w = 7; 6697 } 6698 else { 6699 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 6700 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 6701 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 
2 : 7; 6702 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 6703 } 6704 6705 6706 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ || 6707 inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 6708 tex.src_sel_x = 4; 6709 tex.src_sel_y = 4; 6710 tex.src_sel_z = 4; 6711 tex.src_sel_w = 4; 6712 } else if (src_loaded) { 6713 tex.src_sel_x = 0; 6714 tex.src_sel_y = 1; 6715 tex.src_sel_z = 2; 6716 tex.src_sel_w = 3; 6717 } else { 6718 tex.src_sel_x = ctx->src[0].swizzle[0]; 6719 tex.src_sel_y = ctx->src[0].swizzle[1]; 6720 tex.src_sel_z = ctx->src[0].swizzle[2]; 6721 tex.src_sel_w = ctx->src[0].swizzle[3]; 6722 tex.src_rel = ctx->src[0].rel; 6723 } 6724 6725 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 6726 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6727 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6728 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6729 tex.src_sel_x = 1; 6730 tex.src_sel_y = 0; 6731 tex.src_sel_z = 3; 6732 tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 6733 } 6734 6735 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 6736 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 6737 tex.coord_type_x = 1; 6738 tex.coord_type_y = 1; 6739 } 6740 tex.coord_type_z = 1; 6741 tex.coord_type_w = 1; 6742 6743 tex.offset_x = offset_x; 6744 tex.offset_y = offset_y; 6745 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && 6746 (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 6747 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { 6748 tex.offset_z = 0; 6749 } 6750 else { 6751 tex.offset_z = offset_z; 6752 } 6753 6754 /* Put the depth for comparison in W. 6755 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 6756 * Some instructions expect the depth in Z. 
*/ 6757 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 6758 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 6759 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 6760 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 6761 opcode != FETCH_OP_SAMPLE_C_L && 6762 opcode != FETCH_OP_SAMPLE_C_LB) { 6763 tex.src_sel_w = tex.src_sel_z; 6764 } 6765 6766 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 6767 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 6768 if (opcode == FETCH_OP_SAMPLE_C_L || 6769 opcode == FETCH_OP_SAMPLE_C_LB) { 6770 /* the array index is read from Y */ 6771 tex.coord_type_y = 0; 6772 } else { 6773 /* the array index is read from Z */ 6774 tex.coord_type_z = 0; 6775 tex.src_sel_z = tex.src_sel_y; 6776 } 6777 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 6778 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 6779 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6780 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 6781 (ctx->bc->chip_class >= EVERGREEN))) 6782 /* the array index is read from Z */ 6783 tex.coord_type_z = 0; 6784 6785 /* mask unused source components */ 6786 if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) { 6787 switch (inst->Texture.Texture) { 6788 case TGSI_TEXTURE_2D: 6789 case TGSI_TEXTURE_RECT: 6790 tex.src_sel_z = 7; 6791 tex.src_sel_w = 7; 6792 break; 6793 case TGSI_TEXTURE_1D_ARRAY: 6794 tex.src_sel_y = 7; 6795 tex.src_sel_w = 7; 6796 break; 6797 case TGSI_TEXTURE_1D: 6798 tex.src_sel_y = 7; 6799 tex.src_sel_z = 7; 6800 tex.src_sel_w = 7; 6801 break; 6802 } 6803 } 6804 6805 r = r600_bytecode_add_tex(ctx->bc, &tex); 6806 if (r) 6807 return r; 6808 6809 /* add shadow ambient support - gallium doesn't do it yet */ 6810 return 0; 6811} 6812 6813static int tgsi_lrp(struct r600_shader_ctx *ctx) 6814{ 6815 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6816 struct r600_bytecode_alu alu; 6817 int lasti = 
tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6818 unsigned i, temp_regs[2]; 6819 int r; 6820 6821 /* optimize if it's just an equal balance */ 6822 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 6823 for (i = 0; i < lasti + 1; i++) { 6824 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6825 continue; 6826 6827 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6828 alu.op = ALU_OP2_ADD; 6829 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 6830 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6831 alu.omod = 3; 6832 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6833 alu.dst.chan = i; 6834 if (i == lasti) { 6835 alu.last = 1; 6836 } 6837 r = r600_bytecode_add_alu(ctx->bc, &alu); 6838 if (r) 6839 return r; 6840 } 6841 return 0; 6842 } 6843 6844 /* 1 - src0 */ 6845 for (i = 0; i < lasti + 1; i++) { 6846 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6847 continue; 6848 6849 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6850 alu.op = ALU_OP2_ADD; 6851 alu.src[0].sel = V_SQ_ALU_SRC_1; 6852 alu.src[0].chan = 0; 6853 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6854 r600_bytecode_src_toggle_neg(&alu.src[1]); 6855 alu.dst.sel = ctx->temp_reg; 6856 alu.dst.chan = i; 6857 if (i == lasti) { 6858 alu.last = 1; 6859 } 6860 alu.dst.write = 1; 6861 r = r600_bytecode_add_alu(ctx->bc, &alu); 6862 if (r) 6863 return r; 6864 } 6865 6866 /* (1 - src0) * src2 */ 6867 for (i = 0; i < lasti + 1; i++) { 6868 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6869 continue; 6870 6871 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6872 alu.op = ALU_OP2_MUL; 6873 alu.src[0].sel = ctx->temp_reg; 6874 alu.src[0].chan = i; 6875 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6876 alu.dst.sel = ctx->temp_reg; 6877 alu.dst.chan = i; 6878 if (i == lasti) { 6879 alu.last = 1; 6880 } 6881 alu.dst.write = 1; 6882 r = r600_bytecode_add_alu(ctx->bc, &alu); 6883 if (r) 6884 return r; 6885 } 6886 6887 /* src0 * src1 + (1 - src0) * src2 */ 6888 if (ctx->src[0].abs) 6889 
temp_regs[0] = r600_get_temp(ctx); 6890 else 6891 temp_regs[0] = 0; 6892 if (ctx->src[1].abs) 6893 temp_regs[1] = r600_get_temp(ctx); 6894 else 6895 temp_regs[1] = 0; 6896 6897 for (i = 0; i < lasti + 1; i++) { 6898 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6899 continue; 6900 6901 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6902 alu.op = ALU_OP3_MULADD; 6903 alu.is_op3 = 1; 6904 r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); 6905 if (r) 6906 return r; 6907 r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]); 6908 if (r) 6909 return r; 6910 alu.src[2].sel = ctx->temp_reg; 6911 alu.src[2].chan = i; 6912 6913 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6914 alu.dst.chan = i; 6915 if (i == lasti) { 6916 alu.last = 1; 6917 } 6918 r = r600_bytecode_add_alu(ctx->bc, &alu); 6919 if (r) 6920 return r; 6921 } 6922 return 0; 6923} 6924 6925static int tgsi_cmp(struct r600_shader_ctx *ctx) 6926{ 6927 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6928 struct r600_bytecode_alu alu; 6929 int i, r, j; 6930 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6931 int temp_regs[3]; 6932 6933 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 6934 temp_regs[j] = 0; 6935 if (ctx->src[j].abs) 6936 temp_regs[j] = r600_get_temp(ctx); 6937 } 6938 6939 for (i = 0; i < lasti + 1; i++) { 6940 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6941 continue; 6942 6943 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6944 alu.op = ALU_OP3_CNDGE; 6945 r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); 6946 if (r) 6947 return r; 6948 r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]); 6949 if (r) 6950 return r; 6951 r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]); 6952 if (r) 6953 return r; 6954 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6955 alu.dst.chan = i; 6956 alu.dst.write = 1; 6957 alu.is_op3 = 1; 
6958 if (i == lasti) 6959 alu.last = 1; 6960 r = r600_bytecode_add_alu(ctx->bc, &alu); 6961 if (r) 6962 return r; 6963 } 6964 return 0; 6965} 6966 6967static int tgsi_ucmp(struct r600_shader_ctx *ctx) 6968{ 6969 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6970 struct r600_bytecode_alu alu; 6971 int i, r; 6972 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6973 6974 for (i = 0; i < lasti + 1; i++) { 6975 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6976 continue; 6977 6978 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6979 alu.op = ALU_OP3_CNDE_INT; 6980 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6981 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6982 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 6983 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6984 alu.dst.chan = i; 6985 alu.dst.write = 1; 6986 alu.is_op3 = 1; 6987 if (i == lasti) 6988 alu.last = 1; 6989 r = r600_bytecode_add_alu(ctx->bc, &alu); 6990 if (r) 6991 return r; 6992 } 6993 return 0; 6994} 6995 6996static int tgsi_xpd(struct r600_shader_ctx *ctx) 6997{ 6998 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6999 static const unsigned int src0_swizzle[] = {2, 0, 1}; 7000 static const unsigned int src1_swizzle[] = {1, 2, 0}; 7001 struct r600_bytecode_alu alu; 7002 uint32_t use_temp = 0; 7003 int i, r; 7004 7005 if (inst->Dst[0].Register.WriteMask != 0xf) 7006 use_temp = 1; 7007 7008 for (i = 0; i < 4; i++) { 7009 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7010 alu.op = ALU_OP2_MUL; 7011 if (i < 3) { 7012 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 7013 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 7014 } else { 7015 alu.src[0].sel = V_SQ_ALU_SRC_0; 7016 alu.src[0].chan = i; 7017 alu.src[1].sel = V_SQ_ALU_SRC_0; 7018 alu.src[1].chan = i; 7019 } 7020 7021 alu.dst.sel = ctx->temp_reg; 7022 alu.dst.chan = i; 7023 alu.dst.write = 1; 7024 7025 if (i == 3) 7026 alu.last = 
1; 7027 r = r600_bytecode_add_alu(ctx->bc, &alu); 7028 if (r) 7029 return r; 7030 } 7031 7032 for (i = 0; i < 4; i++) { 7033 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7034 alu.op = ALU_OP3_MULADD; 7035 7036 if (i < 3) { 7037 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 7038 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 7039 } else { 7040 alu.src[0].sel = V_SQ_ALU_SRC_0; 7041 alu.src[0].chan = i; 7042 alu.src[1].sel = V_SQ_ALU_SRC_0; 7043 alu.src[1].chan = i; 7044 } 7045 7046 alu.src[2].sel = ctx->temp_reg; 7047 alu.src[2].neg = 1; 7048 alu.src[2].chan = i; 7049 7050 if (use_temp) 7051 alu.dst.sel = ctx->temp_reg; 7052 else 7053 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7054 alu.dst.chan = i; 7055 alu.dst.write = 1; 7056 alu.is_op3 = 1; 7057 if (i == 3) 7058 alu.last = 1; 7059 r = r600_bytecode_add_alu(ctx->bc, &alu); 7060 if (r) 7061 return r; 7062 } 7063 if (use_temp) 7064 return tgsi_helper_copy(ctx, inst); 7065 return 0; 7066} 7067 7068static int tgsi_exp(struct r600_shader_ctx *ctx) 7069{ 7070 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7071 struct r600_bytecode_alu alu; 7072 int r; 7073 int i; 7074 7075 /* result.x = 2^floor(src); */ 7076 if (inst->Dst[0].Register.WriteMask & 1) { 7077 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7078 7079 alu.op = ALU_OP1_FLOOR; 7080 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7081 7082 alu.dst.sel = ctx->temp_reg; 7083 alu.dst.chan = 0; 7084 alu.dst.write = 1; 7085 alu.last = 1; 7086 r = r600_bytecode_add_alu(ctx->bc, &alu); 7087 if (r) 7088 return r; 7089 7090 if (ctx->bc->chip_class == CAYMAN) { 7091 for (i = 0; i < 3; i++) { 7092 alu.op = ALU_OP1_EXP_IEEE; 7093 alu.src[0].sel = ctx->temp_reg; 7094 alu.src[0].chan = 0; 7095 7096 alu.dst.sel = ctx->temp_reg; 7097 alu.dst.chan = i; 7098 alu.dst.write = i == 0; 7099 alu.last = i == 2; 7100 r = r600_bytecode_add_alu(ctx->bc, &alu); 7101 if (r) 7102 return r; 7103 } 7104 } else { 7105 alu.op 
= ALU_OP1_EXP_IEEE; 7106 alu.src[0].sel = ctx->temp_reg; 7107 alu.src[0].chan = 0; 7108 7109 alu.dst.sel = ctx->temp_reg; 7110 alu.dst.chan = 0; 7111 alu.dst.write = 1; 7112 alu.last = 1; 7113 r = r600_bytecode_add_alu(ctx->bc, &alu); 7114 if (r) 7115 return r; 7116 } 7117 } 7118 7119 /* result.y = tmp - floor(tmp); */ 7120 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 7121 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7122 7123 alu.op = ALU_OP1_FRACT; 7124 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7125 7126 alu.dst.sel = ctx->temp_reg; 7127#if 0 7128 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7129 if (r) 7130 return r; 7131#endif 7132 alu.dst.write = 1; 7133 alu.dst.chan = 1; 7134 7135 alu.last = 1; 7136 7137 r = r600_bytecode_add_alu(ctx->bc, &alu); 7138 if (r) 7139 return r; 7140 } 7141 7142 /* result.z = RoughApprox2ToX(tmp);*/ 7143 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 7144 if (ctx->bc->chip_class == CAYMAN) { 7145 for (i = 0; i < 3; i++) { 7146 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7147 alu.op = ALU_OP1_EXP_IEEE; 7148 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7149 7150 alu.dst.sel = ctx->temp_reg; 7151 alu.dst.chan = i; 7152 if (i == 2) { 7153 alu.dst.write = 1; 7154 alu.last = 1; 7155 } 7156 7157 r = r600_bytecode_add_alu(ctx->bc, &alu); 7158 if (r) 7159 return r; 7160 } 7161 } else { 7162 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7163 alu.op = ALU_OP1_EXP_IEEE; 7164 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7165 7166 alu.dst.sel = ctx->temp_reg; 7167 alu.dst.write = 1; 7168 alu.dst.chan = 2; 7169 7170 alu.last = 1; 7171 7172 r = r600_bytecode_add_alu(ctx->bc, &alu); 7173 if (r) 7174 return r; 7175 } 7176 } 7177 7178 /* result.w = 1.0;*/ 7179 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 7180 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7181 7182 alu.op = ALU_OP1_MOV; 7183 alu.src[0].sel = V_SQ_ALU_SRC_1; 7184 alu.src[0].chan = 0; 7185 7186 alu.dst.sel = ctx->temp_reg; 7187 
alu.dst.chan = 3; 7188 alu.dst.write = 1; 7189 alu.last = 1; 7190 r = r600_bytecode_add_alu(ctx->bc, &alu); 7191 if (r) 7192 return r; 7193 } 7194 return tgsi_helper_copy(ctx, inst); 7195} 7196 7197static int tgsi_log(struct r600_shader_ctx *ctx) 7198{ 7199 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7200 struct r600_bytecode_alu alu; 7201 int r; 7202 int i; 7203 7204 /* result.x = floor(log2(|src|)); */ 7205 if (inst->Dst[0].Register.WriteMask & 1) { 7206 if (ctx->bc->chip_class == CAYMAN) { 7207 for (i = 0; i < 3; i++) { 7208 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7209 7210 alu.op = ALU_OP1_LOG_IEEE; 7211 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7212 r600_bytecode_src_set_abs(&alu.src[0]); 7213 7214 alu.dst.sel = ctx->temp_reg; 7215 alu.dst.chan = i; 7216 if (i == 0) 7217 alu.dst.write = 1; 7218 if (i == 2) 7219 alu.last = 1; 7220 r = r600_bytecode_add_alu(ctx->bc, &alu); 7221 if (r) 7222 return r; 7223 } 7224 7225 } else { 7226 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7227 7228 alu.op = ALU_OP1_LOG_IEEE; 7229 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7230 r600_bytecode_src_set_abs(&alu.src[0]); 7231 7232 alu.dst.sel = ctx->temp_reg; 7233 alu.dst.chan = 0; 7234 alu.dst.write = 1; 7235 alu.last = 1; 7236 r = r600_bytecode_add_alu(ctx->bc, &alu); 7237 if (r) 7238 return r; 7239 } 7240 7241 alu.op = ALU_OP1_FLOOR; 7242 alu.src[0].sel = ctx->temp_reg; 7243 alu.src[0].chan = 0; 7244 7245 alu.dst.sel = ctx->temp_reg; 7246 alu.dst.chan = 0; 7247 alu.dst.write = 1; 7248 alu.last = 1; 7249 7250 r = r600_bytecode_add_alu(ctx->bc, &alu); 7251 if (r) 7252 return r; 7253 } 7254 7255 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 7256 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 7257 7258 if (ctx->bc->chip_class == CAYMAN) { 7259 for (i = 0; i < 3; i++) { 7260 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7261 7262 alu.op = ALU_OP1_LOG_IEEE; 7263 r600_bytecode_src(&alu.src[0], &ctx->src[0], 
0); 7264 r600_bytecode_src_set_abs(&alu.src[0]); 7265 7266 alu.dst.sel = ctx->temp_reg; 7267 alu.dst.chan = i; 7268 if (i == 1) 7269 alu.dst.write = 1; 7270 if (i == 2) 7271 alu.last = 1; 7272 7273 r = r600_bytecode_add_alu(ctx->bc, &alu); 7274 if (r) 7275 return r; 7276 } 7277 } else { 7278 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7279 7280 alu.op = ALU_OP1_LOG_IEEE; 7281 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7282 r600_bytecode_src_set_abs(&alu.src[0]); 7283 7284 alu.dst.sel = ctx->temp_reg; 7285 alu.dst.chan = 1; 7286 alu.dst.write = 1; 7287 alu.last = 1; 7288 7289 r = r600_bytecode_add_alu(ctx->bc, &alu); 7290 if (r) 7291 return r; 7292 } 7293 7294 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7295 7296 alu.op = ALU_OP1_FLOOR; 7297 alu.src[0].sel = ctx->temp_reg; 7298 alu.src[0].chan = 1; 7299 7300 alu.dst.sel = ctx->temp_reg; 7301 alu.dst.chan = 1; 7302 alu.dst.write = 1; 7303 alu.last = 1; 7304 7305 r = r600_bytecode_add_alu(ctx->bc, &alu); 7306 if (r) 7307 return r; 7308 7309 if (ctx->bc->chip_class == CAYMAN) { 7310 for (i = 0; i < 3; i++) { 7311 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7312 alu.op = ALU_OP1_EXP_IEEE; 7313 alu.src[0].sel = ctx->temp_reg; 7314 alu.src[0].chan = 1; 7315 7316 alu.dst.sel = ctx->temp_reg; 7317 alu.dst.chan = i; 7318 if (i == 1) 7319 alu.dst.write = 1; 7320 if (i == 2) 7321 alu.last = 1; 7322 7323 r = r600_bytecode_add_alu(ctx->bc, &alu); 7324 if (r) 7325 return r; 7326 } 7327 } else { 7328 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7329 alu.op = ALU_OP1_EXP_IEEE; 7330 alu.src[0].sel = ctx->temp_reg; 7331 alu.src[0].chan = 1; 7332 7333 alu.dst.sel = ctx->temp_reg; 7334 alu.dst.chan = 1; 7335 alu.dst.write = 1; 7336 alu.last = 1; 7337 7338 r = r600_bytecode_add_alu(ctx->bc, &alu); 7339 if (r) 7340 return r; 7341 } 7342 7343 if (ctx->bc->chip_class == CAYMAN) { 7344 for (i = 0; i < 3; i++) { 7345 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7346 alu.op = ALU_OP1_RECIP_IEEE; 7347 
alu.src[0].sel = ctx->temp_reg; 7348 alu.src[0].chan = 1; 7349 7350 alu.dst.sel = ctx->temp_reg; 7351 alu.dst.chan = i; 7352 if (i == 1) 7353 alu.dst.write = 1; 7354 if (i == 2) 7355 alu.last = 1; 7356 7357 r = r600_bytecode_add_alu(ctx->bc, &alu); 7358 if (r) 7359 return r; 7360 } 7361 } else { 7362 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7363 alu.op = ALU_OP1_RECIP_IEEE; 7364 alu.src[0].sel = ctx->temp_reg; 7365 alu.src[0].chan = 1; 7366 7367 alu.dst.sel = ctx->temp_reg; 7368 alu.dst.chan = 1; 7369 alu.dst.write = 1; 7370 alu.last = 1; 7371 7372 r = r600_bytecode_add_alu(ctx->bc, &alu); 7373 if (r) 7374 return r; 7375 } 7376 7377 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7378 7379 alu.op = ALU_OP2_MUL; 7380 7381 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7382 r600_bytecode_src_set_abs(&alu.src[0]); 7383 7384 alu.src[1].sel = ctx->temp_reg; 7385 alu.src[1].chan = 1; 7386 7387 alu.dst.sel = ctx->temp_reg; 7388 alu.dst.chan = 1; 7389 alu.dst.write = 1; 7390 alu.last = 1; 7391 7392 r = r600_bytecode_add_alu(ctx->bc, &alu); 7393 if (r) 7394 return r; 7395 } 7396 7397 /* result.z = log2(|src|);*/ 7398 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 7399 if (ctx->bc->chip_class == CAYMAN) { 7400 for (i = 0; i < 3; i++) { 7401 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7402 7403 alu.op = ALU_OP1_LOG_IEEE; 7404 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7405 r600_bytecode_src_set_abs(&alu.src[0]); 7406 7407 alu.dst.sel = ctx->temp_reg; 7408 if (i == 2) 7409 alu.dst.write = 1; 7410 alu.dst.chan = i; 7411 if (i == 2) 7412 alu.last = 1; 7413 7414 r = r600_bytecode_add_alu(ctx->bc, &alu); 7415 if (r) 7416 return r; 7417 } 7418 } else { 7419 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7420 7421 alu.op = ALU_OP1_LOG_IEEE; 7422 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7423 r600_bytecode_src_set_abs(&alu.src[0]); 7424 7425 alu.dst.sel = ctx->temp_reg; 7426 alu.dst.write = 1; 7427 alu.dst.chan = 2; 7428 alu.last = 1; 7429 
r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.w = 1.0; */
	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* move the per-channel results from temp_reg to the real destination */
	return tgsi_helper_copy(ctx, inst);
}

/* Evergreen+ ARL/ARR/UARL: load the address/index register from src0.
 * ARL converts with floor, ARR with round-to-int, UARL moves the integer
 * bits as-is.  The converted value lands in the register returned by
 * get_address_file_reg(), and the corresponding "loaded" flag is cleared
 * so the next indexed access re-loads AR/the index register. */
static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index);

	/* only AR + two index registers are supported */
	assert(inst->Dst[0].Register.Index < 3);
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		alu.op = ALU_OP1_FLT_TO_INT_FLOOR;
		break;
	case TGSI_OPCODE_ARR:
		alu.op = ALU_OP1_FLT_TO_INT;
		break;
	case TGSI_OPCODE_UARL:
		alu.op = ALU_OP1_MOV;
		break;
	default:
		assert(0);
		return -1;
	}

	/* emit one conversion per written channel */
	for (i = 0; i <= lasti; ++i) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.last = i == lasti;
		alu.dst.sel = reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* force a re-load of the register we just redefined */
	if (inst->Dst[0].Register.Index > 0)
		ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0;
	else
		ctx->bc->ar_loaded = 0;

	return 0;
}
/* r600/r700 ARL/ARR/UARL: same contract as tgsi_eg_arl() but the result is
 * staged in bc->ar_reg, and conversions are split because FLT_TO_INT is a
 * trans-unit-only instruction on these chips. */
static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst =
&ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		/* ARL = floor(src) then convert: done in two passes because the
		 * conversion must run alone in the trans slot on r600/r700 */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLOOR;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}

		/* second pass: integer conversion of the floored value, in place */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.src[0].sel = ctx->bc->ar_reg;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			alu.dst.chan = i;
			alu.src[0].chan = i;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}
		break;
	case TGSI_OPCODE_ARR:
		/* ARR = round-to-int, a single conversion per channel */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	case TGSI_OPCODE_UARL:
		/* UARL: the source already holds integer bits, plain move */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	default:
assert(0);
		return -1;
	}

	/* the staged value must be (re-)copied into AR before use */
	ctx->bc->ar_loaded = 0;
	return 0;
}

/* Translate TGSI DST (distance vector):
 *   dst.x = 1.0
 *   dst.y = src0.y * src1.y
 *   dst.z = src0.z
 *   dst.w = src1.w
 * implemented as a MUL per channel with V_SQ_ALU_SRC_1 substituted for the
 * operand that must be ignored on that channel. */
static int tgsi_opdst(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r = 0;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP2_MUL;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		/* src0 contributes only on y and z */
		if (i == 0 || i == 3) {
			alu.src[0].sel = V_SQ_ALU_SRC_1;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		/* src1 contributes only on y and w */
		if (i == 0 || i == 2) {
			alu.src[1].sel = V_SQ_ALU_SRC_1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
		}
		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit a predicate-setting ALU instruction (opcode compares src0 against
 * the constant 0) that updates both the predicate and the execute mask;
 * the result is also written to temp_reg.  alu_type selects the CF clause
 * kind it is added to (e.g. ALU_PUSH_BEFORE or plain ALU). */
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type)
{
	struct r600_bytecode_alu alu;
	int r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = opcode;
	alu.execute_mask = 1;
	alu.update_pred = 1;

	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.dst.chan = 0;

	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	alu.src[1].sel = V_SQ_ALU_SRC_0;
	alu.src[1].chan = 0;

	alu.last = 1;

	r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
	if (r)
		return r;
	return 0;
}

/* Emit 'pops' stack pops.  When possible, fold them into the preceding ALU
 * clause by turning it into ALU_POP_AFTER / ALU_POP2_AFTER (up to 2 pops);
 * otherwise fall back to an explicit CF_OP_POP instruction. */
static int pops(struct r600_shader_ctx *ctx, int pops)
{
	unsigned force_pop = ctx->bc->force_add_cf;

	if (!force_pop) {
		/* 3 = sentinel meaning "last CF is not a foldable ALU clause" */
		int alu_pop = 3;
		if (ctx->bc->cf_last) {
			if (ctx->bc->cf_last->op == CF_OP_ALU)
				alu_pop = 0;
			else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER)
				alu_pop = 1;
		}
		alu_pop += pops;
		if (alu_pop == 1) {
			ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER;
			ctx->bc->force_add_cf = 1;
} else if (alu_pop == 2) {
			ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
			ctx->bc->force_add_cf = 1;
		} else {
			/* too many pops to fold; emit a dedicated POP below */
			force_pop = 1;
		}
	}

	if (force_pop) {
		r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
		ctx->bc->cf_last->pop_count = pops;
		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
	}

	return 0;
}

/* Recompute the worst-case hardware branch-stack depth (in STACK_SIZE
 * entries) implied by the current loop/WQM/VPM push counters, adding the
 * per-chip extra reservations, and raise stack.max_entries if needed.
 * 'reason' is the FC_* kind of the push that triggered the update. */
static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
		unsigned reason)
{
	struct r600_stack_info *stack = &ctx->bc->stack;
	unsigned elements, entries;

	unsigned entry_size = stack->entry_size;

	elements = (stack->loop + stack->push_wqm ) * entry_size;
	elements += stack->push;

	switch (ctx->bc->chip_class) {
	case R600:
	case R700:
		/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
		 * the stack must be reserved to hold the current active/continue
		 * masks */
		if (reason == FC_PUSH_VPM) {
			elements += 2;
		}
		break;

	case CAYMAN:
		/* r9xx: any stack operation on empty stack consumes 2 additional
		 * elements */
		elements += 2;

		/* fallthrough */
		/* FIXME: do the two elements added above cover the cases for the
		 * r8xx+ below? */

	case EVERGREEN:
		/* r8xx+: 2 extra elements are not always required, but one extra
		 * element must be added for each of the following cases:
		 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
		 *    stack usage.
		 *    (Currently we don't use ALU_ELSE_AFTER.)
		 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
		 *    PUSH instruction executed.
		 *
		 * NOTE: it seems we also need to reserve additional element in some
		 * other cases, e.g. 
when we have 4 levels of PUSH_VPM in the shader, 7713 * then STACK_SIZE should be 2 instead of 1 */ 7714 if (reason == FC_PUSH_VPM) { 7715 elements += 1; 7716 } 7717 break; 7718 7719 default: 7720 assert(0); 7721 break; 7722 } 7723 7724 /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 7725 * for all chips, so we use 4 in the final formula, not the real entry_size 7726 * for the chip */ 7727 entry_size = 4; 7728 7729 entries = (elements + (entry_size - 1)) / entry_size; 7730 7731 if (entries > stack->max_entries) 7732 stack->max_entries = entries; 7733} 7734 7735static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 7736{ 7737 switch(reason) { 7738 case FC_PUSH_VPM: 7739 --ctx->bc->stack.push; 7740 assert(ctx->bc->stack.push >= 0); 7741 break; 7742 case FC_PUSH_WQM: 7743 --ctx->bc->stack.push_wqm; 7744 assert(ctx->bc->stack.push_wqm >= 0); 7745 break; 7746 case FC_LOOP: 7747 --ctx->bc->stack.loop; 7748 assert(ctx->bc->stack.loop >= 0); 7749 break; 7750 default: 7751 assert(0); 7752 break; 7753 } 7754} 7755 7756static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) 7757{ 7758 switch (reason) { 7759 case FC_PUSH_VPM: 7760 ++ctx->bc->stack.push; 7761 break; 7762 case FC_PUSH_WQM: 7763 ++ctx->bc->stack.push_wqm; 7764 case FC_LOOP: 7765 ++ctx->bc->stack.loop; 7766 break; 7767 default: 7768 assert(0); 7769 } 7770 7771 callstack_update_max_depth(ctx, reason); 7772} 7773 7774static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 7775{ 7776 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 7777 7778 sp->mid = realloc((void *)sp->mid, 7779 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 7780 sp->mid[sp->num_mid] = ctx->bc->cf_last; 7781 sp->num_mid++; 7782} 7783 7784static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 7785{ 7786 ctx->bc->fc_sp++; 7787 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 7788 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 7789} 

/* Close the innermost control-flow frame: free the frame's "mid" CF-pointer
 * array (grown by fc_set_mid() via realloc), scrub the slot so stale data
 * can never be mistaken for a live frame, then drop the frame index. */
static void fc_poplevel(struct r600_shader_ctx *ctx)
{
	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
	free(sp->mid);
	sp->mid = NULL;
	sp->num_mid = 0;
	sp->start = NULL;
	sp->type = 0;
	ctx->bc->fc_sp--;
}

/* Dead scaffolding for subroutine/RET support, kept for reference only.
 * NOTE(review): if ever re-enabled, the stray ')' after the
 * r600_bytecode_add_cfinst() calls in emit_return() and
 * emit_jump_to_offset() will not compile, and emit_jump_to_offset() still
 * lacks the actual jump-offset computation (see the XXX inside). */
#if 0
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN));
	return 0;
}

static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif

/* Common helper for IF/UIF: emit the predicate ALU clause followed by a
 * JUMP over the then-branch, open an FC_IF frame and account for the PUSH
 * on the callstack (the JUMP's target is patched later by tgsi_else()/
 * tgsi_endif()). */
static int emit_if(struct r600_shader_ctx *ctx, int opcode)
{
	int alu_type = CF_OP_ALU_PUSH_BEFORE;

	/* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
	 * LOOP_STARTxxx for nested loops may put the branch stack into a state
	 * such that ALU_PUSH_BEFORE doesn't work as expected.
Workaround this
	 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
	if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
		r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
		alu_type = CF_OP_ALU;
	}

	emit_logic_pred(ctx, opcode, alu_type);

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);

	fc_pushlevel(ctx, FC_IF);

	callstack_push(ctx, FC_PUSH_VPM);
	return 0;
}

/* TGSI IF: float predicate, branch taken while src0.x != 0. */
static int tgsi_if(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE);
}

/* TGSI UIF: integer predicate variant of tgsi_if(). */
static int tgsi_uif(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE_INT);
}

/* TGSI ELSE: emit an ELSE CF (popping one stack level), remember it as the
 * frame's "mid" marker, and patch the frame's opening JUMP to land here.
 * NOTE(review): the r600_bytecode_add_cfinst() return value is ignored. */
static int tgsi_else(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, ctx->bc->fc_sp);
	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
	return 0;
}

/* TGSI ENDIF: pop the active-mask level, then patch branch targets: with no
 * ELSE the opening JUMP both jumps past the ENDIF and pops; with an ELSE the
 * ELSE CF (mid[0]) is patched to jump past the ENDIF instead. */
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
	pops(ctx, 1);
	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
		R600_ERR("if/endif unbalanced in shader\n");
		return -1;
	}

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
	} else {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
	}
	fc_poplevel(ctx);

	callstack_pop(ctx, FC_PUSH_VPM);
	return 0;
}

/* TGSI BGNLOOP: emit LOOP_START_DX10 and open an FC_LOOP frame; loop
 * address fixups happen in tgsi_endloop(). */
static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
{
	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
	 * limited to 4096 iterations, like the other LOOP_* instructions. */
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);

	fc_pushlevel(ctx, FC_LOOP);

	/* check stack depth */
	callstack_push(ctx, FC_LOOP);
	return 0;
}

/* TGSI ENDLOOP: emit LOOP_END and resolve every CF address of the loop --
 * the start/end instructions point past each other, and all recorded
 * BRK/CONT "mid" markers point at the LOOP_END itself. */
static int tgsi_endloop(struct r600_shader_ctx *ctx)
{
	int i;

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
		R600_ERR("loop/endloop in shader code are not paired.\n");
		return -EINVAL;
	}

	/* fixup loop pointers - from r600isa
	   LOOP END points to CF after LOOP START,
	   LOOP START point to CF after LOOP END
	   BRK/CONT point to LOOP END CF
	 */
	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;

	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;

	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
	}
	/* XXX add LOOPRET support */
	fc_poplevel(ctx);
	callstack_pop(ctx, FC_LOOP);
	return 0;
}

/* TGSI BREAKC: conditional loop break.  Walks the FC stack down to the
 * innermost FC_LOOP frame (index 0 is treated as "no frame" -- entries
 * start at 1, see fc_pushlevel). */
static int tgsi_loop_breakc(struct r600_shader_ctx *ctx)
{
	int r;
	unsigned int fscp;

	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}
	if (fscp == 0) {
		R600_ERR("BREAKC not inside loop/endloop pair\n");
		return -EINVAL;
	}

	if (ctx->bc->chip_class == EVERGREEN &&
	    ctx->bc->family != CHIP_CYPRESS &&
	    ctx->bc->family != CHIP_JUNIPER) {
		/* HW bug: ALU_BREAK does not save the active mask correctly */
		r = tgsi_uif(ctx);
		if (r)
			return r;

		r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);

		return tgsi_endif(ctx);
	} else {
		r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);
	}

	return 0;
}

/* TGSI BRK/CONT: emit the CF op selected by the dispatch table
 * (CF_OP_LOOP_BREAK or CF_OP_LOOP_CONTINUE via ctx->inst_info->op) and
 * record it on the innermost FC_LOOP frame so tgsi_endloop() can patch its
 * target address.  NOTE(review): the add_cfinst return value is ignored. */
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
	unsigned int fscp;

	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}

	if (fscp == 0) {
		R600_ERR("Break not inside loop/endloop pair\n");
		return -EINVAL;
	}

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);

	fc_set_mid(ctx, fscp);

	return 0;
}

/* TGSI EMIT/ENDPRIM for geometry shaders: the stream index comes from an
 * immediate literal (src0's selected component).  For EMIT_VERTEX the ring
 * writes go out first; on success the CUT/EMIT CF carries the stream in its
 * count field and the ring offset is advanced. */
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
	int r;

	if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
		emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);

	r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	if (!r) {
		ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
		return emit_inc_ring_offset(ctx, stream, TRUE);
	}
	return r;
}

/* TGSI UMAD (dst = src0 * src1 + src2, unsigned) lowered as MULLO_UINT into
 * temp_reg followed by ADD_INT, per write-masked channel.  On Cayman the
 * multiply is replicated across all four slots with only slot i writing
 * (dst.write = (j == i)) -- see the CAYMAN notes at the top of the file. */
static int tgsi_umad(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	/* src0 * src1 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP2_MULLO_UINT;
				for (k = 0; k < inst->Instruction.NumSrcRegs; k++) {
					r600_bytecode_src(&alu.src[k], &ctx->src[k], i);
				}
				alu.dst.chan = j;
				alu.dst.sel = ctx->temp_reg;
				alu.dst.write = (j == i);
				if (j == 3)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			/* non-Cayman: a single MULLO_UINT into the matching
			 * channel of temp_reg */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.dst.chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;

			alu.op = ALU_OP2_MULLO_UINT;
			for (j = 0; j < 2; j++) {
				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
			}

			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* second pass: dst = temp_reg (src0*src1) + src2 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ALU_OP2_ADD_INT;

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI opcode -> {hw op, emit handler} dispatch table for R600-class chips;
 * Evergreen (eg_) and Cayman (cm_) have their own tables below. */
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_r600_arl},
	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},

	/* XXX:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
8114 */ 8115 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 8116 8117 [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, 8118 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 8119 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 8120 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 8121 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 8122 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 8123 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 8124 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 8125 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 8126 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 8127 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 8128 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 8129 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 8130 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 8131 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 8132 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 8133 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 8134 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 8135 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 8136 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 8137 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 8138 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 8139 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 8140 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 8141 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 8142 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 8143 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 8144 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 8145 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 8146 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 8147 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 8148 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 8149 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 8150 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8151 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 
8152 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 8153 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8154 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8155 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8156 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8157 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 8158 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 8159 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 8160 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 8161 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 8162 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 8163 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 8164 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 8165 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 8166 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 8167 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 8168 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8169 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8170 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8171 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8172 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 8173 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 8174 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl}, 8175 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 8176 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 8177 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 8178 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 8179 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 8180 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 8181 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8182 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 8183 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 8184 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 8185 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8186 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 8187 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 8188 
[TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 8189 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 8190 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 8191 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 8192 [TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 8193 [TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 8194 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 8195 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 8196 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 8197 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 8198 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 8199 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 8200 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans}, 8201 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 8202 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 8203 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 8204 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 8205 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 8206 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8207 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 8208 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8209 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 8210 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 8211 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 8212 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 8213 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8214 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 8215 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8216 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8217 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 8218 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 8219 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 8220 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 8221 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 8222 [TGSI_OPCODE_FSGE] = { 
ALU_OP2_SETGE_DX10, tgsi_op2}, 8223 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 8224 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 8225 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 8226 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 8227 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 8228 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc}, 8229 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 8230 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 8231 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 8232 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, 8233 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 8234 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 8235 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 8236 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 8237 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 8238 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans}, 8239 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 8240 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, 8241 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 8242 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 8243 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 8244 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 8245 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 8246 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 8247 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 8248 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 8249 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 8250 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 8251 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans}, 8252 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 8253 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap}, 8254 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8255 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 8256 
[TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 8257 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8258 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 8259 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 8260 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 8261 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 8262 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 8263 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 8264 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 8265 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 8266 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 8267 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 8268 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 8269 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 8270 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl}, 8271 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 8272 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 8273 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 8274 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8275 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 8276 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8277 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8278 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8279 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 8280 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 8281 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 8282 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 8283 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 8284 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8285 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8286 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8287 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8288 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8289 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8290 
[TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 8291 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8292 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8293 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 8294 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 8295 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported}, 8296 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported}, 8297 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported}, 8298 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported}, 8299 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported}, 8300 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported}, 8301 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported}, 8302 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported}, 8303 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported}, 8304 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_unsupported}, 8305 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported}, 8306 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported}, 8307 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported}, 8308 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 8309}; 8310 8311static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 8312 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 8313 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 8314 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 8315 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 8316 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, 8317 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 8318 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 8319 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 8320 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 8321 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 8322 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 8323 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 8324 [TGSI_OPCODE_MIN] = { 
ALU_OP2_MIN, tgsi_op2}, 8325 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 8326 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 8327 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 8328 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 8329 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 8330 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 8331 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 8332 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 8333 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 8334 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 8335 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 8336 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 8337 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 8338 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 8339 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 8340 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 8341 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 8342 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 8343 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 8344 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 8345 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 8346 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 8347 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 8348 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 8349 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8350 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 8351 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 8352 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8353 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8354 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8355 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8356 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 8357 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 8358 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 8359 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 8360 
[TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 8361 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 8362 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 8363 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 8364 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 8365 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 8366 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 8367 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8368 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8369 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8370 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8371 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 8372 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 8373 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 8374 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 8375 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 8376 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 8377 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 8378 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 8379 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 8380 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8381 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 8382 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 8383 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 8384 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8385 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 8386 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 8387 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 8388 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 8389 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 8390 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 8391 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8392 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 8393 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 8394 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 8395 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 8396 
[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 8397 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 8398 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 8399 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 8400 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 8401 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 8402 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 8403 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 8404 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 8405 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8406 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 8407 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8408 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 8409 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 8410 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 8411 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 8412 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8413 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 8414 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8415 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8416 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 8417 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 8418 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 8419 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 8420 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 8421 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 8422 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 8423 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 8424 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 8425 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 8426 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 8427 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 8428 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 8429 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka 
HALT */ 8430 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 8431 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i}, 8432 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 8433 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 8434 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 8435 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 8436 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 8437 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 8438 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 8439 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_f2i}, 8440 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 8441 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 8442 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 8443 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 8444 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 8445 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 8446 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 8447 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 8448 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 8449 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 8450 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 8451 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 8452 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 8453 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8454 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 8455 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 8456 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8457 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 8458 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 8459 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 8460 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 8461 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 8462 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 8463 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 8464 [TGSI_OPCODE_SAMPLE_L] = { 0, 
tgsi_unsupported}, 8465 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 8466 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 8467 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 8468 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 8469 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 8470 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 8471 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 8472 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 8473 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8474 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 8475 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8476 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8477 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8478 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 8479 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 8480 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 8481 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 8482 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 8483 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8484 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8485 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8486 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8487 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8488 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8489 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 8490 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8491 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8492 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 8493 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 8494 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 8495 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 8496 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 8497 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 8498 
[TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 8499 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 8500 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 8501 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 8502 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 8503 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 8504 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8505 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8506 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8507 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 8508 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 8509 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 8510 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 8511 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 8512 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 8513 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 8514 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 8515 [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 8516 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 8517 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 8518 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 8519 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 8520 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 8521 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 8522 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 8523 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 8524 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 8525 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 8526 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 8527 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 8528 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 8529 
[TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 8530 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 8531}; 8532 8533static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 8534 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 8535 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 8536 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 8537 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, 8538 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, 8539 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 8540 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 8541 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 8542 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 8543 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 8544 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 8545 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 8546 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 8547 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 8548 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 8549 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 8550 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 8551 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 8552 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 8553 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 8554 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, 8555 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 8556 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 8557 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 8558 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 8559 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 8560 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 8561 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 8562 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, 8563 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, 8564 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow}, 8565 [TGSI_OPCODE_XPD] = { 
ALU_OP0_NOP, tgsi_xpd}, 8566 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 8567 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 8568 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 8569 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 8570 [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig}, 8571 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8572 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 8573 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 8574 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8575 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8576 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8577 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8578 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 8579 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 8580 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 8581 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 8582 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, cayman_trig}, 8583 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 8584 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 8585 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 8586 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 8587 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 8588 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 8589 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8590 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8591 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8592 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8593 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 8594 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 8595 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 8596 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 8597 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 8598 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 8599 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 8600 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 8601 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, 
tgsi_scs}, 8602 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8603 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 8604 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 8605 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 8606 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8607 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 8608 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 8609 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 8610 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 8611 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 8612 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 8613 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8614 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 8615 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 8616 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 8617 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 8618 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2}, 8619 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 8620 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 8621 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 8622 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 8623 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 8624 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 8625 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 8626 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 8627 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8628 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 8629 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8630 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 8631 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 8632 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 8633 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 8634 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8635 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 8636 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, 
tgsi_unsupported}, 8637 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8638 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 8639 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 8640 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 8641 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 8642 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 8643 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 8644 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 8645 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 8646 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 8647 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 8648 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 8649 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 8650 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 8651 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 8652 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 8653 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2}, 8654 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 8655 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 8656 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 8657 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 8658 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 8659 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 8660 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 8661 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2}, 8662 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2}, 8663 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 8664 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 8665 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 8666 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 8667 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 8668 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 8669 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr}, 8670 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, 
tgsi_op2}, 8671 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 8672 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 8673 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 8674 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 8675 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8676 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 8677 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 8678 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8679 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 8680 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 8681 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 8682 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 8683 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 8684 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 8685 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 8686 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 8687 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 8688 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 8689 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 8690 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 8691 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 8692 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 8693 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 8694 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 8695 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8696 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 8697 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8698 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8699 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8700 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 8701 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 8702 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 8703 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 8704 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 8705 
[TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8706 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8707 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8708 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8709 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8710 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8711 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 8712 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8713 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8714 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr}, 8715 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, 8716 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 8717 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 8718 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 8719 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 8720 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 8721 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 8722 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 8723 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 8724 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 8725 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 8726 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8727 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8728 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8729 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 8730 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 8731 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 8732 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 8733 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 8734 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 8735 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 8736 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 8737 [TGSI_OPCODE_DSLT] = { 
ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 8738 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 8739 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 8740 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 8741 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 8742 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 8743 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 8744 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 8745 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 8746 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 8747 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 8748 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 8749 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 8750 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 8751 [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 8752 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 8753}; 8754