r600_shader.c revision af249a7da9bf2621ab836d5074ef692677b11bbf
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "r600_sq.h" 24#include "r600_formats.h" 25#include "r600_opcodes.h" 26#include "r600_shader.h" 27#include "r600d.h" 28 29#include "sb/sb_public.h" 30 31#include "pipe/p_shader_tokens.h" 32#include "tgsi/tgsi_info.h" 33#include "tgsi/tgsi_parse.h" 34#include "tgsi/tgsi_scan.h" 35#include "tgsi/tgsi_dump.h" 36#include "util/u_memory.h" 37#include "util/u_math.h" 38#include <stdio.h> 39#include <errno.h> 40 41/* CAYMAN notes 42Why CAYMAN got loops for lots of instructions is explained here. 43 44-These 8xx t-slot only ops are implemented in all vector slots. 45MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 46These 8xx t-slot only opcodes become vector ops, with all four 47slots expecting the arguments on sources a and b. Result is 48broadcast to all channels. 49MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64 50These 8xx t-slot only opcodes become vector ops in the z, y, and 51x slots. 52EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 53RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 54SQRT_IEEE/_64 55SIN/COS 56The w slot may have an independent co-issued operation, or if the 57result is required to be in the w slot, the opcode above may be 58issued in the w slot as well. 59The compiler must issue the source argument to slots z, y, and x 60*/ 61 62/* Contents of r0 on entry to various shaders 63 64 VS - .x = VertexID 65 .y = RelVertexID (??) 66 .w = InstanceID 67 68 GS - r0.xyw, r1.xyz = per-vertex offsets 69 r0.z = PrimitiveID 70 71 TCS - .x = PatchID 72 .y = RelPatchID (??) 73 .z = InvocationID 74 .w = tess factor base. 75 76 TES - .x = TessCoord.x 77 - .y = TessCoord.y 78 - .z = RelPatchID (??) 79 - .w = PrimitiveID 80 81 PS - face_gpr.z = SampleMask 82 face_gpr.w = SampleID 83*/ 84#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) 85static int r600_shader_from_tgsi(struct r600_context *rctx, 86 struct r600_pipe_shader *pipeshader, 87 union r600_shader_key key); 88 89static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, 90 int size, unsigned comp_mask) { 91 92 if (!size) 93 return; 94 95 if (ps->num_arrays == ps->max_arrays) { 96 ps->max_arrays += 64; 97 ps->arrays = realloc(ps->arrays, ps->max_arrays * 98 sizeof(struct r600_shader_array)); 99 } 100 101 int n = ps->num_arrays; 102 ++ps->num_arrays; 103 104 ps->arrays[n].comp_mask = comp_mask; 105 ps->arrays[n].gpr_start = start_gpr; 106 ps->arrays[n].gpr_count = size; 107} 108 109static void r600_dump_streamout(struct pipe_stream_output_info *so) 110{ 111 unsigned i; 112 113 fprintf(stderr, "STREAMOUT\n"); 114 for (i = 0; i < so->num_outputs; i++) { 115 unsigned mask = ((1 << so->output[i].num_components) - 1) << 116 so->output[i].start_component; 117 fprintf(stderr, " %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n", 118 i, 119 so->output[i].stream, 120 so->output[i].output_buffer, 121 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, 122 so->output[i].register_index, 123 mask & 1 ? "x" : "", 124 mask & 2 ? "y" : "", 125 mask & 4 ? "z" : "", 126 mask & 8 ? "w" : "", 127 so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : ""); 128 } 129} 130 131static int store_shader(struct pipe_context *ctx, 132 struct r600_pipe_shader *shader) 133{ 134 struct r600_context *rctx = (struct r600_context *)ctx; 135 uint32_t *ptr, i; 136 137 if (shader->bo == NULL) { 138 shader->bo = (struct r600_resource*) 139 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); 140 if (shader->bo == NULL) { 141 return -ENOMEM; 142 } 143 ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); 144 if (R600_BIG_ENDIAN) { 145 for (i = 0; i < shader->shader.bc.ndw; ++i) { 146 ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); 147 } 148 } else { 149 memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); 150 } 151 rctx->b.ws->buffer_unmap(shader->bo->buf); 152 } 153 154 return 0; 155} 156 157int r600_pipe_shader_create(struct pipe_context *ctx, 158 struct r600_pipe_shader *shader, 159 union r600_shader_key key) 160{ 161 struct r600_context *rctx = (struct r600_context *)ctx; 162 struct r600_pipe_shader_selector *sel = shader->selector; 163 int r; 164 bool dump = r600_can_dump_shader(&rctx->screen->b, 165 tgsi_get_processor_type(sel->tokens)); 166 unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); 167 unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); 168 unsigned export_shader; 169 170 shader->shader.bc.isa = rctx->isa; 171 172 if (dump) { 173 fprintf(stderr, "--------------------------------------------------------------\n"); 174 tgsi_dump(sel->tokens, 0); 175 176 if (sel->so.num_outputs) { 177 r600_dump_streamout(&sel->so); 178 } 179 } 180 r = r600_shader_from_tgsi(rctx, shader, key); 181 if (r) { 182 R600_ERR("translation from TGSI failed !\n"); 183 goto error; 184 } 185 if (shader->shader.processor_type == PIPE_SHADER_VERTEX) { 186 /* only disable for vertex shaders in tess paths */ 187 if (key.vs.as_ls) 188 use_sb = 0; 189 } 190 use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_CTRL); 191 use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_EVAL); 192 193 /* disable SB for shaders using doubles */ 194 use_sb &= !shader->shader.uses_doubles; 195 196 /* Check if the bytecode has already been built. */ 197 if (!shader->shader.bc.bytecode) { 198 r = r600_bytecode_build(&shader->shader.bc); 199 if (r) { 200 R600_ERR("building bytecode failed !\n"); 201 goto error; 202 } 203 } 204 205 if (dump && !sb_disasm) { 206 fprintf(stderr, "--------------------------------------------------------------\n"); 207 r600_bytecode_disasm(&shader->shader.bc); 208 fprintf(stderr, "______________________________________________________________\n"); 209 } else if ((dump && sb_disasm) || use_sb) { 210 r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, 211 dump, use_sb); 212 if (r) { 213 R600_ERR("r600_sb_bytecode_process failed !\n"); 214 goto error; 215 } 216 } 217 218 if (shader->gs_copy_shader) { 219 if (dump) { 220 // dump copy shader 221 r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc, 222 &shader->gs_copy_shader->shader, dump, 0); 223 if (r) 224 goto error; 225 } 226 227 if ((r = store_shader(ctx, shader->gs_copy_shader))) 228 goto error; 229 } 230 231 /* Store the shader in a buffer. */ 232 if ((r = store_shader(ctx, shader))) 233 goto error; 234 235 /* Build state. */ 236 switch (shader->shader.processor_type) { 237 case PIPE_SHADER_TESS_CTRL: 238 evergreen_update_hs_state(ctx, shader); 239 break; 240 case PIPE_SHADER_TESS_EVAL: 241 if (key.tes.as_es) 242 evergreen_update_es_state(ctx, shader); 243 else 244 evergreen_update_vs_state(ctx, shader); 245 break; 246 case PIPE_SHADER_GEOMETRY: 247 if (rctx->b.chip_class >= EVERGREEN) { 248 evergreen_update_gs_state(ctx, shader); 249 evergreen_update_vs_state(ctx, shader->gs_copy_shader); 250 } else { 251 r600_update_gs_state(ctx, shader); 252 r600_update_vs_state(ctx, shader->gs_copy_shader); 253 } 254 break; 255 case PIPE_SHADER_VERTEX: 256 export_shader = key.vs.as_es; 257 if (rctx->b.chip_class >= EVERGREEN) { 258 if (key.vs.as_ls) 259 evergreen_update_ls_state(ctx, shader); 260 else if (key.vs.as_es) 261 evergreen_update_es_state(ctx, shader); 262 else 263 evergreen_update_vs_state(ctx, shader); 264 } else { 265 if (export_shader) 266 r600_update_es_state(ctx, shader); 267 else 268 r600_update_vs_state(ctx, shader); 269 } 270 break; 271 case PIPE_SHADER_FRAGMENT: 272 if (rctx->b.chip_class >= EVERGREEN) { 273 evergreen_update_ps_state(ctx, shader); 274 } else { 275 r600_update_ps_state(ctx, shader); 276 } 277 break; 278 default: 279 r = -EINVAL; 280 goto error; 281 } 282 return 0; 283 284error: 285 r600_pipe_shader_destroy(ctx, shader); 286 return r; 287} 288 289void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 290{ 291 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); 292 r600_bytecode_clear(&shader->shader.bc); 293 r600_release_command_buffer(&shader->command_buffer); 294} 295 296/* 297 * tgsi -> r600 shader 298 */ 299struct r600_shader_tgsi_instruction; 300 301struct r600_shader_src { 302 unsigned sel; 303 unsigned swizzle[4]; 304 unsigned neg; 305 unsigned abs; 306 unsigned rel; 307 unsigned kc_bank; 308 boolean kc_rel; /* true if cache bank is indexed */ 309 uint32_t value[4]; 310}; 311 312struct eg_interp { 313 boolean enabled; 314 unsigned ij_index; 315}; 316 317struct r600_shader_ctx { 318 struct tgsi_shader_info info; 319 struct tgsi_parse_context parse; 320 const struct tgsi_token *tokens; 321 unsigned type; 322 unsigned file_offset[TGSI_FILE_COUNT]; 323 unsigned temp_reg; 324 const struct r600_shader_tgsi_instruction *inst_info; 325 struct r600_bytecode *bc; 326 struct r600_shader *shader; 327 struct r600_shader_src src[4]; 328 uint32_t *literals; 329 uint32_t nliterals; 330 uint32_t max_driver_temp_used; 331 /* needed for evergreen interpolation */ 332 struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid 333 /* evergreen/cayman also store sample mask in face register */ 334 int face_gpr; 335 /* sample id is .w component stored in fixed point position register */ 336 int fixed_pt_position_gpr; 337 int colors_used; 338 boolean clip_vertex_write; 339 unsigned cv_output; 340 unsigned edgeflag_output; 341 int fragcoord_input; 342 int native_integers; 343 int next_ring_offset; 344 int gs_out_ring_offset; 345 int gs_next_vertex; 346 struct r600_shader *gs_for_vs; 347 int gs_export_gpr_tregs[4]; 348 const struct pipe_stream_output_info *gs_stream_output_info; 349 unsigned enabled_stream_buffers_mask; 350 unsigned tess_input_info; /* temp with tess input offsets */ 351 unsigned tess_output_info; /* temp with tess input offsets */ 352}; 353 354struct r600_shader_tgsi_instruction { 355 unsigned op; 356 int (*process)(struct r600_shader_ctx *ctx); 357}; 358 359static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind); 360static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 361static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 362static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); 363static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); 364static int tgsi_else(struct r600_shader_ctx *ctx); 365static int tgsi_endif(struct r600_shader_ctx *ctx); 366static int tgsi_bgnloop(struct r600_shader_ctx *ctx); 367static int tgsi_endloop(struct r600_shader_ctx *ctx); 368static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx); 369static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 370 unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 371 unsigned int dst_reg); 372static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 373 const struct r600_shader_src *shader_src, 374 unsigned chan); 375static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, 376 unsigned dst_reg); 377 378static int tgsi_last_instruction(unsigned writemask) 379{ 380 int i, lasti = 0; 381 382 for (i = 0; i < 4; i++) { 383 if (writemask & (1 << i)) { 384 lasti = i; 385 } 386 } 387 return lasti; 388} 389 390static int tgsi_is_supported(struct r600_shader_ctx *ctx) 391{ 392 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 393 unsigned j; 394 395 if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) { 396 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 397 return -EINVAL; 398 } 399 if (i->Instruction.Predicate) { 400 R600_ERR("predicate unsupported\n"); 401 return -EINVAL; 402 } 403#if 0 404 if (i->Instruction.Label) { 405 R600_ERR("label unsupported\n"); 406 return -EINVAL; 407 } 408#endif 409 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 410 if (i->Src[j].Register.Dimension) { 411 switch (i->Src[j].Register.File) { 412 case TGSI_FILE_CONSTANT: 413 break; 414 case TGSI_FILE_INPUT: 415 if (ctx->type == PIPE_SHADER_GEOMETRY || 416 ctx->type == PIPE_SHADER_TESS_CTRL || 417 ctx->type == PIPE_SHADER_TESS_EVAL) 418 break; 419 case TGSI_FILE_OUTPUT: 420 if (ctx->type == PIPE_SHADER_TESS_CTRL) 421 break; 422 default: 423 R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, 424 i->Src[j].Register.File, 425 i->Src[j].Register.Dimension); 426 return -EINVAL; 427 } 428 } 429 } 430 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 431 if (i->Dst[j].Register.Dimension) { 432 if (ctx->type == PIPE_SHADER_TESS_CTRL) 433 continue; 434 R600_ERR("unsupported dst (dimension)\n"); 435 return -EINVAL; 436 } 437 } 438 return 0; 439} 440 441int eg_get_interpolator_index(unsigned interpolate, unsigned location) 442{ 443 if (interpolate == TGSI_INTERPOLATE_COLOR || 444 interpolate == TGSI_INTERPOLATE_LINEAR || 445 interpolate == TGSI_INTERPOLATE_PERSPECTIVE) 446 { 447 int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR; 448 int loc; 449 450 switch(location) { 451 case TGSI_INTERPOLATE_LOC_CENTER: 452 loc = 1; 453 break; 454 case TGSI_INTERPOLATE_LOC_CENTROID: 455 loc = 2; 456 break; 457 case TGSI_INTERPOLATE_LOC_SAMPLE: 458 default: 459 loc = 0; break; 460 } 461 462 return is_linear * 3 + loc; 463 } 464 465 return -1; 466} 467 468static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx, 469 int input) 470{ 471 int i = eg_get_interpolator_index( 472 ctx->shader->input[input].interpolate, 473 ctx->shader->input[input].interpolate_location); 474 assert(i >= 0); 475 ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index; 476} 477 478static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 479{ 480 int i, r; 481 struct r600_bytecode_alu alu; 482 int gpr = 0, base_chan = 0; 483 int ij_index = ctx->shader->input[input].ij_index; 484 485 /* work out gpr and base_chan from index */ 486 gpr = ij_index / 2; 487 base_chan = (2 * (ij_index % 2)) + 1; 488 489 for (i = 0; i < 8; i++) { 490 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 491 492 if (i < 4) 493 alu.op = ALU_OP2_INTERP_ZW; 494 else 495 alu.op = ALU_OP2_INTERP_XY; 496 497 if ((i > 1) && (i < 6)) { 498 alu.dst.sel = ctx->shader->input[input].gpr; 499 alu.dst.write = 1; 500 } 501 502 alu.dst.chan = i % 4; 503 504 alu.src[0].sel = gpr; 505 alu.src[0].chan = (base_chan - (i % 2)); 506 507 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 508 509 alu.bank_swizzle_force = SQ_ALU_VEC_210; 510 if ((i % 4) == 3) 511 alu.last = 1; 512 r = r600_bytecode_add_alu(ctx->bc, &alu); 513 if (r) 514 return r; 515 } 516 return 0; 517} 518 519static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 520{ 521 int i, r; 522 struct r600_bytecode_alu alu; 523 524 for (i = 0; i < 4; i++) { 525 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 526 527 alu.op = ALU_OP1_INTERP_LOAD_P0; 528 529 alu.dst.sel = ctx->shader->input[input].gpr; 530 alu.dst.write = 1; 531 532 alu.dst.chan = i; 533 534 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 535 alu.src[0].chan = i; 536 537 if (i == 3) 538 alu.last = 1; 539 r = r600_bytecode_add_alu(ctx->bc, &alu); 540 if (r) 541 return r; 542 } 543 return 0; 544} 545 546/* 547 * Special export handling in shaders 548 * 549 * shader export ARRAY_BASE for EXPORT_POS: 550 * 60 is position 551 * 61 is misc vector 552 * 62, 63 are clip distance vectors 553 * 554 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 555 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 556 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 557 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 558 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 559 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 560 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 561 * exclusive from render target index) 562 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 563 * 564 * 565 * shader export ARRAY_BASE for EXPORT_PIXEL: 566 * 0-7 CB targets 567 * 61 computed Z vector 568 * 569 * The use of the values exported in the computed Z vector are controlled 570 * by DB_SHADER_CONTROL: 571 * Z_EXPORT_ENABLE - Z as a float in RED 572 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 573 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 574 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 575 * DB_SOURCE_FORMAT - export control restrictions 576 * 577 */ 578 579 580/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 581static int r600_spi_sid(struct r600_shader_io * io) 582{ 583 int index, name = io->name; 584 585 /* These params are handled differently, they don't need 586 * semantic indices, so we'll use 0 for them. 587 */ 588 if (name == TGSI_SEMANTIC_POSITION || 589 name == TGSI_SEMANTIC_PSIZE || 590 name == TGSI_SEMANTIC_EDGEFLAG || 591 name == TGSI_SEMANTIC_FACE || 592 name == TGSI_SEMANTIC_SAMPLEMASK) 593 index = 0; 594 else { 595 if (name == TGSI_SEMANTIC_GENERIC) { 596 /* For generic params simply use sid from tgsi */ 597 index = io->sid; 598 } else { 599 /* For non-generic params - pack name and sid into 8 bits */ 600 index = 0x80 | (name<<3) | (io->sid); 601 } 602 603 /* Make sure that all really used indices have nonzero value, so 604 * we can just compare it to 0 later instead of comparing the name 605 * with different values to detect special cases. */ 606 index++; 607 } 608 609 return index; 610}; 611 612/* we need this to get a common lds index for vs/tcs/tes input/outputs */ 613int r600_get_lds_unique_index(unsigned semantic_name, unsigned index) 614{ 615 switch (semantic_name) { 616 case TGSI_SEMANTIC_POSITION: 617 return 0; 618 case TGSI_SEMANTIC_PSIZE: 619 return 1; 620 case TGSI_SEMANTIC_CLIPDIST: 621 assert(index <= 1); 622 return 2 + index; 623 case TGSI_SEMANTIC_GENERIC: 624 if (index <= 63-4) 625 return 4 + index - 9; 626 else 627 /* same explanation as in the default statement, 628 * the only user hitting this is st/nine. 629 */ 630 return 0; 631 632 /* patch indices are completely separate and thus start from 0 */ 633 case TGSI_SEMANTIC_TESSOUTER: 634 return 0; 635 case TGSI_SEMANTIC_TESSINNER: 636 return 1; 637 case TGSI_SEMANTIC_PATCH: 638 return 2 + index; 639 640 default: 641 /* Don't fail here. The result of this function is only used 642 * for LS, TCS, TES, and GS, where legacy GL semantics can't 643 * occur, but this function is called for all vertex shaders 644 * before it's known whether LS will be compiled or not. 645 */ 646 return 0; 647 } 648} 649 650/* turn input into interpolate on EG */ 651static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 652{ 653 int r = 0; 654 655 if (ctx->shader->input[index].spi_sid) { 656 ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 657 if (ctx->shader->input[index].interpolate > 0) { 658 evergreen_interp_assign_ij_index(ctx, index); 659 r = evergreen_interp_alu(ctx, index); 660 } else { 661 r = evergreen_interp_flat(ctx, index); 662 } 663 } 664 return r; 665} 666 667static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 668{ 669 struct r600_bytecode_alu alu; 670 int i, r; 671 int gpr_front = ctx->shader->input[front].gpr; 672 int gpr_back = ctx->shader->input[back].gpr; 673 674 for (i = 0; i < 4; i++) { 675 memset(&alu, 0, sizeof(alu)); 676 alu.op = ALU_OP3_CNDGT; 677 alu.is_op3 = 1; 678 alu.dst.write = 1; 679 alu.dst.sel = gpr_front; 680 alu.src[0].sel = ctx->face_gpr; 681 alu.src[1].sel = gpr_front; 682 alu.src[2].sel = gpr_back; 683 684 alu.dst.chan = i; 685 alu.src[1].chan = i; 686 alu.src[2].chan = i; 687 alu.last = (i==3); 688 689 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 690 return r; 691 } 692 693 return 0; 694} 695 696/* execute a single slot ALU calculation */ 697static int single_alu_op2(struct r600_shader_ctx *ctx, int op, 698 int dst_sel, int dst_chan, 699 int src0_sel, unsigned src0_chan_val, 700 int src1_sel, unsigned src1_chan_val) 701{ 702 struct r600_bytecode_alu alu; 703 int r, i; 704 705 if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) { 706 for (i = 0; i < 4; i++) { 707 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 708 alu.op = op; 709 alu.src[0].sel = src0_sel; 710 if (src0_sel == V_SQ_ALU_SRC_LITERAL) 711 alu.src[0].value = src0_chan_val; 712 else 713 alu.src[0].chan = src0_chan_val; 714 alu.src[1].sel = src1_sel; 715 if (src1_sel == V_SQ_ALU_SRC_LITERAL) 716 alu.src[1].value = src1_chan_val; 717 else 718 alu.src[1].chan = src1_chan_val; 719 alu.dst.sel = dst_sel; 720 alu.dst.chan = i; 721 alu.dst.write = i == dst_chan; 722 alu.last = (i == 3); 723 r = r600_bytecode_add_alu(ctx->bc, &alu); 724 if (r) 725 return r; 726 } 727 return 0; 728 } 729 730 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 731 alu.op = op; 732 alu.src[0].sel = src0_sel; 733 if (src0_sel == V_SQ_ALU_SRC_LITERAL) 734 alu.src[0].value = src0_chan_val; 735 else 736 alu.src[0].chan = src0_chan_val; 737 alu.src[1].sel = src1_sel; 738 if (src1_sel == V_SQ_ALU_SRC_LITERAL) 739 alu.src[1].value = src1_chan_val; 740 else 741 alu.src[1].chan = src1_chan_val; 742 alu.dst.sel = dst_sel; 743 alu.dst.chan = dst_chan; 744 alu.dst.write = 1; 745 alu.last = 1; 746 r = r600_bytecode_add_alu(ctx->bc, &alu); 747 if (r) 748 return r; 749 return 0; 750} 751 752/* execute a single slot ALU calculation */ 753static int single_alu_op3(struct r600_shader_ctx *ctx, int op, 754 int dst_sel, int dst_chan, 755 int src0_sel, unsigned src0_chan_val, 756 int src1_sel, unsigned src1_chan_val, 757 int src2_sel, unsigned src2_chan_val) 758{ 759 struct r600_bytecode_alu alu; 760 int r; 761 762 /* validate this for other ops */ 763 assert(op == ALU_OP3_MULADD_UINT24); 764 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 765 alu.op = op; 766 alu.src[0].sel = src0_sel; 767 if (src0_sel == V_SQ_ALU_SRC_LITERAL) 768 alu.src[0].value = src0_chan_val; 769 else 770 alu.src[0].chan = src0_chan_val; 771 alu.src[1].sel = src1_sel; 772 if (src1_sel == V_SQ_ALU_SRC_LITERAL) 773 alu.src[1].value = src1_chan_val; 774 else 775 alu.src[1].chan = src1_chan_val; 776 alu.src[2].sel = src2_sel; 777 if (src2_sel == V_SQ_ALU_SRC_LITERAL) 778 alu.src[2].value = src2_chan_val; 779 else 780 alu.src[2].chan = src2_chan_val; 781 alu.dst.sel = dst_sel; 782 alu.dst.chan = dst_chan; 783 alu.is_op3 = 1; 784 alu.last = 1; 785 r = r600_bytecode_add_alu(ctx->bc, &alu); 786 if (r) 787 return r; 788 return 0; 789} 790 791/* put it in temp_reg.x */ 792static int get_lds_offset0(struct r600_shader_ctx *ctx, 793 int rel_patch_chan, 794 int temp_reg, bool is_patch_var) 795{ 796 int r; 797 798 /* MUL temp.x, patch_stride (input_vals.x), rel_patch_id (r0.y (tcs)) */ 799 /* ADD 800 Dimension - patch0_offset (input_vals.z), 801 Non-dim - patch0_data_offset (input_vals.w) 802 */ 803 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 804 temp_reg, 0, 805 ctx->tess_output_info, 0, 806 0, rel_patch_chan, 807 ctx->tess_output_info, is_patch_var ? 3 : 2); 808 if (r) 809 return r; 810 return 0; 811} 812 813static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index) 814{ 815 return index > 0 ? ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg; 816} 817 818static int r600_get_temp(struct r600_shader_ctx *ctx) 819{ 820 return ctx->temp_reg + ctx->max_driver_temp_used++; 821} 822 823static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid) 824{ 825 int i; 826 i = ctx->shader->noutput++; 827 ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID; 828 ctx->shader->output[i].sid = 0; 829 ctx->shader->output[i].gpr = 0; 830 ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT; 831 ctx->shader->output[i].write_mask = 0x4; 832 ctx->shader->output[i].spi_sid = prim_id_sid; 833 834 return 0; 835} 836 837static int tgsi_barrier(struct r600_shader_ctx *ctx) 838{ 839 struct r600_bytecode_alu alu; 840 int r; 841 842 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 843 alu.op = ctx->inst_info->op; 844 alu.last = 1; 845 846 r = r600_bytecode_add_alu(ctx->bc, &alu); 847 if (r) 848 return r; 849 return 0; 850} 851 852static int tgsi_declaration(struct r600_shader_ctx *ctx) 853{ 854 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 855 int r, i, j, count = d->Range.Last - d->Range.First + 1; 856 857 switch (d->Declaration.File) { 858 case TGSI_FILE_INPUT: 859 for (j = 0; j < count; j++) { 860 i = ctx->shader->ninput + j; 861 assert(i < Elements(ctx->shader->input)); 862 ctx->shader->input[i].name = d->Semantic.Name; 863 ctx->shader->input[i].sid = d->Semantic.Index + j; 864 ctx->shader->input[i].interpolate = d->Interp.Interpolate; 865 ctx->shader->input[i].interpolate_location = d->Interp.Location; 866 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j; 867 if (ctx->type == PIPE_SHADER_FRAGMENT) { 868 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 869 switch (ctx->shader->input[i].name) { 870 case TGSI_SEMANTIC_FACE: 871 if (ctx->face_gpr != -1) 872 ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */ 873 else 874 ctx->face_gpr = ctx->shader->input[i].gpr; 875 break; 876 case TGSI_SEMANTIC_COLOR: 877 ctx->colors_used++; 878 break; 879 case TGSI_SEMANTIC_POSITION: 880 ctx->fragcoord_input = i; 881 break; 882 case TGSI_SEMANTIC_PRIMID: 883 /* set this for now */ 884 ctx->shader->gs_prim_id_input = true; 885 ctx->shader->ps_prim_id_input = i; 886 break; 887 } 888 if (ctx->bc->chip_class >= EVERGREEN) { 889 if ((r = evergreen_interp_input(ctx, i))) 890 return r; 891 } 892 } else if (ctx->type == PIPE_SHADER_GEOMETRY) { 893 /* FIXME probably skip inputs if they aren't passed in the ring */ 894 ctx->shader->input[i].ring_offset = ctx->next_ring_offset; 895 ctx->next_ring_offset += 16; 896 if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID) 897 ctx->shader->gs_prim_id_input = true; 898 } 899 } 900 ctx->shader->ninput += count; 901 break; 902 case TGSI_FILE_OUTPUT: 903 for (j = 0; j < count; j++) { 904 i = ctx->shader->noutput + j; 905 assert(i < Elements(ctx->shader->output)); 906 ctx->shader->output[i].name = d->Semantic.Name; 907 ctx->shader->output[i].sid = d->Semantic.Index + j; 908 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j; 909 ctx->shader->output[i].interpolate = d->Interp.Interpolate; 910 ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 911 if (ctx->type == PIPE_SHADER_VERTEX || 912 ctx->type == PIPE_SHADER_GEOMETRY || 913 ctx->type == PIPE_SHADER_TESS_EVAL) { 914 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 915 switch (d->Semantic.Name) { 916 case TGSI_SEMANTIC_CLIPDIST: 917 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << 918 ((d->Semantic.Index + j) << 2); 919 break; 920 case TGSI_SEMANTIC_PSIZE: 921 ctx->shader->vs_out_misc_write = 1; 922 ctx->shader->vs_out_point_size = 1; 923 break; 924 case TGSI_SEMANTIC_EDGEFLAG: 925 ctx->shader->vs_out_misc_write = 1; 926 ctx->shader->vs_out_edgeflag = 1; 927 ctx->edgeflag_output = i; 928 break; 929 case TGSI_SEMANTIC_VIEWPORT_INDEX: 930 ctx->shader->vs_out_misc_write = 1; 931 ctx->shader->vs_out_viewport = 1; 932 break; 933 case TGSI_SEMANTIC_LAYER: 934 ctx->shader->vs_out_misc_write = 1; 935 ctx->shader->vs_out_layer = 1; 936 break; 937 case TGSI_SEMANTIC_CLIPVERTEX: 938 ctx->clip_vertex_write = TRUE; 939 ctx->cv_output = i; 940 break; 941 } 942 if (ctx->type == PIPE_SHADER_GEOMETRY) { 943 ctx->gs_out_ring_offset += 16; 944 } 945 } else if (ctx->type == PIPE_SHADER_FRAGMENT) { 946 switch (d->Semantic.Name) { 947 case TGSI_SEMANTIC_COLOR: 948 ctx->shader->nr_ps_max_color_exports++; 949 break; 950 } 951 } 952 } 953 ctx->shader->noutput += count; 954 break; 955 case TGSI_FILE_TEMPORARY: 956 if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 957 if (d->Array.ArrayID) { 958 r600_add_gpr_array(ctx->shader, 959 ctx->file_offset[TGSI_FILE_TEMPORARY] + 960 d->Range.First, 961 d->Range.Last - d->Range.First + 1, 0x0F); 962 } 963 } 964 break; 965 966 case TGSI_FILE_CONSTANT: 967 case TGSI_FILE_SAMPLER: 968 case TGSI_FILE_SAMPLER_VIEW: 969 case TGSI_FILE_ADDRESS: 970 break; 971 972 case TGSI_FILE_SYSTEM_VALUE: 973 if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK || 974 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID || 975 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) { 976 break; /* Already handled from allocate_system_value_inputs */ 977 } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 978 if (!ctx->native_integers) { 979 struct r600_bytecode_alu alu; 980 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 981 982 alu.op = ALU_OP1_INT_TO_FLT; 983 alu.src[0].sel = 0; 984 alu.src[0].chan = 3; 985 986 alu.dst.sel = 0; 987 alu.dst.chan = 3; 988 alu.dst.write = 1; 989 alu.last = 1; 990 991 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 992 return r; 993 } 994 break; 995 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 996 break; 997 else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID) 998 break; 999 else if (d->Semantic.Name == TGSI_SEMANTIC_TESSINNER || 1000 d->Semantic.Name == TGSI_SEMANTIC_TESSOUTER) { 1001 int param = r600_get_lds_unique_index(d->Semantic.Name, 0); 1002 int dreg = d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ? 3 : 2; 1003 unsigned temp_reg = r600_get_temp(ctx); 1004 1005 r = get_lds_offset0(ctx, 2, temp_reg, true); 1006 if (r) 1007 return r; 1008 1009 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1010 temp_reg, 0, 1011 temp_reg, 0, 1012 V_SQ_ALU_SRC_LITERAL, param * 16); 1013 if (r) 1014 return r; 1015 1016 do_lds_fetch_values(ctx, temp_reg, dreg); 1017 } 1018 else if (d->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) { 1019 /* MOV r1.x, r0.x; 1020 MOV r1.y, r0.y; 1021 */ 1022 for (i = 0; i < 2; i++) { 1023 struct r600_bytecode_alu alu; 1024 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1025 alu.op = ALU_OP1_MOV; 1026 alu.src[0].sel = 0; 1027 alu.src[0].chan = 0 + i; 1028 alu.dst.sel = 1; 1029 alu.dst.chan = 0 + i; 1030 alu.dst.write = 1; 1031 alu.last = (i == 1) ? 1 : 0; 1032 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1033 return r; 1034 } 1035 /* ADD r1.z, 1.0f, -r0.x */ 1036 struct r600_bytecode_alu alu; 1037 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1038 alu.op = ALU_OP2_ADD; 1039 alu.src[0].sel = V_SQ_ALU_SRC_1; 1040 alu.src[1].sel = 1; 1041 alu.src[1].chan = 0; 1042 alu.src[1].neg = 1; 1043 alu.dst.sel = 1; 1044 alu.dst.chan = 2; 1045 alu.dst.write = 1; 1046 alu.last = 1; 1047 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1048 return r; 1049 1050 /* ADD r1.z, r1.z, -r1.y */ 1051 alu.op = ALU_OP2_ADD; 1052 alu.src[0].sel = 1; 1053 alu.src[0].chan = 2; 1054 alu.src[1].sel = 1; 1055 alu.src[1].chan = 1; 1056 alu.src[1].neg = 1; 1057 alu.dst.sel = 1; 1058 alu.dst.chan = 2; 1059 alu.dst.write = 1; 1060 alu.last = 1; 1061 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1062 return r; 1063 break; 1064 } 1065 break; 1066 default: 1067 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 1068 return -EINVAL; 1069 } 1070 return 0; 1071} 1072 1073static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset) 1074{ 1075 struct tgsi_parse_context parse; 1076 struct { 1077 boolean enabled; 1078 int *reg; 1079 unsigned name, alternate_name; 1080 } inputs[2] = { 1081 { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */ 1082 1083 { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */ 1084 }; 1085 int i, k, num_regs = 0; 1086 1087 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 1088 return 0; 1089 } 1090 1091 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 1092 while (!tgsi_parse_end_of_tokens(&parse)) { 1093 tgsi_parse_token(&parse); 1094 1095 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 1096 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 1097 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 1098 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 1099 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 1100 { 1101 int interpolate, location, k; 1102 1103 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 1104 location = TGSI_INTERPOLATE_LOC_CENTER; 1105 inputs[1].enabled = true; /* needs SAMPLEID */ 1106 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 1107 location = TGSI_INTERPOLATE_LOC_CENTER; 1108 /* Needs sample positions, currently those are always available */ 1109 } else { 1110 location = TGSI_INTERPOLATE_LOC_CENTROID; 1111 } 1112 1113 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 1114 k = eg_get_interpolator_index(interpolate, location); 1115 ctx->eg_interpolators[k].enabled = true; 1116 } 1117 } else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { 1118 struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration; 1119 if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1120 for (k = 0; k < Elements(inputs); k++) { 1121 if (d->Semantic.Name == inputs[k].name || 1122 d->Semantic.Name == inputs[k].alternate_name) { 1123 inputs[k].enabled = true; 1124 } 1125 } 1126 } 1127 } 1128 } 1129 1130 tgsi_parse_free(&parse); 1131 1132 for (i = 0; i < Elements(inputs); i++) { 1133 boolean enabled = inputs[i].enabled; 1134 int *reg = inputs[i].reg; 1135 unsigned name = inputs[i].name; 1136 1137 if (enabled) { 1138 int gpr = gpr_offset + num_regs++; 1139 1140 // add to inputs, allocate a gpr 1141 k = ctx->shader->ninput ++; 1142 ctx->shader->input[k].name = name; 1143 ctx->shader->input[k].sid = 0; 1144 ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT; 1145 ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER; 1146 *reg = ctx->shader->input[k].gpr = gpr; 1147 } 1148 } 1149 1150 return gpr_offset + num_regs; 1151} 1152 1153/* 1154 * for evergreen we need to scan the shader to find the number of GPRs we need to 1155 * reserve for interpolation and system values 1156 * 1157 * we need to know if we are going to emit 1158 * any sample or centroid inputs 1159 * if perspective and linear are required 1160*/ 1161static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 1162{ 1163 unsigned i; 1164 int num_baryc; 1165 struct tgsi_parse_context parse; 1166 1167 memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators)); 1168 1169 for (i = 0; i < ctx->info.num_inputs; i++) { 1170 int k; 1171 /* skip position/face/mask/sampleid */ 1172 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 1173 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE || 1174 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK || 1175 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID) 1176 continue; 1177 1178 k = eg_get_interpolator_index( 1179 ctx->info.input_interpolate[i], 1180 ctx->info.input_interpolate_loc[i]); 1181 if (k >= 0) 1182 ctx->eg_interpolators[k].enabled = TRUE; 1183 } 1184 1185 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 1186 return 0; 1187 } 1188 1189 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 1190 while (!tgsi_parse_end_of_tokens(&parse)) { 1191 tgsi_parse_token(&parse); 1192 1193 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 1194 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 1195 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 1196 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 1197 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 1198 { 1199 int interpolate, location, k; 1200 1201 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 1202 location = TGSI_INTERPOLATE_LOC_CENTER; 1203 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 1204 location = TGSI_INTERPOLATE_LOC_CENTER; 1205 } else { 1206 location = TGSI_INTERPOLATE_LOC_CENTROID; 1207 } 1208 1209 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 1210 k = eg_get_interpolator_index(interpolate, location); 1211 ctx->eg_interpolators[k].enabled = true; 1212 } 1213 } 1214 } 1215 1216 tgsi_parse_free(&parse); 1217 1218 /* assign gpr to each interpolator according to priority */ 1219 num_baryc = 0; 1220 for (i = 0; i < Elements(ctx->eg_interpolators); i++) { 1221 if (ctx->eg_interpolators[i].enabled) { 1222 ctx->eg_interpolators[i].ij_index = num_baryc; 1223 num_baryc ++; 1224 } 1225 } 1226 1227 /* XXX PULL MODEL and LINE STIPPLE */ 1228 1229 num_baryc = (num_baryc + 1) >> 1; 1230 return allocate_system_value_inputs(ctx, num_baryc); 1231} 1232 1233/* sample_id_sel == NULL means fetch for current sample */ 1234static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel) 1235{ 1236 struct r600_bytecode_vtx vtx; 1237 int r, t1; 1238 1239 assert(ctx->fixed_pt_position_gpr != -1); 1240 1241 t1 = r600_get_temp(ctx); 1242 1243 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 1244 vtx.op = FETCH_OP_VFETCH; 1245 vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 1246 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1247 if (sample_id == NULL) { 1248 vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; 1249 vtx.src_sel_x = 3; 1250 } 1251 else { 1252 struct r600_bytecode_alu alu; 1253 1254 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1255 alu.op = ALU_OP1_MOV; 1256 r600_bytecode_src(&alu.src[0], sample_id, chan_sel); 1257 alu.dst.sel = t1; 1258 alu.dst.write = 1; 1259 alu.last = 1; 1260 r = r600_bytecode_add_alu(ctx->bc, &alu); 1261 if (r) 1262 return r; 1263 1264 vtx.src_gpr = t1; 1265 vtx.src_sel_x = 0; 1266 } 1267 vtx.mega_fetch_count = 16; 1268 vtx.dst_gpr = t1; 1269 vtx.dst_sel_x = 0; 1270 vtx.dst_sel_y = 1; 1271 vtx.dst_sel_z = 2; 1272 vtx.dst_sel_w = 3; 1273 vtx.data_format = FMT_32_32_32_32_FLOAT; 1274 vtx.num_format_all = 2; 1275 vtx.format_comp_all = 1; 1276 vtx.use_const_fields = 0; 1277 vtx.offset = 1; // first element is size of buffer 1278 vtx.endian = r600_endian_swap(32); 1279 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 1280 1281 r = r600_bytecode_add_vtx(ctx->bc, &vtx); 1282 if (r) 1283 return r; 1284 1285 return t1; 1286} 1287 1288static void tgsi_src(struct r600_shader_ctx *ctx, 1289 const struct tgsi_full_src_register *tgsi_src, 1290 struct r600_shader_src *r600_src) 1291{ 1292 memset(r600_src, 0, sizeof(*r600_src)); 1293 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 1294 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 1295 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 1296 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 1297 r600_src->neg = tgsi_src->Register.Negate; 1298 r600_src->abs = tgsi_src->Register.Absolute; 1299 1300 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 1301 int index; 1302 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 1303 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 1304 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 1305 1306 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 1307 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs); 1308 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 1309 return; 1310 } 1311 index = tgsi_src->Register.Index; 1312 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 1313 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 1314 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 1315 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) { 1316 r600_src->swizzle[0] = 2; // Z value 1317 r600_src->swizzle[1] = 2; 1318 r600_src->swizzle[2] = 2; 1319 r600_src->swizzle[3] = 2; 1320 r600_src->sel = ctx->face_gpr; 1321 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) { 1322 r600_src->swizzle[0] = 3; // W value 1323 r600_src->swizzle[1] = 3; 1324 r600_src->swizzle[2] = 3; 1325 r600_src->swizzle[3] = 3; 1326 r600_src->sel = ctx->fixed_pt_position_gpr; 1327 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) { 1328 r600_src->swizzle[0] = 0; 1329 r600_src->swizzle[1] = 1; 1330 r600_src->swizzle[2] = 4; 1331 r600_src->swizzle[3] = 4; 1332 r600_src->sel = load_sample_position(ctx, NULL, -1); 1333 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 1334 r600_src->swizzle[0] = 3; 1335 r600_src->swizzle[1] = 3; 1336 r600_src->swizzle[2] = 3; 1337 r600_src->swizzle[3] = 3; 1338 r600_src->sel = 0; 1339 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 1340 r600_src->swizzle[0] = 0; 1341 r600_src->swizzle[1] = 0; 1342 r600_src->swizzle[2] = 0; 1343 r600_src->swizzle[3] = 0; 1344 r600_src->sel = 0; 1345 } else if (ctx->type != PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { 1346 r600_src->swizzle[0] = 3; 1347 r600_src->swizzle[1] = 3; 1348 r600_src->swizzle[2] = 3; 1349 r600_src->swizzle[3] = 3; 1350 r600_src->sel = 1; 1351 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { 1352 r600_src->swizzle[0] = 2; 1353 r600_src->swizzle[1] = 2; 1354 r600_src->swizzle[2] = 2; 1355 r600_src->swizzle[3] = 2; 1356 r600_src->sel = 0; 1357 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSCOORD) { 1358 r600_src->sel = 1; 1359 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSINNER) { 1360 r600_src->sel = 3; 1361 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) { 1362 r600_src->sel = 2; 1363 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) { 1364 if (ctx->type == PIPE_SHADER_TESS_CTRL) { 1365 r600_src->sel = ctx->tess_input_info; 1366 r600_src->swizzle[0] = 2; 1367 r600_src->swizzle[1] = 2; 1368 r600_src->swizzle[2] = 2; 1369 r600_src->swizzle[3] = 2; 1370 } else { 1371 r600_src->sel = ctx->tess_input_info; 1372 r600_src->swizzle[0] = 3; 1373 r600_src->swizzle[1] = 3; 1374 r600_src->swizzle[2] = 3; 1375 r600_src->swizzle[3] = 3; 1376 } 1377 } else if (ctx->type == PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { 1378 r600_src->sel = 0; 1379 r600_src->swizzle[0] = 0; 1380 r600_src->swizzle[1] = 0; 1381 r600_src->swizzle[2] = 0; 1382 r600_src->swizzle[3] = 0; 1383 } else if (ctx->type == PIPE_SHADER_TESS_EVAL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { 1384 r600_src->sel = 0; 1385 r600_src->swizzle[0] = 3; 1386 r600_src->swizzle[1] = 3; 1387 r600_src->swizzle[2] = 3; 1388 r600_src->swizzle[3] = 3; 1389 } 1390 } else { 1391 if (tgsi_src->Register.Indirect) 1392 r600_src->rel = V_SQ_REL_RELATIVE; 1393 r600_src->sel = tgsi_src->Register.Index; 1394 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 1395 } 1396 if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) { 1397 if (tgsi_src->Register.Dimension) { 1398 r600_src->kc_bank = tgsi_src->Dimension.Index; 1399 if (tgsi_src->Dimension.Indirect) { 1400 r600_src->kc_rel = 1; 1401 } 1402 } 1403 } 1404} 1405 1406static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 1407 unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 1408 unsigned int dst_reg) 1409{ 1410 struct r600_bytecode_vtx vtx; 1411 unsigned int ar_reg; 1412 int r; 1413 1414 if (offset) { 1415 struct r600_bytecode_alu alu; 1416 1417 memset(&alu, 0, sizeof(alu)); 1418 1419 alu.op = ALU_OP2_ADD_INT; 1420 alu.src[0].sel = ctx->bc->ar_reg; 1421 alu.src[0].chan = ar_chan; 1422 1423 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1424 alu.src[1].value = offset; 1425 1426 alu.dst.sel = dst_reg; 1427 alu.dst.chan = ar_chan; 1428 alu.dst.write = 1; 1429 alu.last = 1; 1430 1431 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1432 return r; 1433 1434 ar_reg = dst_reg; 1435 } else { 1436 ar_reg = ctx->bc->ar_reg; 1437 } 1438 1439 memset(&vtx, 0, sizeof(vtx)); 1440 vtx.buffer_id = cb_idx; 1441 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1442 vtx.src_gpr = ar_reg; 1443 vtx.src_sel_x = ar_chan; 1444 vtx.mega_fetch_count = 16; 1445 vtx.dst_gpr = dst_reg; 1446 vtx.dst_sel_x = 0; /* SEL_X */ 1447 vtx.dst_sel_y = 1; /* SEL_Y */ 1448 vtx.dst_sel_z = 2; /* SEL_Z */ 1449 vtx.dst_sel_w = 3; /* SEL_W */ 1450 vtx.data_format = FMT_32_32_32_32_FLOAT; 1451 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 1452 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 1453 vtx.endian = r600_endian_swap(32); 1454 vtx.buffer_index_mode = cb_rel; // cb_rel ? V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE; 1455 1456 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1457 return r; 1458 1459 return 0; 1460} 1461 1462static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1463{ 1464 struct r600_bytecode_vtx vtx; 1465 int r; 1466 unsigned index = src->Register.Index; 1467 unsigned vtx_id = src->Dimension.Index; 1468 int offset_reg = vtx_id / 3; 1469 int offset_chan = vtx_id % 3; 1470 int t2 = 0; 1471 1472 /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, 1473 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ 1474 1475 if (offset_reg == 0 && offset_chan == 2) 1476 offset_chan = 3; 1477 1478 if (src->Dimension.Indirect || src->Register.Indirect) 1479 t2 = r600_get_temp(ctx); 1480 1481 if (src->Dimension.Indirect) { 1482 int treg[3]; 1483 struct r600_bytecode_alu alu; 1484 int r, i; 1485 unsigned addr_reg; 1486 addr_reg = get_address_file_reg(ctx, src->DimIndirect.Index); 1487 if (src->DimIndirect.Index > 0) { 1488 r = single_alu_op2(ctx, ALU_OP1_MOV, 1489 ctx->bc->ar_reg, 0, 1490 addr_reg, 0, 1491 0, 0); 1492 if (r) 1493 return r; 1494 } 1495 /* 1496 we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt. 1497 at least this is what fglrx seems to do. */ 1498 for (i = 0; i < 3; i++) { 1499 treg[i] = r600_get_temp(ctx); 1500 } 1501 r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F); 1502 1503 for (i = 0; i < 3; i++) { 1504 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1505 alu.op = ALU_OP1_MOV; 1506 alu.src[0].sel = 0; 1507 alu.src[0].chan = i == 2 ? 3 : i; 1508 alu.dst.sel = treg[i]; 1509 alu.dst.chan = 0; 1510 alu.dst.write = 1; 1511 alu.last = 1; 1512 r = r600_bytecode_add_alu(ctx->bc, &alu); 1513 if (r) 1514 return r; 1515 } 1516 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1517 alu.op = ALU_OP1_MOV; 1518 alu.src[0].sel = treg[0]; 1519 alu.src[0].rel = 1; 1520 alu.dst.sel = t2; 1521 alu.dst.write = 1; 1522 alu.last = 1; 1523 r = r600_bytecode_add_alu(ctx->bc, &alu); 1524 if (r) 1525 return r; 1526 offset_reg = t2; 1527 offset_chan = 0; 1528 } 1529 1530 if (src->Register.Indirect) { 1531 int addr_reg; 1532 unsigned first = ctx->info.input_array_first[src->Indirect.ArrayID]; 1533 1534 addr_reg = get_address_file_reg(ctx, src->Indirect.Index); 1535 1536 /* pull the value from index_reg */ 1537 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1538 t2, 1, 1539 addr_reg, 0, 1540 V_SQ_ALU_SRC_LITERAL, first); 1541 if (r) 1542 return r; 1543 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 1544 t2, 0, 1545 t2, 1, 1546 V_SQ_ALU_SRC_LITERAL, 4, 1547 offset_reg, offset_chan); 1548 if (r) 1549 return r; 1550 offset_reg = t2; 1551 offset_chan = 0; 1552 index = src->Register.Index - first; 1553 } 1554 1555 memset(&vtx, 0, sizeof(vtx)); 1556 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 1557 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1558 vtx.src_gpr = offset_reg; 1559 vtx.src_sel_x = offset_chan; 1560 vtx.offset = index * 16; /*bytes*/ 1561 vtx.mega_fetch_count = 16; 1562 vtx.dst_gpr = dst_reg; 1563 vtx.dst_sel_x = 0; /* SEL_X */ 1564 vtx.dst_sel_y = 1; /* SEL_Y */ 1565 vtx.dst_sel_z = 2; /* SEL_Z */ 1566 vtx.dst_sel_w = 3; /* SEL_W */ 1567 if (ctx->bc->chip_class >= EVERGREEN) { 1568 vtx.use_const_fields = 1; 1569 } else { 1570 vtx.data_format = FMT_32_32_32_32_FLOAT; 1571 } 1572 1573 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1574 return r; 1575 1576 return 0; 1577} 1578 1579static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) 1580{ 1581 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1582 unsigned i; 1583 1584 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1585 struct tgsi_full_src_register *src = &inst->Src[i]; 1586 1587 if (src->Register.File == TGSI_FILE_INPUT) { 1588 if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) { 1589 /* primitive id is in R0.z */ 1590 ctx->src[i].sel = 0; 1591 ctx->src[i].swizzle[0] = 2; 1592 } 1593 } 1594 if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { 1595 int treg = r600_get_temp(ctx); 1596 1597 fetch_gs_input(ctx, src, treg); 1598 ctx->src[i].sel = treg; 1599 ctx->src[i].rel = 0; 1600 } 1601 } 1602 return 0; 1603} 1604 1605 1606/* Tessellation shaders pass outputs to the next shader using LDS. 1607 * 1608 * LS outputs = TCS(HS) inputs 1609 * TCS(HS) outputs = TES(DS) inputs 1610 * 1611 * The LDS layout is: 1612 * - TCS inputs for patch 0 1613 * - TCS inputs for patch 1 1614 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) 1615 * - ... 1616 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset 1617 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset 1618 * - TCS outputs for patch 1 1619 * - Per-patch TCS outputs for patch 1 1620 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) 1621 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) 1622 * - ... 1623 * 1624 * All three shaders VS(LS), TCS, TES share the same LDS space. 1625 */ 1626/* this will return with the dw address in temp_reg.x */ 1627static int r600_get_byte_address(struct r600_shader_ctx *ctx, int temp_reg, 1628 const struct tgsi_full_dst_register *dst, 1629 const struct tgsi_full_src_register *src, 1630 int stride_bytes_reg, int stride_bytes_chan) 1631{ 1632 struct tgsi_full_dst_register reg; 1633 ubyte *name, *index, *array_first; 1634 int r; 1635 int param; 1636 struct tgsi_shader_info *info = &ctx->info; 1637 /* Set the register description. The address computation is the same 1638 * for sources and destinations. */ 1639 if (src) { 1640 reg.Register.File = src->Register.File; 1641 reg.Register.Index = src->Register.Index; 1642 reg.Register.Indirect = src->Register.Indirect; 1643 reg.Register.Dimension = src->Register.Dimension; 1644 reg.Indirect = src->Indirect; 1645 reg.Dimension = src->Dimension; 1646 reg.DimIndirect = src->DimIndirect; 1647 } else 1648 reg = *dst; 1649 1650 /* If the register is 2-dimensional (e.g. an array of vertices 1651 * in a primitive), calculate the base address of the vertex. */ 1652 if (reg.Register.Dimension) { 1653 int sel, chan; 1654 if (reg.Dimension.Indirect) { 1655 unsigned addr_reg; 1656 assert (reg.DimIndirect.File == TGSI_FILE_ADDRESS); 1657 1658 addr_reg = get_address_file_reg(ctx, reg.DimIndirect.Index); 1659 /* pull the value from index_reg */ 1660 sel = addr_reg; 1661 chan = 0; 1662 } else { 1663 sel = V_SQ_ALU_SRC_LITERAL; 1664 chan = reg.Dimension.Index; 1665 } 1666 1667 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 1668 temp_reg, 0, 1669 stride_bytes_reg, stride_bytes_chan, 1670 sel, chan, 1671 temp_reg, 0); 1672 if (r) 1673 return r; 1674 } 1675 1676 if (reg.Register.File == TGSI_FILE_INPUT) { 1677 name = info->input_semantic_name; 1678 index = info->input_semantic_index; 1679 array_first = info->input_array_first; 1680 } else if (reg.Register.File == TGSI_FILE_OUTPUT) { 1681 name = info->output_semantic_name; 1682 index = info->output_semantic_index; 1683 array_first = info->output_array_first; 1684 } else { 1685 assert(0); 1686 return -1; 1687 } 1688 if (reg.Register.Indirect) { 1689 int addr_reg; 1690 int first; 1691 /* Add the relative address of the element. */ 1692 if (reg.Indirect.ArrayID) 1693 first = array_first[reg.Indirect.ArrayID]; 1694 else 1695 first = reg.Register.Index; 1696 1697 addr_reg = get_address_file_reg(ctx, reg.Indirect.Index); 1698 1699 /* pull the value from index_reg */ 1700 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 1701 temp_reg, 0, 1702 V_SQ_ALU_SRC_LITERAL, 16, 1703 addr_reg, 0, 1704 temp_reg, 0); 1705 if (r) 1706 return r; 1707 1708 param = r600_get_lds_unique_index(name[first], 1709 index[first]); 1710 1711 } else { 1712 param = r600_get_lds_unique_index(name[reg.Register.Index], 1713 index[reg.Register.Index]); 1714 } 1715 1716 /* add to base_addr - passed in temp_reg.x */ 1717 if (param) { 1718 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1719 temp_reg, 0, 1720 temp_reg, 0, 1721 V_SQ_ALU_SRC_LITERAL, param * 16); 1722 if (r) 1723 return r; 1724 1725 } 1726 return 0; 1727} 1728 1729static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, 1730 unsigned dst_reg) 1731{ 1732 struct r600_bytecode_alu alu; 1733 int r, i; 1734 1735 if ((ctx->bc->cf_last->ndw>>1) >= 0x60) 1736 ctx->bc->force_add_cf = 1; 1737 for (i = 1; i < 4; i++) { 1738 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1739 temp_reg, i, 1740 temp_reg, 0, 1741 V_SQ_ALU_SRC_LITERAL, 4 * i); 1742 if (r) 1743 return r; 1744 } 1745 for (i = 0; i < 4; i++) { 1746 /* emit an LDS_READ_RET */ 1747 memset(&alu, 0, sizeof(alu)); 1748 alu.op = LDS_OP1_LDS_READ_RET; 1749 alu.src[0].sel = temp_reg; 1750 alu.src[0].chan = i; 1751 alu.src[1].sel = V_SQ_ALU_SRC_0; 1752 alu.src[2].sel = V_SQ_ALU_SRC_0; 1753 alu.dst.chan = 0; 1754 alu.is_lds_idx_op = true; 1755 alu.last = 1; 1756 r = r600_bytecode_add_alu(ctx->bc, &alu); 1757 if (r) 1758 return r; 1759 } 1760 for (i = 0; i < 4; i++) { 1761 /* then read from LDS_OQ_A_POP */ 1762 memset(&alu, 0, sizeof(alu)); 1763 1764 alu.op = ALU_OP1_MOV; 1765 alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; 1766 alu.src[0].chan = 0; 1767 alu.dst.sel = dst_reg; 1768 alu.dst.chan = i; 1769 alu.dst.write = 1; 1770 alu.last = 1; 1771 r = r600_bytecode_add_alu(ctx->bc, &alu); 1772 if (r) 1773 return r; 1774 } 1775 return 0; 1776} 1777 1778static int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1779{ 1780 int r; 1781 unsigned temp_reg = r600_get_temp(ctx); 1782 1783 r = get_lds_offset0(ctx, 2, temp_reg, 1784 src->Register.Dimension ? false : true); 1785 if (r) 1786 return r; 1787 1788 /* the base address is now in temp.x */ 1789 r = r600_get_byte_address(ctx, temp_reg, 1790 NULL, src, ctx->tess_output_info, 1); 1791 if (r) 1792 return r; 1793 1794 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1795 if (r) 1796 return r; 1797 return 0; 1798} 1799 1800static int fetch_tcs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1801{ 1802 int r; 1803 unsigned temp_reg = r600_get_temp(ctx); 1804 1805 /* t.x = ips * r0.y */ 1806 r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24, 1807 temp_reg, 0, 1808 ctx->tess_input_info, 0, 1809 0, 1); 1810 1811 if (r) 1812 return r; 1813 1814 /* the base address is now in temp.x */ 1815 r = r600_get_byte_address(ctx, temp_reg, 1816 NULL, src, ctx->tess_input_info, 1); 1817 if (r) 1818 return r; 1819 1820 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1821 if (r) 1822 return r; 1823 return 0; 1824} 1825 1826static int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1827{ 1828 int r; 1829 unsigned temp_reg = r600_get_temp(ctx); 1830 1831 r = get_lds_offset0(ctx, 1, temp_reg, 1832 src->Register.Dimension ? false : true); 1833 if (r) 1834 return r; 1835 /* the base address is now in temp.x */ 1836 r = r600_get_byte_address(ctx, temp_reg, 1837 NULL, src, 1838 ctx->tess_output_info, 1); 1839 if (r) 1840 return r; 1841 1842 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1843 if (r) 1844 return r; 1845 return 0; 1846} 1847 1848static int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx) 1849{ 1850 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1851 unsigned i; 1852 1853 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1854 struct tgsi_full_src_register *src = &inst->Src[i]; 1855 1856 if (ctx->type == PIPE_SHADER_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) { 1857 int treg = r600_get_temp(ctx); 1858 fetch_tes_input(ctx, src, treg); 1859 ctx->src[i].sel = treg; 1860 ctx->src[i].rel = 0; 1861 } 1862 if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) { 1863 int treg = r600_get_temp(ctx); 1864 fetch_tcs_input(ctx, src, treg); 1865 ctx->src[i].sel = treg; 1866 ctx->src[i].rel = 0; 1867 } 1868 if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) { 1869 int treg = r600_get_temp(ctx); 1870 fetch_tcs_output(ctx, src, treg); 1871 ctx->src[i].sel = treg; 1872 ctx->src[i].rel = 0; 1873 } 1874 } 1875 return 0; 1876} 1877 1878static int tgsi_split_constant(struct r600_shader_ctx *ctx) 1879{ 1880 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1881 struct r600_bytecode_alu alu; 1882 int i, j, k, nconst, r; 1883 1884 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 1885 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 1886 nconst++; 1887 } 1888 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 1889 } 1890 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 1891 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 1892 continue; 1893 } 1894 1895 if (ctx->src[i].rel) { 1896 int chan = inst->Src[i].Indirect.Swizzle; 1897 int treg = r600_get_temp(ctx); 1898 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg))) 1899 return r; 1900 1901 ctx->src[i].kc_bank = 0; 1902 ctx->src[i].kc_rel = 0; 1903 ctx->src[i].sel = treg; 1904 ctx->src[i].rel = 0; 1905 j--; 1906 } else if (j > 0) { 1907 int treg = r600_get_temp(ctx); 1908 for (k = 0; k < 4; k++) { 1909 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1910 alu.op = ALU_OP1_MOV; 1911 alu.src[0].sel = ctx->src[i].sel; 1912 alu.src[0].chan = k; 1913 alu.src[0].rel = ctx->src[i].rel; 1914 alu.src[0].kc_bank = ctx->src[i].kc_bank; 1915 alu.src[0].kc_rel = ctx->src[i].kc_rel; 1916 alu.dst.sel = treg; 1917 alu.dst.chan = k; 1918 alu.dst.write = 1; 1919 if (k == 3) 1920 alu.last = 1; 1921 r = r600_bytecode_add_alu(ctx->bc, &alu); 1922 if (r) 1923 return r; 1924 } 1925 ctx->src[i].sel = treg; 1926 ctx->src[i].rel =0; 1927 j--; 1928 } 1929 } 1930 return 0; 1931} 1932 1933/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 1934static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 1935{ 1936 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1937 struct r600_bytecode_alu alu; 1938 int i, j, k, nliteral, r; 1939 1940 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 1941 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1942 nliteral++; 1943 } 1944 } 1945 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 1946 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1947 int treg = r600_get_temp(ctx); 1948 for (k = 0; k < 4; k++) { 1949 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1950 alu.op = ALU_OP1_MOV; 1951 alu.src[0].sel = ctx->src[i].sel; 1952 alu.src[0].chan = k; 1953 alu.src[0].value = ctx->src[i].value[k]; 1954 alu.dst.sel = treg; 1955 alu.dst.chan = k; 1956 alu.dst.write = 1; 1957 if (k == 3) 1958 alu.last = 1; 1959 r = r600_bytecode_add_alu(ctx->bc, &alu); 1960 if (r) 1961 return r; 1962 } 1963 ctx->src[i].sel = treg; 1964 j--; 1965 } 1966 } 1967 return 0; 1968} 1969 1970static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 1971{ 1972 int i, r, count = ctx->shader->ninput; 1973 1974 for (i = 0; i < count; i++) { 1975 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 1976 r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input); 1977 if (r) 1978 return r; 1979 } 1980 } 1981 return 0; 1982} 1983 1984static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so, 1985 int stream, unsigned *stream_item_size) 1986{ 1987 unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; 1988 unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; 1989 int i, j, r; 1990 1991 /* Sanity checking. */ 1992 if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) { 1993 R600_ERR("Too many stream outputs: %d\n", so->num_outputs); 1994 r = -EINVAL; 1995 goto out_err; 1996 } 1997 for (i = 0; i < so->num_outputs; i++) { 1998 if (so->output[i].output_buffer >= 4) { 1999 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", 2000 so->output[i].output_buffer); 2001 r = -EINVAL; 2002 goto out_err; 2003 } 2004 } 2005 2006 /* Initialize locations where the outputs are stored. */ 2007 for (i = 0; i < so->num_outputs; i++) { 2008 2009 so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr; 2010 start_comp[i] = so->output[i].start_component; 2011 /* Lower outputs with dst_offset < start_component. 2012 * 2013 * We can only output 4D vectors with a write mask, e.g. we can 2014 * only output the W component at offset 3, etc. If we want 2015 * to store Y, Z, or W at buffer offset 0, we need to use MOV 2016 * to move it to X and output X. */ 2017 if (so->output[i].dst_offset < so->output[i].start_component) { 2018 unsigned tmp = r600_get_temp(ctx); 2019 2020 for (j = 0; j < so->output[i].num_components; j++) { 2021 struct r600_bytecode_alu alu; 2022 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2023 alu.op = ALU_OP1_MOV; 2024 alu.src[0].sel = so_gpr[i]; 2025 alu.src[0].chan = so->output[i].start_component + j; 2026 2027 alu.dst.sel = tmp; 2028 alu.dst.chan = j; 2029 alu.dst.write = 1; 2030 if (j == so->output[i].num_components - 1) 2031 alu.last = 1; 2032 r = r600_bytecode_add_alu(ctx->bc, &alu); 2033 if (r) 2034 return r; 2035 } 2036 start_comp[i] = 0; 2037 so_gpr[i] = tmp; 2038 } 2039 } 2040 2041 /* Write outputs to buffers. */ 2042 for (i = 0; i < so->num_outputs; i++) { 2043 struct r600_bytecode_output output; 2044 2045 if (stream != -1 && stream != so->output[i].output_buffer) 2046 continue; 2047 2048 memset(&output, 0, sizeof(struct r600_bytecode_output)); 2049 output.gpr = so_gpr[i]; 2050 output.elem_size = so->output[i].num_components - 1; 2051 if (output.elem_size == 2) 2052 output.elem_size = 3; // 3 not supported, write 4 with junk at end 2053 output.array_base = so->output[i].dst_offset - start_comp[i]; 2054 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 2055 output.burst_count = 1; 2056 /* array_size is an upper limit for the burst_count 2057 * with MEM_STREAM instructions */ 2058 output.array_size = 0xFFF; 2059 output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i]; 2060 2061 if (ctx->bc->chip_class >= EVERGREEN) { 2062 switch (so->output[i].output_buffer) { 2063 case 0: 2064 output.op = CF_OP_MEM_STREAM0_BUF0; 2065 break; 2066 case 1: 2067 output.op = CF_OP_MEM_STREAM0_BUF1; 2068 break; 2069 case 2: 2070 output.op = CF_OP_MEM_STREAM0_BUF2; 2071 break; 2072 case 3: 2073 output.op = CF_OP_MEM_STREAM0_BUF3; 2074 break; 2075 } 2076 output.op += so->output[i].stream * 4; 2077 assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3); 2078 ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4; 2079 } else { 2080 switch (so->output[i].output_buffer) { 2081 case 0: 2082 output.op = CF_OP_MEM_STREAM0; 2083 break; 2084 case 1: 2085 output.op = CF_OP_MEM_STREAM1; 2086 break; 2087 case 2: 2088 output.op = CF_OP_MEM_STREAM2; 2089 break; 2090 case 3: 2091 output.op = CF_OP_MEM_STREAM3; 2092 break; 2093 } 2094 ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer; 2095 } 2096 r = r600_bytecode_add_output(ctx->bc, &output); 2097 if (r) 2098 goto out_err; 2099 } 2100 return 0; 2101out_err: 2102 return r; 2103} 2104 2105static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx) 2106{ 2107 struct r600_bytecode_alu alu; 2108 unsigned reg; 2109 2110 if (!ctx->shader->vs_out_edgeflag) 2111 return; 2112 2113 reg = ctx->shader->output[ctx->edgeflag_output].gpr; 2114 2115 /* clamp(x, 0, 1) */ 2116 memset(&alu, 0, sizeof(alu)); 2117 alu.op = ALU_OP1_MOV; 2118 alu.src[0].sel = reg; 2119 alu.dst.sel = reg; 2120 alu.dst.write = 1; 2121 alu.dst.clamp = 1; 2122 alu.last = 1; 2123 r600_bytecode_add_alu(ctx->bc, &alu); 2124 2125 memset(&alu, 0, sizeof(alu)); 2126 alu.op = ALU_OP1_FLT_TO_INT; 2127 alu.src[0].sel = reg; 2128 alu.dst.sel = reg; 2129 alu.dst.write = 1; 2130 alu.last = 1; 2131 r600_bytecode_add_alu(ctx->bc, &alu); 2132} 2133 2134static int generate_gs_copy_shader(struct r600_context *rctx, 2135 struct r600_pipe_shader *gs, 2136 struct pipe_stream_output_info *so) 2137{ 2138 struct r600_shader_ctx ctx = {}; 2139 struct r600_shader *gs_shader = &gs->shader; 2140 struct r600_pipe_shader *cshader; 2141 int ocnt = gs_shader->noutput; 2142 struct r600_bytecode_alu alu; 2143 struct r600_bytecode_vtx vtx; 2144 struct r600_bytecode_output output; 2145 struct r600_bytecode_cf *cf_jump, *cf_pop, 2146 *last_exp_pos = NULL, *last_exp_param = NULL; 2147 int i, j, next_clip_pos = 61, next_param = 0; 2148 int ring; 2149 bool only_ring_0 = true; 2150 cshader = calloc(1, sizeof(struct r600_pipe_shader)); 2151 if (!cshader) 2152 return 0; 2153 2154 memcpy(cshader->shader.output, gs_shader->output, ocnt * 2155 sizeof(struct r600_shader_io)); 2156 2157 cshader->shader.noutput = ocnt; 2158 2159 ctx.shader = &cshader->shader; 2160 ctx.bc = &ctx.shader->bc; 2161 ctx.type = ctx.bc->type = PIPE_SHADER_VERTEX; 2162 2163 r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family, 2164 rctx->screen->has_compressed_msaa_texturing); 2165 2166 ctx.bc->isa = rctx->isa; 2167 2168 cf_jump = NULL; 2169 memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes)); 2170 2171 /* R0.x = R0.x & 0x3fffffff */ 2172 memset(&alu, 0, sizeof(alu)); 2173 alu.op = ALU_OP2_AND_INT; 2174 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2175 alu.src[1].value = 0x3fffffff; 2176 alu.dst.write = 1; 2177 r600_bytecode_add_alu(ctx.bc, &alu); 2178 2179 /* R0.y = R0.x >> 30 */ 2180 memset(&alu, 0, sizeof(alu)); 2181 alu.op = ALU_OP2_LSHR_INT; 2182 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2183 alu.src[1].value = 0x1e; 2184 alu.dst.chan = 1; 2185 alu.dst.write = 1; 2186 alu.last = 1; 2187 r600_bytecode_add_alu(ctx.bc, &alu); 2188 2189 /* fetch vertex data from GSVS ring */ 2190 for (i = 0; i < ocnt; ++i) { 2191 struct r600_shader_io *out = &ctx.shader->output[i]; 2192 2193 out->gpr = i + 1; 2194 out->ring_offset = i * 16; 2195 2196 memset(&vtx, 0, sizeof(vtx)); 2197 vtx.op = FETCH_OP_VFETCH; 2198 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 2199 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 2200 vtx.mega_fetch_count = 16; 2201 vtx.offset = out->ring_offset; 2202 vtx.dst_gpr = out->gpr; 2203 vtx.src_gpr = 0; 2204 vtx.dst_sel_x = 0; 2205 vtx.dst_sel_y = 1; 2206 vtx.dst_sel_z = 2; 2207 vtx.dst_sel_w = 3; 2208 if (rctx->b.chip_class >= EVERGREEN) { 2209 vtx.use_const_fields = 1; 2210 } else { 2211 vtx.data_format = FMT_32_32_32_32_FLOAT; 2212 } 2213 2214 r600_bytecode_add_vtx(ctx.bc, &vtx); 2215 } 2216 ctx.temp_reg = i + 1; 2217 for (ring = 3; ring >= 0; --ring) { 2218 bool enabled = false; 2219 for (i = 0; i < so->num_outputs; i++) { 2220 if (so->output[i].stream == ring) { 2221 enabled = true; 2222 if (ring > 0) 2223 only_ring_0 = false; 2224 break; 2225 } 2226 } 2227 if (ring != 0 && !enabled) { 2228 cshader->shader.ring_item_sizes[ring] = 0; 2229 continue; 2230 } 2231 2232 if (cf_jump) { 2233 // Patch up jump label 2234 r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 2235 cf_pop = ctx.bc->cf_last; 2236 2237 cf_jump->cf_addr = cf_pop->id + 2; 2238 cf_jump->pop_count = 1; 2239 cf_pop->cf_addr = cf_pop->id + 2; 2240 cf_pop->pop_count = 1; 2241 } 2242 2243 /* PRED_SETE_INT __, R0.y, ring */ 2244 memset(&alu, 0, sizeof(alu)); 2245 alu.op = ALU_OP2_PRED_SETE_INT; 2246 alu.src[0].chan = 1; 2247 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2248 alu.src[1].value = ring; 2249 alu.execute_mask = 1; 2250 alu.update_pred = 1; 2251 alu.last = 1; 2252 r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE); 2253 2254 r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP); 2255 cf_jump = ctx.bc->cf_last; 2256 2257 if (enabled) 2258 emit_streamout(&ctx, so, only_ring_0 ? -1 : ring, &cshader->shader.ring_item_sizes[ring]); 2259 cshader->shader.ring_item_sizes[ring] = ocnt * 16; 2260 } 2261 2262 /* bc adds nops - copy it */ 2263 if (ctx.bc->chip_class == R600) { 2264 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2265 alu.op = ALU_OP0_NOP; 2266 alu.last = 1; 2267 r600_bytecode_add_alu(ctx.bc, &alu); 2268 2269 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 2270 } 2271 2272 /* export vertex data */ 2273 /* XXX factor out common code with r600_shader_from_tgsi ? */ 2274 for (i = 0; i < ocnt; ++i) { 2275 struct r600_shader_io *out = &ctx.shader->output[i]; 2276 bool instream0 = true; 2277 if (out->name == TGSI_SEMANTIC_CLIPVERTEX) 2278 continue; 2279 2280 for (j = 0; j < so->num_outputs; j++) { 2281 if (so->output[j].register_index == i) { 2282 if (so->output[j].stream == 0) 2283 break; 2284 if (so->output[j].stream > 0) 2285 instream0 = false; 2286 } 2287 } 2288 if (!instream0) 2289 continue; 2290 memset(&output, 0, sizeof(output)); 2291 output.gpr = out->gpr; 2292 output.elem_size = 3; 2293 output.swizzle_x = 0; 2294 output.swizzle_y = 1; 2295 output.swizzle_z = 2; 2296 output.swizzle_w = 3; 2297 output.burst_count = 1; 2298 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2299 output.op = CF_OP_EXPORT; 2300 switch (out->name) { 2301 case TGSI_SEMANTIC_POSITION: 2302 output.array_base = 60; 2303 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2304 break; 2305 2306 case TGSI_SEMANTIC_PSIZE: 2307 output.array_base = 61; 2308 if (next_clip_pos == 61) 2309 next_clip_pos = 62; 2310 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2311 output.swizzle_y = 7; 2312 output.swizzle_z = 7; 2313 output.swizzle_w = 7; 2314 ctx.shader->vs_out_misc_write = 1; 2315 ctx.shader->vs_out_point_size = 1; 2316 break; 2317 case TGSI_SEMANTIC_LAYER: 2318 if (out->spi_sid) { 2319 /* duplicate it as PARAM to pass to the pixel shader */ 2320 output.array_base = next_param++; 2321 r600_bytecode_add_output(ctx.bc, &output); 2322 last_exp_param = ctx.bc->cf_last; 2323 } 2324 output.array_base = 61; 2325 if (next_clip_pos == 61) 2326 next_clip_pos = 62; 2327 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2328 output.swizzle_x = 7; 2329 output.swizzle_y = 7; 2330 output.swizzle_z = 0; 2331 output.swizzle_w = 7; 2332 ctx.shader->vs_out_misc_write = 1; 2333 ctx.shader->vs_out_layer = 1; 2334 break; 2335 case TGSI_SEMANTIC_VIEWPORT_INDEX: 2336 if (out->spi_sid) { 2337 /* duplicate it as PARAM to pass to the pixel shader */ 2338 output.array_base = next_param++; 2339 r600_bytecode_add_output(ctx.bc, &output); 2340 last_exp_param = ctx.bc->cf_last; 2341 } 2342 output.array_base = 61; 2343 if (next_clip_pos == 61) 2344 next_clip_pos = 62; 2345 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2346 ctx.shader->vs_out_misc_write = 1; 2347 ctx.shader->vs_out_viewport = 1; 2348 output.swizzle_x = 7; 2349 output.swizzle_y = 7; 2350 output.swizzle_z = 7; 2351 output.swizzle_w = 0; 2352 break; 2353 case TGSI_SEMANTIC_CLIPDIST: 2354 /* spi_sid is 0 for clipdistance outputs that were generated 2355 * for clipvertex - we don't need to pass them to PS */ 2356 ctx.shader->clip_dist_write = gs->shader.clip_dist_write; 2357 if (out->spi_sid) { 2358 /* duplicate it as PARAM to pass to the pixel shader */ 2359 output.array_base = next_param++; 2360 r600_bytecode_add_output(ctx.bc, &output); 2361 last_exp_param = ctx.bc->cf_last; 2362 } 2363 output.array_base = next_clip_pos++; 2364 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2365 break; 2366 case TGSI_SEMANTIC_FOG: 2367 output.swizzle_y = 4; /* 0 */ 2368 output.swizzle_z = 4; /* 0 */ 2369 output.swizzle_w = 5; /* 1 */ 2370 break; 2371 default: 2372 output.array_base = next_param++; 2373 break; 2374 } 2375 r600_bytecode_add_output(ctx.bc, &output); 2376 if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) 2377 last_exp_param = ctx.bc->cf_last; 2378 else 2379 last_exp_pos = ctx.bc->cf_last; 2380 } 2381 2382 if (!last_exp_pos) { 2383 memset(&output, 0, sizeof(output)); 2384 output.gpr = 0; 2385 output.elem_size = 3; 2386 output.swizzle_x = 7; 2387 output.swizzle_y = 7; 2388 output.swizzle_z = 7; 2389 output.swizzle_w = 7; 2390 output.burst_count = 1; 2391 output.type = 2; 2392 output.op = CF_OP_EXPORT; 2393 output.array_base = 60; 2394 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2395 r600_bytecode_add_output(ctx.bc, &output); 2396 last_exp_pos = ctx.bc->cf_last; 2397 } 2398 2399 if (!last_exp_param) { 2400 memset(&output, 0, sizeof(output)); 2401 output.gpr = 0; 2402 output.elem_size = 3; 2403 output.swizzle_x = 7; 2404 output.swizzle_y = 7; 2405 output.swizzle_z = 7; 2406 output.swizzle_w = 7; 2407 output.burst_count = 1; 2408 output.type = 2; 2409 output.op = CF_OP_EXPORT; 2410 output.array_base = next_param++; 2411 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2412 r600_bytecode_add_output(ctx.bc, &output); 2413 last_exp_param = ctx.bc->cf_last; 2414 } 2415 2416 last_exp_pos->op = CF_OP_EXPORT_DONE; 2417 last_exp_param->op = CF_OP_EXPORT_DONE; 2418 2419 r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 2420 cf_pop = ctx.bc->cf_last; 2421 2422 cf_jump->cf_addr = cf_pop->id + 2; 2423 cf_jump->pop_count = 1; 2424 cf_pop->cf_addr = cf_pop->id + 2; 2425 cf_pop->pop_count = 1; 2426 2427 if (ctx.bc->chip_class == CAYMAN) 2428 cm_bytecode_add_cf_end(ctx.bc); 2429 else { 2430 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 2431 ctx.bc->cf_last->end_of_program = 1; 2432 } 2433 2434 gs->gs_copy_shader = cshader; 2435 cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 2436 2437 ctx.bc->nstack = 1; 2438 2439 return r600_bytecode_build(ctx.bc); 2440} 2441 2442static int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind) 2443{ 2444 if (ind) { 2445 struct r600_bytecode_alu alu; 2446 int r; 2447 2448 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2449 alu.op = ALU_OP2_ADD_INT; 2450 alu.src[0].sel = ctx->gs_export_gpr_tregs[idx]; 2451 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2452 alu.src[1].value = ctx->gs_out_ring_offset >> 4; 2453 alu.dst.sel = ctx->gs_export_gpr_tregs[idx]; 2454 alu.dst.write = 1; 2455 alu.last = 1; 2456 r = r600_bytecode_add_alu(ctx->bc, &alu); 2457 if (r) 2458 return r; 2459 } 2460 return 0; 2461} 2462 2463static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind) 2464{ 2465 struct r600_bytecode_output output; 2466 int i, k, ring_offset; 2467 int effective_stream = stream == -1 ? 0 : stream; 2468 int idx = 0; 2469 2470 for (i = 0; i < ctx->shader->noutput; i++) { 2471 if (ctx->gs_for_vs) { 2472 /* for ES we need to lookup corresponding ring offset expected by GS 2473 * (map this output to GS input by name and sid) */ 2474 /* FIXME precompute offsets */ 2475 ring_offset = -1; 2476 for(k = 0; k < ctx->gs_for_vs->ninput; ++k) { 2477 struct r600_shader_io *in = &ctx->gs_for_vs->input[k]; 2478 struct r600_shader_io *out = &ctx->shader->output[i]; 2479 if (in->name == out->name && in->sid == out->sid) 2480 ring_offset = in->ring_offset; 2481 } 2482 2483 if (ring_offset == -1) 2484 continue; 2485 } else { 2486 ring_offset = idx * 16; 2487 idx++; 2488 } 2489 2490 if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) 2491 continue; 2492 /* next_ring_offset after parsing input decls contains total size of 2493 * single vertex data, gs_next_vertex - current vertex index */ 2494 if (!ind) 2495 ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex; 2496 2497 memset(&output, 0, sizeof(struct r600_bytecode_output)); 2498 output.gpr = ctx->shader->output[i].gpr; 2499 output.elem_size = 3; 2500 output.comp_mask = 0xF; 2501 output.burst_count = 1; 2502 2503 if (ind) 2504 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 2505 else 2506 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 2507 2508 switch (stream) { 2509 default: 2510 case 0: 2511 output.op = CF_OP_MEM_RING; break; 2512 case 1: 2513 output.op = CF_OP_MEM_RING1; break; 2514 case 2: 2515 output.op = CF_OP_MEM_RING2; break; 2516 case 3: 2517 output.op = CF_OP_MEM_RING3; break; 2518 } 2519 2520 if (ind) { 2521 output.array_base = ring_offset >> 2; /* in dwords */ 2522 output.array_size = 0xfff; 2523 output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream]; 2524 } else 2525 output.array_base = ring_offset >> 2; /* in dwords */ 2526 r600_bytecode_add_output(ctx->bc, &output); 2527 } 2528 2529 ++ctx->gs_next_vertex; 2530 return 0; 2531} 2532 2533 2534static int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx) 2535{ 2536 int r; 2537 struct r600_bytecode_vtx vtx; 2538 int temp_val = ctx->temp_reg; 2539 /* need to store the TCS output somewhere */ 2540 r = single_alu_op2(ctx, ALU_OP1_MOV, 2541 temp_val, 0, 2542 V_SQ_ALU_SRC_LITERAL, 0, 2543 0, 0); 2544 if (r) 2545 return r; 2546 2547 /* used by VS/TCS */ 2548 if (ctx->tess_input_info) { 2549 /* fetch tcs input values into resv space */ 2550 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 2551 vtx.op = FETCH_OP_VFETCH; 2552 vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER; 2553 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 2554 vtx.mega_fetch_count = 16; 2555 vtx.data_format = FMT_32_32_32_32; 2556 vtx.num_format_all = 2; 2557 vtx.format_comp_all = 1; 2558 vtx.use_const_fields = 0; 2559 vtx.endian = r600_endian_swap(32); 2560 vtx.srf_mode_all = 1; 2561 vtx.offset = 0; 2562 vtx.dst_gpr = ctx->tess_input_info; 2563 vtx.dst_sel_x = 0; 2564 vtx.dst_sel_y = 1; 2565 vtx.dst_sel_z = 2; 2566 vtx.dst_sel_w = 3; 2567 vtx.src_gpr = temp_val; 2568 vtx.src_sel_x = 0; 2569 2570 r = r600_bytecode_add_vtx(ctx->bc, &vtx); 2571 if (r) 2572 return r; 2573 } 2574 2575 /* used by TCS/TES */ 2576 if (ctx->tess_output_info) { 2577 /* fetch tcs output values into resv space */ 2578 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 2579 vtx.op = FETCH_OP_VFETCH; 2580 vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER; 2581 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 2582 vtx.mega_fetch_count = 16; 2583 vtx.data_format = FMT_32_32_32_32; 2584 vtx.num_format_all = 2; 2585 vtx.format_comp_all = 1; 2586 vtx.use_const_fields = 0; 2587 vtx.endian = r600_endian_swap(32); 2588 vtx.srf_mode_all = 1; 2589 vtx.offset = 16; 2590 vtx.dst_gpr = ctx->tess_output_info; 2591 vtx.dst_sel_x = 0; 2592 vtx.dst_sel_y = 1; 2593 vtx.dst_sel_z = 2; 2594 vtx.dst_sel_w = 3; 2595 vtx.src_gpr = temp_val; 2596 vtx.src_sel_x = 0; 2597 2598 r = r600_bytecode_add_vtx(ctx->bc, &vtx); 2599 if (r) 2600 return r; 2601 } 2602 return 0; 2603} 2604 2605static int emit_lds_vs_writes(struct r600_shader_ctx *ctx) 2606{ 2607 int i, j, r; 2608 int temp_reg; 2609 2610 /* fetch tcs input values into input_vals */ 2611 ctx->tess_input_info = r600_get_temp(ctx); 2612 ctx->tess_output_info = 0; 2613 r = r600_fetch_tess_io_info(ctx); 2614 if (r) 2615 return r; 2616 2617 temp_reg = r600_get_temp(ctx); 2618 /* dst reg contains LDS address stride * idx */ 2619 /* MUL vertexID, vertex_dw_stride */ 2620 r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24, 2621 temp_reg, 0, 2622 ctx->tess_input_info, 1, 2623 0, 1); /* rel id in r0.y? */ 2624 if (r) 2625 return r; 2626 2627 for (i = 0; i < ctx->shader->noutput; i++) { 2628 struct r600_bytecode_alu alu; 2629 int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid); 2630 2631 if (param) { 2632 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 2633 temp_reg, 1, 2634 temp_reg, 0, 2635 V_SQ_ALU_SRC_LITERAL, param * 16); 2636 if (r) 2637 return r; 2638 } 2639 2640 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 2641 temp_reg, 2, 2642 temp_reg, param ? 1 : 0, 2643 V_SQ_ALU_SRC_LITERAL, 8); 2644 if (r) 2645 return r; 2646 2647 2648 for (j = 0; j < 2; j++) { 2649 int chan = (j == 1) ? 2 : (param ? 1 : 0); 2650 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2651 alu.op = LDS_OP3_LDS_WRITE_REL; 2652 alu.src[0].sel = temp_reg; 2653 alu.src[0].chan = chan; 2654 alu.src[1].sel = ctx->shader->output[i].gpr; 2655 alu.src[1].chan = j * 2; 2656 alu.src[2].sel = ctx->shader->output[i].gpr; 2657 alu.src[2].chan = (j * 2) + 1; 2658 alu.last = 1; 2659 alu.dst.chan = 0; 2660 alu.lds_idx = 1; 2661 alu.is_lds_idx_op = true; 2662 r = r600_bytecode_add_alu(ctx->bc, &alu); 2663 if (r) 2664 return r; 2665 } 2666 } 2667 return 0; 2668} 2669 2670static int r600_store_tcs_output(struct r600_shader_ctx *ctx) 2671{ 2672 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2673 const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 2674 int i, r, lasti; 2675 int temp_reg = r600_get_temp(ctx); 2676 struct r600_bytecode_alu alu; 2677 unsigned write_mask = dst->Register.WriteMask; 2678 2679 if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT) 2680 return 0; 2681 2682 r = get_lds_offset0(ctx, 1, temp_reg, dst->Register.Dimension ? false : true); 2683 if (r) 2684 return r; 2685 2686 /* the base address is now in temp.x */ 2687 r = r600_get_byte_address(ctx, temp_reg, 2688 &inst->Dst[0], NULL, ctx->tess_output_info, 1); 2689 if (r) 2690 return r; 2691 2692 /* LDS write */ 2693 lasti = tgsi_last_instruction(write_mask); 2694 for (i = 1; i <= lasti; i++) { 2695 2696 if (!(write_mask & (1 << i))) 2697 continue; 2698 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 2699 temp_reg, i, 2700 temp_reg, 0, 2701 V_SQ_ALU_SRC_LITERAL, 4 * i); 2702 if (r) 2703 return r; 2704 } 2705 2706 for (i = 0; i <= lasti; i++) { 2707 if (!(write_mask & (1 << i))) 2708 continue; 2709 2710 if ((i == 0 && ((write_mask & 3) == 3)) || 2711 (i == 2 && ((write_mask & 0xc) == 0xc))) { 2712 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2713 alu.op = LDS_OP3_LDS_WRITE_REL; 2714 alu.src[0].sel = temp_reg; 2715 alu.src[0].chan = i; 2716 2717 alu.src[1].sel = dst->Register.Index; 2718 alu.src[1].sel += ctx->file_offset[dst->Register.File]; 2719 alu.src[1].chan = i; 2720 2721 alu.src[2].sel = dst->Register.Index; 2722 alu.src[2].sel += ctx->file_offset[dst->Register.File]; 2723 alu.src[2].chan = i + 1; 2724 alu.lds_idx = 1; 2725 alu.dst.chan = 0; 2726 alu.last = 1; 2727 alu.is_lds_idx_op = true; 2728 r = r600_bytecode_add_alu(ctx->bc, &alu); 2729 if (r) 2730 return r; 2731 i += 1; 2732 continue; 2733 } 2734 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2735 alu.op = LDS_OP2_LDS_WRITE; 2736 alu.src[0].sel = temp_reg; 2737 alu.src[0].chan = i; 2738 2739 alu.src[1].sel = dst->Register.Index; 2740 alu.src[1].sel += ctx->file_offset[dst->Register.File]; 2741 alu.src[1].chan = i; 2742 2743 alu.src[2].sel = V_SQ_ALU_SRC_0; 2744 alu.dst.chan = 0; 2745 alu.last = 1; 2746 alu.is_lds_idx_op = true; 2747 r = r600_bytecode_add_alu(ctx->bc, &alu); 2748 if (r) 2749 return r; 2750 } 2751 return 0; 2752} 2753 2754static int r600_tess_factor_read(struct r600_shader_ctx *ctx, 2755 int output_idx) 2756{ 2757 int param; 2758 unsigned temp_reg = r600_get_temp(ctx); 2759 unsigned name = ctx->shader->output[output_idx].name; 2760 int dreg = ctx->shader->output[output_idx].gpr; 2761 int r; 2762 2763 param = r600_get_lds_unique_index(name, 0); 2764 r = get_lds_offset0(ctx, 1, temp_reg, true); 2765 if (r) 2766 return r; 2767 2768 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 2769 temp_reg, 0, 2770 temp_reg, 0, 2771 V_SQ_ALU_SRC_LITERAL, param * 16); 2772 if (r) 2773 return r; 2774 2775 do_lds_fetch_values(ctx, temp_reg, dreg); 2776 return 0; 2777} 2778 2779static int r600_emit_tess_factor(struct r600_shader_ctx *ctx) 2780{ 2781 unsigned i; 2782 int stride, outer_comps, inner_comps; 2783 int tessinner_idx = -1, tessouter_idx = -1; 2784 int r; 2785 int temp_reg = r600_get_temp(ctx); 2786 int treg[3] = {-1, -1, -1}; 2787 struct r600_bytecode_alu alu; 2788 struct r600_bytecode_cf *cf_jump, *cf_pop; 2789 2790 /* only execute factor emission for invocation 0 */ 2791 /* PRED_SETE_INT __, R0.x, 0 */ 2792 memset(&alu, 0, sizeof(alu)); 2793 alu.op = ALU_OP2_PRED_SETE_INT; 2794 alu.src[0].chan = 2; 2795 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2796 alu.execute_mask = 1; 2797 alu.update_pred = 1; 2798 alu.last = 1; 2799 r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE); 2800 2801 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); 2802 cf_jump = ctx->bc->cf_last; 2803 2804 treg[0] = r600_get_temp(ctx); 2805 switch (ctx->shader->tcs_prim_mode) { 2806 case PIPE_PRIM_LINES: 2807 stride = 8; /* 2 dwords, 1 vec2 store */ 2808 outer_comps = 2; 2809 inner_comps = 0; 2810 break; 2811 case PIPE_PRIM_TRIANGLES: 2812 stride = 16; /* 4 dwords, 1 vec4 store */ 2813 outer_comps = 3; 2814 inner_comps = 1; 2815 treg[1] = r600_get_temp(ctx); 2816 break; 2817 case PIPE_PRIM_QUADS: 2818 stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */ 2819 outer_comps = 4; 2820 inner_comps = 2; 2821 treg[1] = r600_get_temp(ctx); 2822 treg[2] = r600_get_temp(ctx); 2823 break; 2824 default: 2825 assert(0); 2826 return -1; 2827 } 2828 2829 /* R0 is InvocationID, RelPatchID, PatchID, tf_base */ 2830 /* TF_WRITE takes index in R.x, value in R.y */ 2831 for (i = 0; i < ctx->shader->noutput; i++) { 2832 if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSINNER) 2833 tessinner_idx = i; 2834 if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSOUTER) 2835 tessouter_idx = i; 2836 } 2837 2838 if (tessouter_idx == -1) 2839 return -1; 2840 2841 if (tessinner_idx == -1 && inner_comps) 2842 return -1; 2843 2844 if (tessouter_idx != -1) { 2845 r = r600_tess_factor_read(ctx, tessouter_idx); 2846 if (r) 2847 return r; 2848 } 2849 2850 if (tessinner_idx != -1) { 2851 r = r600_tess_factor_read(ctx, tessinner_idx); 2852 if (r) 2853 return r; 2854 } 2855 2856 /* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */ 2857 /* r.x = relpatchid(r0.y) * tf_stride */ 2858 2859 /* multiply incoming r0.y * stride - t.x = r0.y * stride */ 2860 /* add incoming r0.w to it: t.x = t.x + r0.w */ 2861 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 2862 temp_reg, 0, 2863 0, 1, 2864 V_SQ_ALU_SRC_LITERAL, stride, 2865 0, 3); 2866 if (r) 2867 return r; 2868 2869 for (i = 0; i < outer_comps + inner_comps; i++) { 2870 int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx; 2871 int out_comp = i >= outer_comps ? i - outer_comps : i; 2872 2873 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 2874 treg[i / 2], (2 * (i % 2)), 2875 temp_reg, 0, 2876 V_SQ_ALU_SRC_LITERAL, 4 * i); 2877 if (r) 2878 return r; 2879 r = single_alu_op2(ctx, ALU_OP1_MOV, 2880 treg[i / 2], 1 + (2 * (i%2)), 2881 ctx->shader->output[out_idx].gpr, out_comp, 2882 0, 0); 2883 if (r) 2884 return r; 2885 } 2886 for (i = 0; i < outer_comps + inner_comps; i++) { 2887 struct r600_bytecode_gds gds; 2888 2889 memset(&gds, 0, sizeof(struct r600_bytecode_gds)); 2890 gds.src_gpr = treg[i / 2]; 2891 gds.src_sel_x = 2 * (i % 2); 2892 gds.src_sel_y = 1 + (2 * (i % 2)); 2893 gds.src_sel_z = 4; 2894 gds.dst_sel_x = 7; 2895 gds.dst_sel_y = 7; 2896 gds.dst_sel_z = 7; 2897 gds.dst_sel_w = 7; 2898 gds.op = FETCH_OP_TF_WRITE; 2899 r = r600_bytecode_add_gds(ctx->bc, &gds); 2900 if (r) 2901 return r; 2902 } 2903 2904 // Patch up jump label 2905 r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); 2906 cf_pop = ctx->bc->cf_last; 2907 2908 cf_jump->cf_addr = cf_pop->id + 2; 2909 cf_jump->pop_count = 1; 2910 cf_pop->cf_addr = cf_pop->id + 2; 2911 cf_pop->pop_count = 1; 2912 2913 return 0; 2914} 2915 2916static int r600_shader_from_tgsi(struct r600_context *rctx, 2917 struct r600_pipe_shader *pipeshader, 2918 union r600_shader_key key) 2919{ 2920 struct r600_screen *rscreen = rctx->screen; 2921 struct r600_shader *shader = &pipeshader->shader; 2922 struct tgsi_token *tokens = pipeshader->selector->tokens; 2923 struct pipe_stream_output_info so = pipeshader->selector->so; 2924 struct tgsi_full_immediate *immediate; 2925 struct r600_shader_ctx ctx; 2926 struct r600_bytecode_output output[32]; 2927 unsigned output_done, noutput; 2928 unsigned opcode; 2929 int i, j, k, r = 0; 2930 int next_param_base = 0, next_clip_base; 2931 int max_color_exports = MAX2(key.ps.nr_cbufs, 1); 2932 bool indirect_gprs; 2933 bool ring_outputs = false; 2934 bool lds_outputs = false; 2935 bool lds_inputs = false; 2936 bool pos_emitted = false; 2937 2938 ctx.bc = &shader->bc; 2939 ctx.shader = shader; 2940 ctx.native_integers = true; 2941 2942 r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, 2943 rscreen->has_compressed_msaa_texturing); 2944 ctx.tokens = tokens; 2945 tgsi_scan_shader(tokens, &ctx.info); 2946 shader->indirect_files = ctx.info.indirect_files; 2947 2948 shader->uses_doubles = ctx.info.uses_doubles; 2949 2950 indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER)); 2951 tgsi_parse_init(&ctx.parse, tokens); 2952 ctx.type = ctx.info.processor; 2953 shader->processor_type = ctx.type; 2954 ctx.bc->type = shader->processor_type; 2955 2956 switch (ctx.type) { 2957 case PIPE_SHADER_VERTEX: 2958 shader->vs_as_gs_a = key.vs.as_gs_a; 2959 shader->vs_as_es = key.vs.as_es; 2960 shader->vs_as_ls = key.vs.as_ls; 2961 if (shader->vs_as_es) 2962 ring_outputs = true; 2963 if (shader->vs_as_ls) 2964 lds_outputs = true; 2965 break; 2966 case PIPE_SHADER_GEOMETRY: 2967 ring_outputs = true; 2968 break; 2969 case PIPE_SHADER_TESS_CTRL: 2970 shader->tcs_prim_mode = key.tcs.prim_mode; 2971 lds_outputs = true; 2972 lds_inputs = true; 2973 break; 2974 case PIPE_SHADER_TESS_EVAL: 2975 shader->tes_as_es = key.tes.as_es; 2976 lds_inputs = true; 2977 if (shader->tes_as_es) 2978 ring_outputs = true; 2979 break; 2980 case PIPE_SHADER_FRAGMENT: 2981 shader->two_side = key.ps.color_two_side; 2982 break; 2983 default: 2984 break; 2985 } 2986 2987 if (shader->vs_as_es || shader->tes_as_es) { 2988 ctx.gs_for_vs = &rctx->gs_shader->current->shader; 2989 } else { 2990 ctx.gs_for_vs = NULL; 2991 } 2992 2993 ctx.next_ring_offset = 0; 2994 ctx.gs_out_ring_offset = 0; 2995 ctx.gs_next_vertex = 0; 2996 ctx.gs_stream_output_info = &so; 2997 2998 ctx.face_gpr = -1; 2999 ctx.fixed_pt_position_gpr = -1; 3000 ctx.fragcoord_input = -1; 3001 ctx.colors_used = 0; 3002 ctx.clip_vertex_write = 0; 3003 3004 shader->nr_ps_color_exports = 0; 3005 shader->nr_ps_max_color_exports = 0; 3006 3007 3008 /* register allocations */ 3009 /* Values [0,127] correspond to GPR[0..127]. 3010 * Values [128,159] correspond to constant buffer bank 0 3011 * Values [160,191] correspond to constant buffer bank 1 3012 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 3013 * Values [256,287] correspond to constant buffer bank 2 (EG) 3014 * Values [288,319] correspond to constant buffer bank 3 (EG) 3015 * Other special values are shown in the list below. 3016 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 3017 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 3018 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 3019 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 3020 * 248 SQ_ALU_SRC_0: special constant 0.0. 3021 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 3022 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 3023 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 3024 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 3025 * 253 SQ_ALU_SRC_LITERAL: literal constant. 3026 * 254 SQ_ALU_SRC_PV: previous vector result. 3027 * 255 SQ_ALU_SRC_PS: previous scalar result. 3028 */ 3029 for (i = 0; i < TGSI_FILE_COUNT; i++) { 3030 ctx.file_offset[i] = 0; 3031 } 3032 3033 if (ctx.type == PIPE_SHADER_VERTEX) { 3034 ctx.file_offset[TGSI_FILE_INPUT] = 1; 3035 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); 3036 } 3037 if (ctx.type == PIPE_SHADER_FRAGMENT) { 3038 if (ctx.bc->chip_class >= EVERGREEN) 3039 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 3040 else 3041 ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); 3042 } 3043 if (ctx.type == PIPE_SHADER_GEOMETRY) { 3044 /* FIXME 1 would be enough in some cases (3 or less input vertices) */ 3045 ctx.file_offset[TGSI_FILE_INPUT] = 2; 3046 } 3047 if (ctx.type == PIPE_SHADER_TESS_CTRL) 3048 ctx.file_offset[TGSI_FILE_INPUT] = 1; 3049 if (ctx.type == PIPE_SHADER_TESS_EVAL) { 3050 bool add_tesscoord = false, add_tess_inout = false; 3051 ctx.file_offset[TGSI_FILE_INPUT] = 1; 3052 for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { 3053 /* if we have tesscoord save one reg */ 3054 if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSCOORD) 3055 add_tesscoord = true; 3056 if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSINNER || 3057 ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSOUTER) 3058 add_tess_inout = true; 3059 } 3060 if (add_tesscoord || add_tess_inout) 3061 ctx.file_offset[TGSI_FILE_INPUT]++; 3062 if (add_tess_inout) 3063 ctx.file_offset[TGSI_FILE_INPUT]+=2; 3064 } 3065 3066 ctx.file_offset[TGSI_FILE_OUTPUT] = 3067 ctx.file_offset[TGSI_FILE_INPUT] + 3068 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3069 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 3070 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 3071 3072 /* Outside the GPR range. This will be translated to one of the 3073 * kcache banks later. */ 3074 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 3075 3076 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 3077 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 3078 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 3079 ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1; 3080 ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2; 3081 3082 if (ctx.type == PIPE_SHADER_TESS_CTRL) { 3083 ctx.tess_input_info = ctx.bc->ar_reg + 3; 3084 ctx.tess_output_info = ctx.bc->ar_reg + 4; 3085 ctx.temp_reg = ctx.bc->ar_reg + 5; 3086 } else if (ctx.type == PIPE_SHADER_TESS_EVAL) { 3087 ctx.tess_input_info = 0; 3088 ctx.tess_output_info = ctx.bc->ar_reg + 3; 3089 ctx.temp_reg = ctx.bc->ar_reg + 4; 3090 } else if (ctx.type == PIPE_SHADER_GEOMETRY) { 3091 ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3; 3092 ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4; 3093 ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5; 3094 ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6; 3095 ctx.temp_reg = ctx.bc->ar_reg + 7; 3096 } else { 3097 ctx.temp_reg = ctx.bc->ar_reg + 3; 3098 } 3099 3100 shader->max_arrays = 0; 3101 shader->num_arrays = 0; 3102 if (indirect_gprs) { 3103 3104 if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { 3105 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], 3106 ctx.file_offset[TGSI_FILE_OUTPUT] - 3107 ctx.file_offset[TGSI_FILE_INPUT], 3108 0x0F); 3109 } 3110 if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 3111 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], 3112 ctx.file_offset[TGSI_FILE_TEMPORARY] - 3113 ctx.file_offset[TGSI_FILE_OUTPUT], 3114 0x0F); 3115 } 3116 } 3117 3118 ctx.nliterals = 0; 3119 ctx.literals = NULL; 3120 3121 shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && 3122 ctx.info.colors_written == 1; 3123 shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 3124 shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]; 3125 3126 if (shader->vs_as_gs_a) 3127 vs_add_primid_output(&ctx, key.vs.prim_id_out); 3128 3129 if (ctx.type == PIPE_SHADER_TESS_EVAL) 3130 r600_fetch_tess_io_info(&ctx); 3131 3132 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3133 tgsi_parse_token(&ctx.parse); 3134 switch (ctx.parse.FullToken.Token.Type) { 3135 case TGSI_TOKEN_TYPE_IMMEDIATE: 3136 immediate = &ctx.parse.FullToken.FullImmediate; 3137 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 3138 if(ctx.literals == NULL) { 3139 r = -ENOMEM; 3140 goto out_err; 3141 } 3142 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 3143 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 3144 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 3145 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 3146 ctx.nliterals++; 3147 break; 3148 case TGSI_TOKEN_TYPE_DECLARATION: 3149 r = tgsi_declaration(&ctx); 3150 if (r) 3151 goto out_err; 3152 break; 3153 case TGSI_TOKEN_TYPE_INSTRUCTION: 3154 case TGSI_TOKEN_TYPE_PROPERTY: 3155 break; 3156 default: 3157 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 3158 r = -EINVAL; 3159 goto out_err; 3160 } 3161 } 3162 3163 shader->ring_item_sizes[0] = ctx.next_ring_offset; 3164 shader->ring_item_sizes[1] = 0; 3165 shader->ring_item_sizes[2] = 0; 3166 shader->ring_item_sizes[3] = 0; 3167 3168 /* Process two side if needed */ 3169 if (shader->two_side && ctx.colors_used) { 3170 int i, count = ctx.shader->ninput; 3171 unsigned next_lds_loc = ctx.shader->nlds; 3172 3173 /* additional inputs will be allocated right after the existing inputs, 3174 * we won't need them after the color selection, so we don't need to 3175 * reserve these gprs for the rest of the shader code and to adjust 3176 * output offsets etc. */ 3177 int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 3178 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3179 3180 /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */ 3181 if (ctx.face_gpr == -1) { 3182 i = ctx.shader->ninput++; 3183 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 3184 ctx.shader->input[i].spi_sid = 0; 3185 ctx.shader->input[i].gpr = gpr++; 3186 ctx.face_gpr = ctx.shader->input[i].gpr; 3187 } 3188 3189 for (i = 0; i < count; i++) { 3190 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 3191 int ni = ctx.shader->ninput++; 3192 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 3193 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 3194 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 3195 ctx.shader->input[ni].gpr = gpr++; 3196 // TGSI to LLVM needs to know the lds position of inputs. 3197 // Non LLVM path computes it later (in process_twoside_color) 3198 ctx.shader->input[ni].lds_pos = next_lds_loc++; 3199 ctx.shader->input[i].back_color_input = ni; 3200 if (ctx.bc->chip_class >= EVERGREEN) { 3201 if ((r = evergreen_interp_input(&ctx, ni))) 3202 return r; 3203 } 3204 } 3205 } 3206 } 3207 3208 if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) 3209 shader->nr_ps_max_color_exports = 8; 3210 3211 if (ctx.fragcoord_input >= 0) { 3212 if (ctx.bc->chip_class == CAYMAN) { 3213 for (j = 0 ; j < 4; j++) { 3214 struct r600_bytecode_alu alu; 3215 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3216 alu.op = ALU_OP1_RECIP_IEEE; 3217 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3218 alu.src[0].chan = 3; 3219 3220 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3221 alu.dst.chan = j; 3222 alu.dst.write = (j == 3); 3223 alu.last = 1; 3224 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3225 return r; 3226 } 3227 } else { 3228 struct r600_bytecode_alu alu; 3229 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3230 alu.op = ALU_OP1_RECIP_IEEE; 3231 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3232 alu.src[0].chan = 3; 3233 3234 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3235 alu.dst.chan = 3; 3236 alu.dst.write = 1; 3237 alu.last = 1; 3238 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3239 return r; 3240 } 3241 } 3242 3243 if (ctx.type == PIPE_SHADER_GEOMETRY) { 3244 struct r600_bytecode_alu alu; 3245 int r; 3246 3247 /* GS thread with no output workaround - emit a cut at start of GS */ 3248 if (ctx.bc->chip_class == R600) 3249 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); 3250 3251 for (j = 0; j < 4; j++) { 3252 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3253 alu.op = ALU_OP1_MOV; 3254 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3255 alu.src[0].value = 0; 3256 alu.dst.sel = ctx.gs_export_gpr_tregs[j]; 3257 alu.dst.write = 1; 3258 alu.last = 1; 3259 r = r600_bytecode_add_alu(ctx.bc, &alu); 3260 if (r) 3261 return r; 3262 } 3263 } 3264 3265 if (ctx.type == PIPE_SHADER_TESS_CTRL) 3266 r600_fetch_tess_io_info(&ctx); 3267 3268 if (shader->two_side && ctx.colors_used) { 3269 if ((r = process_twoside_color_inputs(&ctx))) 3270 return r; 3271 } 3272 3273 tgsi_parse_init(&ctx.parse, tokens); 3274 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3275 tgsi_parse_token(&ctx.parse); 3276 switch (ctx.parse.FullToken.Token.Type) { 3277 case TGSI_TOKEN_TYPE_INSTRUCTION: 3278 r = tgsi_is_supported(&ctx); 3279 if (r) 3280 goto out_err; 3281 ctx.max_driver_temp_used = 0; 3282 /* reserve first tmp for everyone */ 3283 r600_get_temp(&ctx); 3284 3285 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 3286 if ((r = tgsi_split_constant(&ctx))) 3287 goto out_err; 3288 if ((r = tgsi_split_literal_constant(&ctx))) 3289 goto out_err; 3290 if (ctx.type == PIPE_SHADER_GEOMETRY) { 3291 if ((r = tgsi_split_gs_inputs(&ctx))) 3292 goto out_err; 3293 } else if (lds_inputs) { 3294 if ((r = tgsi_split_lds_inputs(&ctx))) 3295 goto out_err; 3296 } 3297 if (ctx.bc->chip_class == CAYMAN) 3298 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 3299 else if (ctx.bc->chip_class >= EVERGREEN) 3300 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 3301 else 3302 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 3303 r = ctx.inst_info->process(&ctx); 3304 if (r) 3305 goto out_err; 3306 3307 if (ctx.type == PIPE_SHADER_TESS_CTRL) { 3308 r = r600_store_tcs_output(&ctx); 3309 if (r) 3310 goto out_err; 3311 } 3312 break; 3313 default: 3314 break; 3315 } 3316 } 3317 3318 /* Reset the temporary register counter. */ 3319 ctx.max_driver_temp_used = 0; 3320 3321 noutput = shader->noutput; 3322 3323 if (!ring_outputs && ctx.clip_vertex_write) { 3324 unsigned clipdist_temp[2]; 3325 3326 clipdist_temp[0] = r600_get_temp(&ctx); 3327 clipdist_temp[1] = r600_get_temp(&ctx); 3328 3329 /* need to convert a clipvertex write into clipdistance writes and not export 3330 the clip vertex anymore */ 3331 3332 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 3333 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3334 shader->output[noutput].gpr = clipdist_temp[0]; 3335 noutput++; 3336 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3337 shader->output[noutput].gpr = clipdist_temp[1]; 3338 noutput++; 3339 3340 /* reset spi_sid for clipvertex output to avoid confusing spi */ 3341 shader->output[ctx.cv_output].spi_sid = 0; 3342 3343 shader->clip_dist_write = 0xFF; 3344 3345 for (i = 0; i < 8; i++) { 3346 int oreg = i >> 2; 3347 int ochan = i & 3; 3348 3349 for (j = 0; j < 4; j++) { 3350 struct r600_bytecode_alu alu; 3351 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3352 alu.op = ALU_OP2_DOT4; 3353 alu.src[0].sel = shader->output[ctx.cv_output].gpr; 3354 alu.src[0].chan = j; 3355 3356 alu.src[1].sel = 512 + i; 3357 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 3358 alu.src[1].chan = j; 3359 3360 alu.dst.sel = clipdist_temp[oreg]; 3361 alu.dst.chan = j; 3362 alu.dst.write = (j == ochan); 3363 if (j == 3) 3364 alu.last = 1; 3365 r = r600_bytecode_add_alu(ctx.bc, &alu); 3366 if (r) 3367 return r; 3368 } 3369 } 3370 } 3371 3372 /* Add stream outputs. */ 3373 if (so.num_outputs) { 3374 bool emit = false; 3375 if (!lds_outputs && !ring_outputs && ctx.type == PIPE_SHADER_VERTEX) 3376 emit = true; 3377 if (!ring_outputs && ctx.type == PIPE_SHADER_TESS_EVAL) 3378 emit = true; 3379 if (emit) 3380 emit_streamout(&ctx, &so, -1, NULL); 3381 } 3382 pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 3383 convert_edgeflag_to_int(&ctx); 3384 3385 if (ctx.type == PIPE_SHADER_TESS_CTRL) 3386 r600_emit_tess_factor(&ctx); 3387 3388 if (lds_outputs) { 3389 if (ctx.type == PIPE_SHADER_VERTEX) { 3390 if (ctx.shader->noutput) 3391 emit_lds_vs_writes(&ctx); 3392 } 3393 } else if (ring_outputs) { 3394 if (shader->vs_as_es || shader->tes_as_es) { 3395 ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx); 3396 ctx.gs_export_gpr_tregs[1] = -1; 3397 ctx.gs_export_gpr_tregs[2] = -1; 3398 ctx.gs_export_gpr_tregs[3] = -1; 3399 3400 emit_gs_ring_writes(&ctx, &so, -1, FALSE); 3401 } 3402 } else { 3403 /* Export output */ 3404 next_clip_base = shader->vs_out_misc_write ? 62 : 61; 3405 3406 for (i = 0, j = 0; i < noutput; i++, j++) { 3407 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3408 output[j].gpr = shader->output[i].gpr; 3409 output[j].elem_size = 3; 3410 output[j].swizzle_x = 0; 3411 output[j].swizzle_y = 1; 3412 output[j].swizzle_z = 2; 3413 output[j].swizzle_w = 3; 3414 output[j].burst_count = 1; 3415 output[j].type = -1; 3416 output[j].op = CF_OP_EXPORT; 3417 switch (ctx.type) { 3418 case PIPE_SHADER_VERTEX: 3419 case PIPE_SHADER_TESS_EVAL: 3420 switch (shader->output[i].name) { 3421 case TGSI_SEMANTIC_POSITION: 3422 output[j].array_base = 60; 3423 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3424 pos_emitted = true; 3425 break; 3426 3427 case TGSI_SEMANTIC_PSIZE: 3428 output[j].array_base = 61; 3429 output[j].swizzle_y = 7; 3430 output[j].swizzle_z = 7; 3431 output[j].swizzle_w = 7; 3432 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3433 pos_emitted = true; 3434 break; 3435 case TGSI_SEMANTIC_EDGEFLAG: 3436 output[j].array_base = 61; 3437 output[j].swizzle_x = 7; 3438 output[j].swizzle_y = 0; 3439 output[j].swizzle_z = 7; 3440 output[j].swizzle_w = 7; 3441 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3442 pos_emitted = true; 3443 break; 3444 case TGSI_SEMANTIC_LAYER: 3445 /* spi_sid is 0 for outputs that are 3446 * not consumed by PS */ 3447 if (shader->output[i].spi_sid) { 3448 output[j].array_base = next_param_base++; 3449 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3450 j++; 3451 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3452 } 3453 output[j].array_base = 61; 3454 output[j].swizzle_x = 7; 3455 output[j].swizzle_y = 7; 3456 output[j].swizzle_z = 0; 3457 output[j].swizzle_w = 7; 3458 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3459 pos_emitted = true; 3460 break; 3461 case TGSI_SEMANTIC_VIEWPORT_INDEX: 3462 /* spi_sid is 0 for outputs that are 3463 * not consumed by PS */ 3464 if (shader->output[i].spi_sid) { 3465 output[j].array_base = next_param_base++; 3466 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3467 j++; 3468 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3469 } 3470 output[j].array_base = 61; 3471 output[j].swizzle_x = 7; 3472 output[j].swizzle_y = 7; 3473 output[j].swizzle_z = 7; 3474 output[j].swizzle_w = 0; 3475 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3476 pos_emitted = true; 3477 break; 3478 case TGSI_SEMANTIC_CLIPVERTEX: 3479 j--; 3480 break; 3481 case TGSI_SEMANTIC_CLIPDIST: 3482 output[j].array_base = next_clip_base++; 3483 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3484 pos_emitted = true; 3485 /* spi_sid is 0 for clipdistance outputs that were generated 3486 * for clipvertex - we don't need to pass them to PS */ 3487 if (shader->output[i].spi_sid) { 3488 j++; 3489 /* duplicate it as PARAM to pass to the pixel shader */ 3490 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3491 output[j].array_base = next_param_base++; 3492 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3493 } 3494 break; 3495 case TGSI_SEMANTIC_FOG: 3496 output[j].swizzle_y = 4; /* 0 */ 3497 output[j].swizzle_z = 4; /* 0 */ 3498 output[j].swizzle_w = 5; /* 1 */ 3499 break; 3500 case TGSI_SEMANTIC_PRIMID: 3501 output[j].swizzle_x = 2; 3502 output[j].swizzle_y = 4; /* 0 */ 3503 output[j].swizzle_z = 4; /* 0 */ 3504 output[j].swizzle_w = 4; /* 0 */ 3505 break; 3506 } 3507 3508 break; 3509 case PIPE_SHADER_FRAGMENT: 3510 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 3511 /* never export more colors than the number of CBs */ 3512 if (shader->output[i].sid >= max_color_exports) { 3513 /* skip export */ 3514 j--; 3515 continue; 3516 } 3517 output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 3518 output[j].array_base = shader->output[i].sid; 3519 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3520 shader->nr_ps_color_exports++; 3521 if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { 3522 for (k = 1; k < max_color_exports; k++) { 3523 j++; 3524 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3525 output[j].gpr = shader->output[i].gpr; 3526 output[j].elem_size = 3; 3527 output[j].swizzle_x = 0; 3528 output[j].swizzle_y = 1; 3529 output[j].swizzle_z = 2; 3530 output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 3531 output[j].burst_count = 1; 3532 output[j].array_base = k; 3533 output[j].op = CF_OP_EXPORT; 3534 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3535 shader->nr_ps_color_exports++; 3536 } 3537 } 3538 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 3539 output[j].array_base = 61; 3540 output[j].swizzle_x = 2; 3541 output[j].swizzle_y = 7; 3542 output[j].swizzle_z = output[j].swizzle_w = 7; 3543 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3544 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 3545 output[j].array_base = 61; 3546 output[j].swizzle_x = 7; 3547 output[j].swizzle_y = 1; 3548 output[j].swizzle_z = output[j].swizzle_w = 7; 3549 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3550 } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 3551 output[j].array_base = 61; 3552 output[j].swizzle_x = 7; 3553 output[j].swizzle_y = 7; 3554 output[j].swizzle_z = 0; 3555 output[j].swizzle_w = 7; 3556 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3557 } else { 3558 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 3559 r = -EINVAL; 3560 goto out_err; 3561 } 3562 break; 3563 case PIPE_SHADER_TESS_CTRL: 3564 break; 3565 default: 3566 R600_ERR("unsupported processor type %d\n", ctx.type); 3567 r = -EINVAL; 3568 goto out_err; 3569 } 3570 3571 if (output[j].type==-1) { 3572 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3573 output[j].array_base = next_param_base++; 3574 } 3575 } 3576 3577 /* add fake position export */ 3578 if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && pos_emitted == false) { 3579 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3580 output[j].gpr = 0; 3581 output[j].elem_size = 3; 3582 output[j].swizzle_x = 7; 3583 output[j].swizzle_y = 7; 3584 output[j].swizzle_z = 7; 3585 output[j].swizzle_w = 7; 3586 output[j].burst_count = 1; 3587 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3588 output[j].array_base = 60; 3589 output[j].op = CF_OP_EXPORT; 3590 j++; 3591 } 3592 3593 /* add fake param output for vertex shader if no param is exported */ 3594 if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && next_param_base == 0) { 3595 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3596 output[j].gpr = 0; 3597 output[j].elem_size = 3; 3598 output[j].swizzle_x = 7; 3599 output[j].swizzle_y = 7; 3600 output[j].swizzle_z = 7; 3601 output[j].swizzle_w = 7; 3602 output[j].burst_count = 1; 3603 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3604 output[j].array_base = 0; 3605 output[j].op = CF_OP_EXPORT; 3606 j++; 3607 } 3608 3609 /* add fake pixel export */ 3610 if (ctx.type == PIPE_SHADER_FRAGMENT && shader->nr_ps_color_exports == 0) { 3611 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3612 output[j].gpr = 0; 3613 output[j].elem_size = 3; 3614 output[j].swizzle_x = 7; 3615 output[j].swizzle_y = 7; 3616 output[j].swizzle_z = 7; 3617 output[j].swizzle_w = 7; 3618 output[j].burst_count = 1; 3619 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3620 output[j].array_base = 0; 3621 output[j].op = CF_OP_EXPORT; 3622 j++; 3623 shader->nr_ps_color_exports++; 3624 } 3625 3626 noutput = j; 3627 3628 /* set export done on last export of each type */ 3629 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 3630 if (!(output_done & (1 << output[i].type))) { 3631 output_done |= (1 << output[i].type); 3632 output[i].op = CF_OP_EXPORT_DONE; 3633 } 3634 } 3635 /* add output to bytecode */ 3636 for (i = 0; i < noutput; i++) { 3637 r = r600_bytecode_add_output(ctx.bc, &output[i]); 3638 if (r) 3639 goto out_err; 3640 } 3641 } 3642 3643 /* add program end */ 3644 if (ctx.bc->chip_class == CAYMAN) 3645 cm_bytecode_add_cf_end(ctx.bc); 3646 else { 3647 const struct cf_op_info *last = NULL; 3648 3649 if (ctx.bc->cf_last) 3650 last = r600_isa_cf(ctx.bc->cf_last->op); 3651 3652 /* alu clause instructions don't have EOP bit, so add NOP */ 3653 if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS) 3654 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 3655 3656 ctx.bc->cf_last->end_of_program = 1; 3657 } 3658 3659 /* check GPR limit - we have 124 = 128 - 4 3660 * (4 are reserved as alu clause temporary registers) */ 3661 if (ctx.bc->ngpr > 124) { 3662 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 3663 r = -ENOMEM; 3664 goto out_err; 3665 } 3666 3667 if (ctx.type == PIPE_SHADER_GEOMETRY) { 3668 if ((r = generate_gs_copy_shader(rctx, pipeshader, &so))) 3669 return r; 3670 } 3671 3672 free(ctx.literals); 3673 tgsi_parse_free(&ctx.parse); 3674 return 0; 3675out_err: 3676 free(ctx.literals); 3677 tgsi_parse_free(&ctx.parse); 3678 return r; 3679} 3680 3681static int tgsi_unsupported(struct r600_shader_ctx *ctx) 3682{ 3683 const unsigned tgsi_opcode = 3684 ctx->parse.FullToken.FullInstruction.Instruction.Opcode; 3685 R600_ERR("%s tgsi opcode unsupported\n", 3686 tgsi_get_opcode_name(tgsi_opcode)); 3687 return -EINVAL; 3688} 3689 3690static int tgsi_end(struct r600_shader_ctx *ctx) 3691{ 3692 return 0; 3693} 3694 3695static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 3696 const struct r600_shader_src *shader_src, 3697 unsigned chan) 3698{ 3699 bc_src->sel = shader_src->sel; 3700 bc_src->chan = shader_src->swizzle[chan]; 3701 bc_src->neg = shader_src->neg; 3702 bc_src->abs = shader_src->abs; 3703 bc_src->rel = shader_src->rel; 3704 bc_src->value = shader_src->value[bc_src->chan]; 3705 bc_src->kc_bank = shader_src->kc_bank; 3706 bc_src->kc_rel = shader_src->kc_rel; 3707} 3708 3709static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 3710{ 3711 bc_src->abs = 1; 3712 bc_src->neg = 0; 3713} 3714 3715static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 3716{ 3717 bc_src->neg = !bc_src->neg; 3718} 3719 3720static void tgsi_dst(struct r600_shader_ctx *ctx, 3721 const struct tgsi_full_dst_register *tgsi_dst, 3722 unsigned swizzle, 3723 struct r600_bytecode_alu_dst *r600_dst) 3724{ 3725 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3726 3727 r600_dst->sel = tgsi_dst->Register.Index; 3728 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 3729 r600_dst->chan = swizzle; 3730 r600_dst->write = 1; 3731 if (inst->Instruction.Saturate) { 3732 r600_dst->clamp = 1; 3733 } 3734 if (ctx->type == PIPE_SHADER_TESS_CTRL) { 3735 if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) { 3736 return; 3737 } 3738 } 3739 if (tgsi_dst->Register.Indirect) 3740 r600_dst->rel = V_SQ_REL_RELATIVE; 3741 3742} 3743 3744static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap) 3745{ 3746 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3747 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3748 struct r600_bytecode_alu alu; 3749 int i, j, r, lasti = tgsi_last_instruction(write_mask); 3750 int use_tmp = 0; 3751 3752 if (singledest) { 3753 switch (write_mask) { 3754 case 0x1: 3755 write_mask = 0x3; 3756 break; 3757 case 0x2: 3758 use_tmp = 1; 3759 write_mask = 0x3; 3760 break; 3761 case 0x4: 3762 write_mask = 0xc; 3763 break; 3764 case 0x8: 3765 write_mask = 0xc; 3766 use_tmp = 3; 3767 break; 3768 } 3769 } 3770 3771 lasti = tgsi_last_instruction(write_mask); 3772 for (i = 0; i <= lasti; i++) { 3773 3774 if (!(write_mask & (1 << i))) 3775 continue; 3776 3777 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3778 3779 if (singledest) { 3780 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3781 if (use_tmp) { 3782 alu.dst.sel = ctx->temp_reg; 3783 alu.dst.chan = i; 3784 alu.dst.write = 1; 3785 } 3786 if (i == 1 || i == 3) 3787 alu.dst.write = 0; 3788 } else 3789 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3790 3791 alu.op = ctx->inst_info->op; 3792 if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) { 3793 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3794 } else if (!swap) { 3795 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3796 r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 3797 } 3798 } else { 3799 r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i)); 3800 r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i)); 3801 } 3802 3803 /* handle some special cases */ 3804 if (i == 1 || i == 3) { 3805 switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) { 3806 case TGSI_OPCODE_SUB: 3807 r600_bytecode_src_toggle_neg(&alu.src[1]); 3808 break; 3809 case TGSI_OPCODE_DABS: 3810 r600_bytecode_src_set_abs(&alu.src[0]); 3811 break; 3812 default: 3813 break; 3814 } 3815 } 3816 if (i == lasti) { 3817 alu.last = 1; 3818 } 3819 r = r600_bytecode_add_alu(ctx->bc, &alu); 3820 if (r) 3821 return r; 3822 } 3823 3824 if (use_tmp) { 3825 write_mask = inst->Dst[0].Register.WriteMask; 3826 3827 /* move result from temp to dst */ 3828 for (i = 0; i <= lasti; i++) { 3829 if (!(write_mask & (1 << i))) 3830 continue; 3831 3832 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3833 alu.op = ALU_OP1_MOV; 3834 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3835 alu.src[0].sel = ctx->temp_reg; 3836 alu.src[0].chan = use_tmp - 1; 3837 alu.last = (i == lasti); 3838 3839 r = r600_bytecode_add_alu(ctx->bc, &alu); 3840 if (r) 3841 return r; 3842 } 3843 } 3844 return 0; 3845} 3846 3847static int tgsi_op2_64(struct r600_shader_ctx *ctx) 3848{ 3849 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3850 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3851 /* confirm writemasking */ 3852 if ((write_mask & 0x3) != 0x3 && 3853 (write_mask & 0xc) != 0xc) { 3854 fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask); 3855 return -1; 3856 } 3857 return tgsi_op2_64_params(ctx, false, false); 3858} 3859 3860static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx) 3861{ 3862 return tgsi_op2_64_params(ctx, true, false); 3863} 3864 3865static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx) 3866{ 3867 return tgsi_op2_64_params(ctx, true, true); 3868} 3869 3870static int tgsi_op3_64(struct r600_shader_ctx *ctx) 3871{ 3872 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3873 struct r600_bytecode_alu alu; 3874 int i, j, r; 3875 int lasti = 3; 3876 int tmp = r600_get_temp(ctx); 3877 3878 for (i = 0; i < lasti + 1; i++) { 3879 3880 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3881 alu.op = ctx->inst_info->op; 3882 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3883 r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1); 3884 } 3885 3886 if (inst->Dst[0].Register.WriteMask & (1 << i)) 3887 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3888 else 3889 alu.dst.sel = tmp; 3890 3891 alu.dst.chan = i; 3892 alu.is_op3 = 1; 3893 if (i == lasti) { 3894 alu.last = 1; 3895 } 3896 r = r600_bytecode_add_alu(ctx->bc, &alu); 3897 if (r) 3898 return r; 3899 } 3900 return 0; 3901} 3902 3903static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 3904{ 3905 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3906 struct r600_bytecode_alu alu; 3907 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3908 int i, j, r, lasti = tgsi_last_instruction(write_mask); 3909 /* use temp register if trans_only and more than one dst component */ 3910 int use_tmp = trans_only && (write_mask ^ (1 << lasti)); 3911 3912 for (i = 0; i <= lasti; i++) { 3913 if (!(write_mask & (1 << i))) 3914 continue; 3915 3916 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3917 if (use_tmp) { 3918 alu.dst.sel = ctx->temp_reg; 3919 alu.dst.chan = i; 3920 alu.dst.write = 1; 3921 } else 3922 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3923 3924 alu.op = ctx->inst_info->op; 3925 if (!swap) { 3926 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3927 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 3928 } 3929 } else { 3930 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3931 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3932 } 3933 /* handle some special cases */ 3934 switch (inst->Instruction.Opcode) { 3935 case TGSI_OPCODE_SUB: 3936 r600_bytecode_src_toggle_neg(&alu.src[1]); 3937 break; 3938 case TGSI_OPCODE_ABS: 3939 r600_bytecode_src_set_abs(&alu.src[0]); 3940 break; 3941 default: 3942 break; 3943 } 3944 if (i == lasti || trans_only) { 3945 alu.last = 1; 3946 } 3947 r = r600_bytecode_add_alu(ctx->bc, &alu); 3948 if (r) 3949 return r; 3950 } 3951 3952 if (use_tmp) { 3953 /* move result from temp to dst */ 3954 for (i = 0; i <= lasti; i++) { 3955 if (!(write_mask & (1 << i))) 3956 continue; 3957 3958 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3959 alu.op = ALU_OP1_MOV; 3960 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3961 alu.src[0].sel = ctx->temp_reg; 3962 alu.src[0].chan = i; 3963 alu.last = (i == lasti); 3964 3965 r = r600_bytecode_add_alu(ctx->bc, &alu); 3966 if (r) 3967 return r; 3968 } 3969 } 3970 return 0; 3971} 3972 3973static int tgsi_op2(struct r600_shader_ctx *ctx) 3974{ 3975 return tgsi_op2_s(ctx, 0, 0); 3976} 3977 3978static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 3979{ 3980 return tgsi_op2_s(ctx, 1, 0); 3981} 3982 3983static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 3984{ 3985 return tgsi_op2_s(ctx, 0, 1); 3986} 3987 3988static int tgsi_ineg(struct r600_shader_ctx *ctx) 3989{ 3990 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3991 struct r600_bytecode_alu alu; 3992 int i, r; 3993 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3994 3995 for (i = 0; i < lasti + 1; i++) { 3996 3997 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3998 continue; 3999 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4000 alu.op = ctx->inst_info->op; 4001 4002 alu.src[0].sel = V_SQ_ALU_SRC_0; 4003 4004 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4005 4006 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4007 4008 if (i == lasti) { 4009 alu.last = 1; 4010 } 4011 r = r600_bytecode_add_alu(ctx->bc, &alu); 4012 if (r) 4013 return r; 4014 } 4015 return 0; 4016 4017} 4018 4019static int tgsi_dneg(struct r600_shader_ctx *ctx) 4020{ 4021 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4022 struct r600_bytecode_alu alu; 4023 int i, r; 4024 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4025 4026 for (i = 0; i < lasti + 1; i++) { 4027 4028 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4029 continue; 4030 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4031 alu.op = ALU_OP1_MOV; 4032 4033 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4034 4035 if (i == 1 || i == 3) 4036 r600_bytecode_src_toggle_neg(&alu.src[0]); 4037 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4038 4039 if (i == lasti) { 4040 alu.last = 1; 4041 } 4042 r = r600_bytecode_add_alu(ctx->bc, &alu); 4043 if (r) 4044 return r; 4045 } 4046 return 0; 4047 4048} 4049 4050static int tgsi_dfracexp(struct r600_shader_ctx *ctx) 4051{ 4052 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4053 struct r600_bytecode_alu alu; 4054 unsigned write_mask = inst->Dst[0].Register.WriteMask; 4055 int i, j, r; 4056 int firsti = write_mask == 0xc ? 2 : 0; 4057 4058 for (i = 0; i <= 3; i++) { 4059 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4060 alu.op = ctx->inst_info->op; 4061 4062 alu.dst.sel = ctx->temp_reg; 4063 alu.dst.chan = i; 4064 alu.dst.write = 1; 4065 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4066 r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 4067 } 4068 4069 if (i == 3) 4070 alu.last = 1; 4071 4072 r = r600_bytecode_add_alu(ctx->bc, &alu); 4073 if (r) 4074 return r; 4075 } 4076 4077 /* MOV first two channels to writemask dst0 */ 4078 for (i = 0; i <= 1; i++) { 4079 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4080 alu.op = ALU_OP1_MOV; 4081 alu.src[0].chan = i + 2; 4082 alu.src[0].sel = ctx->temp_reg; 4083 4084 tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst); 4085 alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1; 4086 alu.last = 1; 4087 r = r600_bytecode_add_alu(ctx->bc, &alu); 4088 if (r) 4089 return r; 4090 } 4091 4092 for (i = 0; i <= 3; i++) { 4093 if (inst->Dst[1].Register.WriteMask & (1 << i)) { 4094 /* MOV third channels to writemask dst1 */ 4095 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4096 alu.op = ALU_OP1_MOV; 4097 alu.src[0].chan = 1; 4098 alu.src[0].sel = ctx->temp_reg; 4099 4100 tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst); 4101 alu.last = 1; 4102 r = r600_bytecode_add_alu(ctx->bc, &alu); 4103 if (r) 4104 return r; 4105 break; 4106 } 4107 } 4108 return 0; 4109} 4110 4111 4112static int egcm_int_to_double(struct r600_shader_ctx *ctx) 4113{ 4114 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4115 struct r600_bytecode_alu alu; 4116 int i, r; 4117 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4118 4119 assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D || 4120 inst->Instruction.Opcode == TGSI_OPCODE_U2D); 4121 4122 for (i = 0; i <= (lasti+1)/2; i++) { 4123 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4124 alu.op = ctx->inst_info->op; 4125 4126 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4127 alu.dst.sel = ctx->temp_reg; 4128 alu.dst.chan = i; 4129 alu.dst.write = 1; 4130 alu.last = 1; 4131 4132 r = r600_bytecode_add_alu(ctx->bc, &alu); 4133 if (r) 4134 return r; 4135 } 4136 4137 for (i = 0; i <= lasti; i++) { 4138 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4139 alu.op = ALU_OP1_FLT32_TO_FLT64; 4140 4141 alu.src[0].chan = i/2; 4142 if (i%2 == 0) 4143 alu.src[0].sel = ctx->temp_reg; 4144 else { 4145 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 4146 alu.src[0].value = 0x0; 4147 } 4148 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4149 alu.last = i == lasti; 4150 4151 r = r600_bytecode_add_alu(ctx->bc, &alu); 4152 if (r) 4153 return r; 4154 } 4155 4156 return 0; 4157} 4158 4159static int egcm_double_to_int(struct r600_shader_ctx *ctx) 4160{ 4161 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4162 struct r600_bytecode_alu alu; 4163 int i, r; 4164 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4165 4166 assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I || 4167 inst->Instruction.Opcode == TGSI_OPCODE_D2U); 4168 4169 for (i = 0; i <= lasti; i++) { 4170 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4171 alu.op = ALU_OP1_FLT64_TO_FLT32; 4172 4173 r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i)); 4174 alu.dst.chan = i; 4175 alu.dst.sel = ctx->temp_reg; 4176 alu.dst.write = i%2 == 0; 4177 alu.last = i == lasti; 4178 4179 r = r600_bytecode_add_alu(ctx->bc, &alu); 4180 if (r) 4181 return r; 4182 } 4183 4184 for (i = 0; i <= (lasti+1)/2; i++) { 4185 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4186 alu.op = ctx->inst_info->op; 4187 4188 alu.src[0].chan = i*2; 4189 alu.src[0].sel = ctx->temp_reg; 4190 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4191 alu.last = 1; 4192 4193 r = r600_bytecode_add_alu(ctx->bc, &alu); 4194 if (r) 4195 return r; 4196 } 4197 4198 return 0; 4199} 4200 4201static int cayman_emit_double_instr(struct r600_shader_ctx *ctx) 4202{ 4203 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4204 int i, r; 4205 struct r600_bytecode_alu alu; 4206 int last_slot = 3; 4207 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4208 int t1 = ctx->temp_reg; 4209 4210 /* these have to write the result to X/Y by the looks of it */ 4211 for (i = 0 ; i < last_slot; i++) { 4212 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4213 alu.op = ctx->inst_info->op; 4214 4215 /* should only be one src regs */ 4216 assert (inst->Instruction.NumSrcRegs == 1); 4217 4218 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 4219 r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); 4220 4221 /* RSQ should take the absolute value of src */ 4222 if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ || 4223 ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) { 4224 r600_bytecode_src_set_abs(&alu.src[1]); 4225 } 4226 alu.dst.sel = t1; 4227 alu.dst.chan = i; 4228 alu.dst.write = (i == 0 || i == 1); 4229 4230 if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1) 4231 alu.last = 1; 4232 r = r600_bytecode_add_alu(ctx->bc, &alu); 4233 if (r) 4234 return r; 4235 } 4236 4237 for (i = 0 ; i <= lasti; i++) { 4238 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4239 continue; 4240 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4241 alu.op = ALU_OP1_MOV; 4242 alu.src[0].sel = t1; 4243 alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1; 4244 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4245 alu.dst.write = 1; 4246 if (i == lasti) 4247 alu.last = 1; 4248 r = r600_bytecode_add_alu(ctx->bc, &alu); 4249 if (r) 4250 return r; 4251 } 4252 return 0; 4253} 4254 4255static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 4256{ 4257 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4258 int i, j, r; 4259 struct r600_bytecode_alu alu; 4260 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4261 4262 for (i = 0 ; i < last_slot; i++) { 4263 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4264 alu.op = ctx->inst_info->op; 4265 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4266 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 4267 4268 /* RSQ should take the absolute value of src */ 4269 if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) { 4270 r600_bytecode_src_set_abs(&alu.src[j]); 4271 } 4272 } 4273 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4274 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4275 4276 if (i == last_slot - 1) 4277 alu.last = 1; 4278 r = r600_bytecode_add_alu(ctx->bc, &alu); 4279 if (r) 4280 return r; 4281 } 4282 return 0; 4283} 4284 4285static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 4286{ 4287 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4288 int i, j, k, r; 4289 struct r600_bytecode_alu alu; 4290 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4291 int t1 = ctx->temp_reg; 4292 4293 for (k = 0; k <= lasti; k++) { 4294 if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 4295 continue; 4296 4297 for (i = 0 ; i < 4; i++) { 4298 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4299 alu.op = ctx->inst_info->op; 4300 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4301 r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 4302 } 4303 alu.dst.sel = t1; 4304 alu.dst.chan = i; 4305 alu.dst.write = (i == k); 4306 if (i == 3) 4307 alu.last = 1; 4308 r = r600_bytecode_add_alu(ctx->bc, &alu); 4309 if (r) 4310 return r; 4311 } 4312 } 4313 4314 for (i = 0 ; i <= lasti; i++) { 4315 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4316 continue; 4317 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4318 alu.op = ALU_OP1_MOV; 4319 alu.src[0].sel = t1; 4320 alu.src[0].chan = i; 4321 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4322 alu.dst.write = 1; 4323 if (i == lasti) 4324 alu.last = 1; 4325 r = r600_bytecode_add_alu(ctx->bc, &alu); 4326 if (r) 4327 return r; 4328 } 4329 4330 return 0; 4331} 4332 4333 4334static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) 4335{ 4336 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4337 int i, j, k, r; 4338 struct r600_bytecode_alu alu; 4339 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4340 int t1 = ctx->temp_reg; 4341 4342 for (k = 0; k < 2; k++) { 4343 if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2)))) 4344 continue; 4345 4346 for (i = 0; i < 4; i++) { 4347 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4348 alu.op = ctx->inst_info->op; 4349 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4350 r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1)); 4351 } 4352 alu.dst.sel = t1; 4353 alu.dst.chan = i; 4354 alu.dst.write = 1; 4355 if (i == 3) 4356 alu.last = 1; 4357 r = r600_bytecode_add_alu(ctx->bc, &alu); 4358 if (r) 4359 return r; 4360 } 4361 } 4362 4363 for (i = 0; i <= lasti; i++) { 4364 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4365 continue; 4366 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4367 alu.op = ALU_OP1_MOV; 4368 alu.src[0].sel = t1; 4369 alu.src[0].chan = i; 4370 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4371 alu.dst.write = 1; 4372 if (i == lasti) 4373 alu.last = 1; 4374 r = r600_bytecode_add_alu(ctx->bc, &alu); 4375 if (r) 4376 return r; 4377 } 4378 4379 return 0; 4380} 4381 4382/* 4383 * r600 - trunc to -PI..PI range 4384 * r700 - normalize by dividing by 2PI 4385 * see fdo bug 27901 4386 */ 4387static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 4388{ 4389 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 4390 static float double_pi = 3.1415926535 * 2; 4391 static float neg_pi = -3.1415926535; 4392 4393 int r; 4394 struct r600_bytecode_alu alu; 4395 4396 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4397 alu.op = ALU_OP3_MULADD; 4398 alu.is_op3 = 1; 4399 4400 alu.dst.chan = 0; 4401 alu.dst.sel = ctx->temp_reg; 4402 alu.dst.write = 1; 4403 4404 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4405 4406 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4407 alu.src[1].chan = 0; 4408 alu.src[1].value = *(uint32_t *)&half_inv_pi; 4409 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 4410 alu.src[2].chan = 0; 4411 alu.last = 1; 4412 r = r600_bytecode_add_alu(ctx->bc, &alu); 4413 if (r) 4414 return r; 4415 4416 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4417 alu.op = ALU_OP1_FRACT; 4418 4419 alu.dst.chan = 0; 4420 alu.dst.sel = ctx->temp_reg; 4421 alu.dst.write = 1; 4422 4423 alu.src[0].sel = ctx->temp_reg; 4424 alu.src[0].chan = 0; 4425 alu.last = 1; 4426 r = r600_bytecode_add_alu(ctx->bc, &alu); 4427 if (r) 4428 return r; 4429 4430 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4431 alu.op = ALU_OP3_MULADD; 4432 alu.is_op3 = 1; 4433 4434 alu.dst.chan = 0; 4435 alu.dst.sel = ctx->temp_reg; 4436 alu.dst.write = 1; 4437 4438 alu.src[0].sel = ctx->temp_reg; 4439 alu.src[0].chan = 0; 4440 4441 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4442 alu.src[1].chan = 0; 4443 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 4444 alu.src[2].chan = 0; 4445 4446 if (ctx->bc->chip_class == R600) { 4447 alu.src[1].value = *(uint32_t *)&double_pi; 4448 alu.src[2].value = *(uint32_t *)&neg_pi; 4449 } else { 4450 alu.src[1].sel = V_SQ_ALU_SRC_1; 4451 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 4452 alu.src[2].neg = 1; 4453 } 4454 4455 alu.last = 1; 4456 r = r600_bytecode_add_alu(ctx->bc, &alu); 4457 if (r) 4458 return r; 4459 return 0; 4460} 4461 4462static int cayman_trig(struct r600_shader_ctx *ctx) 4463{ 4464 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4465 struct r600_bytecode_alu alu; 4466 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4467 int i, r; 4468 4469 r = tgsi_setup_trig(ctx); 4470 if (r) 4471 return r; 4472 4473 4474 for (i = 0; i < last_slot; i++) { 4475 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4476 alu.op = ctx->inst_info->op; 4477 alu.dst.chan = i; 4478 4479 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4480 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4481 4482 alu.src[0].sel = ctx->temp_reg; 4483 alu.src[0].chan = 0; 4484 if (i == last_slot - 1) 4485 alu.last = 1; 4486 r = r600_bytecode_add_alu(ctx->bc, &alu); 4487 if (r) 4488 return r; 4489 } 4490 return 0; 4491} 4492 4493static int tgsi_trig(struct r600_shader_ctx *ctx) 4494{ 4495 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4496 struct r600_bytecode_alu alu; 4497 int i, r; 4498 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4499 4500 r = tgsi_setup_trig(ctx); 4501 if (r) 4502 return r; 4503 4504 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4505 alu.op = ctx->inst_info->op; 4506 alu.dst.chan = 0; 4507 alu.dst.sel = ctx->temp_reg; 4508 alu.dst.write = 1; 4509 4510 alu.src[0].sel = ctx->temp_reg; 4511 alu.src[0].chan = 0; 4512 alu.last = 1; 4513 r = r600_bytecode_add_alu(ctx->bc, &alu); 4514 if (r) 4515 return r; 4516 4517 /* replicate result */ 4518 for (i = 0; i < lasti + 1; i++) { 4519 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4520 continue; 4521 4522 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4523 alu.op = ALU_OP1_MOV; 4524 4525 alu.src[0].sel = ctx->temp_reg; 4526 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4527 if (i == lasti) 4528 alu.last = 1; 4529 r = r600_bytecode_add_alu(ctx->bc, &alu); 4530 if (r) 4531 return r; 4532 } 4533 return 0; 4534} 4535 4536static int tgsi_scs(struct r600_shader_ctx *ctx) 4537{ 4538 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4539 struct r600_bytecode_alu alu; 4540 int i, r; 4541 4542 /* We'll only need the trig stuff if we are going to write to the 4543 * X or Y components of the destination vector. 4544 */ 4545 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 4546 r = tgsi_setup_trig(ctx); 4547 if (r) 4548 return r; 4549 } 4550 4551 /* dst.x = COS */ 4552 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4553 if (ctx->bc->chip_class == CAYMAN) { 4554 for (i = 0 ; i < 3; i++) { 4555 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4556 alu.op = ALU_OP1_COS; 4557 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4558 4559 if (i == 0) 4560 alu.dst.write = 1; 4561 else 4562 alu.dst.write = 0; 4563 alu.src[0].sel = ctx->temp_reg; 4564 alu.src[0].chan = 0; 4565 if (i == 2) 4566 alu.last = 1; 4567 r = r600_bytecode_add_alu(ctx->bc, &alu); 4568 if (r) 4569 return r; 4570 } 4571 } else { 4572 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4573 alu.op = ALU_OP1_COS; 4574 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4575 4576 alu.src[0].sel = ctx->temp_reg; 4577 alu.src[0].chan = 0; 4578 alu.last = 1; 4579 r = r600_bytecode_add_alu(ctx->bc, &alu); 4580 if (r) 4581 return r; 4582 } 4583 } 4584 4585 /* dst.y = SIN */ 4586 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4587 if (ctx->bc->chip_class == CAYMAN) { 4588 for (i = 0 ; i < 3; i++) { 4589 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4590 alu.op = ALU_OP1_SIN; 4591 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4592 if (i == 1) 4593 alu.dst.write = 1; 4594 else 4595 alu.dst.write = 0; 4596 alu.src[0].sel = ctx->temp_reg; 4597 alu.src[0].chan = 0; 4598 if (i == 2) 4599 alu.last = 1; 4600 r = r600_bytecode_add_alu(ctx->bc, &alu); 4601 if (r) 4602 return r; 4603 } 4604 } else { 4605 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4606 alu.op = ALU_OP1_SIN; 4607 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4608 4609 alu.src[0].sel = ctx->temp_reg; 4610 alu.src[0].chan = 0; 4611 alu.last = 1; 4612 r = r600_bytecode_add_alu(ctx->bc, &alu); 4613 if (r) 4614 return r; 4615 } 4616 } 4617 4618 /* dst.z = 0.0; */ 4619 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4620 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4621 4622 alu.op = ALU_OP1_MOV; 4623 4624 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4625 4626 alu.src[0].sel = V_SQ_ALU_SRC_0; 4627 alu.src[0].chan = 0; 4628 4629 alu.last = 1; 4630 4631 r = r600_bytecode_add_alu(ctx->bc, &alu); 4632 if (r) 4633 return r; 4634 } 4635 4636 /* dst.w = 1.0; */ 4637 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4638 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4639 4640 alu.op = ALU_OP1_MOV; 4641 4642 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4643 4644 alu.src[0].sel = V_SQ_ALU_SRC_1; 4645 alu.src[0].chan = 0; 4646 4647 alu.last = 1; 4648 4649 r = r600_bytecode_add_alu(ctx->bc, &alu); 4650 if (r) 4651 return r; 4652 } 4653 4654 return 0; 4655} 4656 4657static int tgsi_kill(struct r600_shader_ctx *ctx) 4658{ 4659 const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4660 struct r600_bytecode_alu alu; 4661 int i, r; 4662 4663 for (i = 0; i < 4; i++) { 4664 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4665 alu.op = ctx->inst_info->op; 4666 4667 alu.dst.chan = i; 4668 4669 alu.src[0].sel = V_SQ_ALU_SRC_0; 4670 4671 if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) { 4672 alu.src[1].sel = V_SQ_ALU_SRC_1; 4673 alu.src[1].neg = 1; 4674 } else { 4675 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4676 } 4677 if (i == 3) { 4678 alu.last = 1; 4679 } 4680 r = r600_bytecode_add_alu(ctx->bc, &alu); 4681 if (r) 4682 return r; 4683 } 4684 4685 /* kill must be last in ALU */ 4686 ctx->bc->force_add_cf = 1; 4687 ctx->shader->uses_kill = TRUE; 4688 return 0; 4689} 4690 4691static int tgsi_lit(struct r600_shader_ctx *ctx) 4692{ 4693 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4694 struct r600_bytecode_alu alu; 4695 int r; 4696 4697 /* tmp.x = max(src.y, 0.0) */ 4698 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4699 alu.op = ALU_OP2_MAX; 4700 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 4701 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 4702 alu.src[1].chan = 1; 4703 4704 alu.dst.sel = ctx->temp_reg; 4705 alu.dst.chan = 0; 4706 alu.dst.write = 1; 4707 4708 alu.last = 1; 4709 r = r600_bytecode_add_alu(ctx->bc, &alu); 4710 if (r) 4711 return r; 4712 4713 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 4714 { 4715 int chan; 4716 int sel; 4717 unsigned i; 4718 4719 if (ctx->bc->chip_class == CAYMAN) { 4720 for (i = 0; i < 3; i++) { 4721 /* tmp.z = log(tmp.x) */ 4722 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4723 alu.op = ALU_OP1_LOG_CLAMPED; 4724 alu.src[0].sel = ctx->temp_reg; 4725 alu.src[0].chan = 0; 4726 alu.dst.sel = ctx->temp_reg; 4727 alu.dst.chan = i; 4728 if (i == 2) { 4729 alu.dst.write = 1; 4730 alu.last = 1; 4731 } else 4732 alu.dst.write = 0; 4733 4734 r = r600_bytecode_add_alu(ctx->bc, &alu); 4735 if (r) 4736 return r; 4737 } 4738 } else { 4739 /* tmp.z = log(tmp.x) */ 4740 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4741 alu.op = ALU_OP1_LOG_CLAMPED; 4742 alu.src[0].sel = ctx->temp_reg; 4743 alu.src[0].chan = 0; 4744 alu.dst.sel = ctx->temp_reg; 4745 alu.dst.chan = 2; 4746 alu.dst.write = 1; 4747 alu.last = 1; 4748 r = r600_bytecode_add_alu(ctx->bc, &alu); 4749 if (r) 4750 return r; 4751 } 4752 4753 chan = alu.dst.chan; 4754 sel = alu.dst.sel; 4755 4756 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 4757 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4758 alu.op = ALU_OP3_MUL_LIT; 4759 alu.src[0].sel = sel; 4760 alu.src[0].chan = chan; 4761 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 4762 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 4763 alu.dst.sel = ctx->temp_reg; 4764 alu.dst.chan = 0; 4765 alu.dst.write = 1; 4766 alu.is_op3 = 1; 4767 alu.last = 1; 4768 r = r600_bytecode_add_alu(ctx->bc, &alu); 4769 if (r) 4770 return r; 4771 4772 if (ctx->bc->chip_class == CAYMAN) { 4773 for (i = 0; i < 3; i++) { 4774 /* dst.z = exp(tmp.x) */ 4775 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4776 alu.op = ALU_OP1_EXP_IEEE; 4777 alu.src[0].sel = ctx->temp_reg; 4778 alu.src[0].chan = 0; 4779 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4780 if (i == 2) { 4781 alu.dst.write = 1; 4782 alu.last = 1; 4783 } else 4784 alu.dst.write = 0; 4785 r = r600_bytecode_add_alu(ctx->bc, &alu); 4786 if (r) 4787 return r; 4788 } 4789 } else { 4790 /* dst.z = exp(tmp.x) */ 4791 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4792 alu.op = ALU_OP1_EXP_IEEE; 4793 alu.src[0].sel = ctx->temp_reg; 4794 alu.src[0].chan = 0; 4795 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4796 alu.last = 1; 4797 r = r600_bytecode_add_alu(ctx->bc, &alu); 4798 if (r) 4799 return r; 4800 } 4801 } 4802 4803 /* dst.x, <- 1.0 */ 4804 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4805 alu.op = ALU_OP1_MOV; 4806 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 4807 alu.src[0].chan = 0; 4808 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4809 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 4810 r = r600_bytecode_add_alu(ctx->bc, &alu); 4811 if (r) 4812 return r; 4813 4814 /* dst.y = max(src.x, 0.0) */ 4815 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4816 alu.op = ALU_OP2_MAX; 4817 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4818 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 4819 alu.src[1].chan = 0; 4820 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4821 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 4822 r = r600_bytecode_add_alu(ctx->bc, &alu); 4823 if (r) 4824 return r; 4825 4826 /* dst.w, <- 1.0 */ 4827 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4828 alu.op = ALU_OP1_MOV; 4829 alu.src[0].sel = V_SQ_ALU_SRC_1; 4830 alu.src[0].chan = 0; 4831 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4832 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 4833 alu.last = 1; 4834 r = r600_bytecode_add_alu(ctx->bc, &alu); 4835 if (r) 4836 return r; 4837 4838 return 0; 4839} 4840 4841static int tgsi_rsq(struct r600_shader_ctx *ctx) 4842{ 4843 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4844 struct r600_bytecode_alu alu; 4845 int i, r; 4846 4847 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4848 4849 /* XXX: 4850 * For state trackers other than OpenGL, we'll want to use 4851 * _RECIPSQRT_IEEE instead. 4852 */ 4853 alu.op = ALU_OP1_RECIPSQRT_CLAMPED; 4854 4855 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4856 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4857 r600_bytecode_src_set_abs(&alu.src[i]); 4858 } 4859 alu.dst.sel = ctx->temp_reg; 4860 alu.dst.write = 1; 4861 alu.last = 1; 4862 r = r600_bytecode_add_alu(ctx->bc, &alu); 4863 if (r) 4864 return r; 4865 /* replicate result */ 4866 return tgsi_helper_tempx_replicate(ctx); 4867} 4868 4869static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 4870{ 4871 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4872 struct r600_bytecode_alu alu; 4873 int i, r; 4874 4875 for (i = 0; i < 4; i++) { 4876 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4877 alu.src[0].sel = ctx->temp_reg; 4878 alu.op = ALU_OP1_MOV; 4879 alu.dst.chan = i; 4880 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4881 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4882 if (i == 3) 4883 alu.last = 1; 4884 r = r600_bytecode_add_alu(ctx->bc, &alu); 4885 if (r) 4886 return r; 4887 } 4888 return 0; 4889} 4890 4891static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 4892{ 4893 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4894 struct r600_bytecode_alu alu; 4895 int i, r; 4896 4897 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4898 alu.op = ctx->inst_info->op; 4899 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4900 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4901 } 4902 alu.dst.sel = ctx->temp_reg; 4903 alu.dst.write = 1; 4904 alu.last = 1; 4905 r = r600_bytecode_add_alu(ctx->bc, &alu); 4906 if (r) 4907 return r; 4908 /* replicate result */ 4909 return tgsi_helper_tempx_replicate(ctx); 4910} 4911 4912static int cayman_pow(struct r600_shader_ctx *ctx) 4913{ 4914 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4915 int i, r; 4916 struct r600_bytecode_alu alu; 4917 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4918 4919 for (i = 0; i < 3; i++) { 4920 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4921 alu.op = ALU_OP1_LOG_IEEE; 4922 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4923 alu.dst.sel = ctx->temp_reg; 4924 alu.dst.chan = i; 4925 alu.dst.write = 1; 4926 if (i == 2) 4927 alu.last = 1; 4928 r = r600_bytecode_add_alu(ctx->bc, &alu); 4929 if (r) 4930 return r; 4931 } 4932 4933 /* b * LOG2(a) */ 4934 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4935 alu.op = ALU_OP2_MUL; 4936 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 4937 alu.src[1].sel = ctx->temp_reg; 4938 alu.dst.sel = ctx->temp_reg; 4939 alu.dst.write = 1; 4940 alu.last = 1; 4941 r = r600_bytecode_add_alu(ctx->bc, &alu); 4942 if (r) 4943 return r; 4944 4945 for (i = 0; i < last_slot; i++) { 4946 /* POW(a,b) = EXP2(b * LOG2(a))*/ 4947 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4948 alu.op = ALU_OP1_EXP_IEEE; 4949 alu.src[0].sel = ctx->temp_reg; 4950 4951 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4952 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4953 if (i == last_slot - 1) 4954 alu.last = 1; 4955 r = r600_bytecode_add_alu(ctx->bc, &alu); 4956 if (r) 4957 return r; 4958 } 4959 return 0; 4960} 4961 4962static int tgsi_pow(struct r600_shader_ctx *ctx) 4963{ 4964 struct r600_bytecode_alu alu; 4965 int r; 4966 4967 /* LOG2(a) */ 4968 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4969 alu.op = ALU_OP1_LOG_IEEE; 4970 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4971 alu.dst.sel = ctx->temp_reg; 4972 alu.dst.write = 1; 4973 alu.last = 1; 4974 r = r600_bytecode_add_alu(ctx->bc, &alu); 4975 if (r) 4976 return r; 4977 /* b * LOG2(a) */ 4978 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4979 alu.op = ALU_OP2_MUL; 4980 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 4981 alu.src[1].sel = ctx->temp_reg; 4982 alu.dst.sel = ctx->temp_reg; 4983 alu.dst.write = 1; 4984 alu.last = 1; 4985 r = r600_bytecode_add_alu(ctx->bc, &alu); 4986 if (r) 4987 return r; 4988 /* POW(a,b) = EXP2(b * LOG2(a))*/ 4989 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4990 alu.op = ALU_OP1_EXP_IEEE; 4991 alu.src[0].sel = ctx->temp_reg; 4992 alu.dst.sel = ctx->temp_reg; 4993 alu.dst.write = 1; 4994 alu.last = 1; 4995 r = r600_bytecode_add_alu(ctx->bc, &alu); 4996 if (r) 4997 return r; 4998 return tgsi_helper_tempx_replicate(ctx); 4999} 5000 5001static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 5002{ 5003 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5004 struct r600_bytecode_alu alu; 5005 int i, r, j; 5006 unsigned write_mask = inst->Dst[0].Register.WriteMask; 5007 int tmp0 = ctx->temp_reg; 5008 int tmp1 = r600_get_temp(ctx); 5009 int tmp2 = r600_get_temp(ctx); 5010 int tmp3 = r600_get_temp(ctx); 5011 /* Unsigned path: 5012 * 5013 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 5014 * 5015 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 5016 * 2. tmp0.z = lo (tmp0.x * src2) 5017 * 3. tmp0.w = -tmp0.z 5018 * 4. tmp0.y = hi (tmp0.x * src2) 5019 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 5020 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 5021 * 7. tmp1.x = tmp0.x - tmp0.w 5022 * 8. tmp1.y = tmp0.x + tmp0.w 5023 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 5024 * 10. tmp0.z = hi(tmp0.x * src1) = q 5025 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 5026 * 5027 * 12. tmp0.w = src1 - tmp0.y = r 5028 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 5029 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 5030 * 5031 * if DIV 5032 * 5033 * 15. tmp1.z = tmp0.z + 1 = q + 1 5034 * 16. tmp1.w = tmp0.z - 1 = q - 1 5035 * 5036 * else MOD 5037 * 5038 * 15. tmp1.z = tmp0.w - src2 = r - src2 5039 * 16. tmp1.w = tmp0.w + src2 = r + src2 5040 * 5041 * endif 5042 * 5043 * 17. tmp1.x = tmp1.x & tmp1.y 5044 * 5045 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 5046 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 5047 * 5048 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 5049 * 20. dst = src2==0 ? MAX_UINT : tmp0.z 5050 * 5051 * Signed path: 5052 * 5053 * Same as unsigned, using abs values of the operands, 5054 * and fixing the sign of the result in the end. 5055 */ 5056 5057 for (i = 0; i < 4; i++) { 5058 if (!(write_mask & (1<<i))) 5059 continue; 5060 5061 if (signed_op) { 5062 5063 /* tmp2.x = -src0 */ 5064 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5065 alu.op = ALU_OP2_SUB_INT; 5066 5067 alu.dst.sel = tmp2; 5068 alu.dst.chan = 0; 5069 alu.dst.write = 1; 5070 5071 alu.src[0].sel = V_SQ_ALU_SRC_0; 5072 5073 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5074 5075 alu.last = 1; 5076 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5077 return r; 5078 5079 /* tmp2.y = -src1 */ 5080 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5081 alu.op = ALU_OP2_SUB_INT; 5082 5083 alu.dst.sel = tmp2; 5084 alu.dst.chan = 1; 5085 alu.dst.write = 1; 5086 5087 alu.src[0].sel = V_SQ_ALU_SRC_0; 5088 5089 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5090 5091 alu.last = 1; 5092 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5093 return r; 5094 5095 /* tmp2.z sign bit is set if src0 and src2 signs are different */ 5096 /* it will be a sign of the quotient */ 5097 if (!mod) { 5098 5099 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5100 alu.op = ALU_OP2_XOR_INT; 5101 5102 alu.dst.sel = tmp2; 5103 alu.dst.chan = 2; 5104 alu.dst.write = 1; 5105 5106 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5107 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5108 5109 alu.last = 1; 5110 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5111 return r; 5112 } 5113 5114 /* tmp2.x = |src0| */ 5115 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5116 alu.op = ALU_OP3_CNDGE_INT; 5117 alu.is_op3 = 1; 5118 5119 alu.dst.sel = tmp2; 5120 alu.dst.chan = 0; 5121 alu.dst.write = 1; 5122 5123 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5124 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5125 alu.src[2].sel = tmp2; 5126 alu.src[2].chan = 0; 5127 5128 alu.last = 1; 5129 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5130 return r; 5131 5132 /* tmp2.y = |src1| */ 5133 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5134 alu.op = ALU_OP3_CNDGE_INT; 5135 alu.is_op3 = 1; 5136 5137 alu.dst.sel = tmp2; 5138 alu.dst.chan = 1; 5139 alu.dst.write = 1; 5140 5141 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5142 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5143 alu.src[2].sel = tmp2; 5144 alu.src[2].chan = 1; 5145 5146 alu.last = 1; 5147 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5148 return r; 5149 5150 } 5151 5152 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 5153 if (ctx->bc->chip_class == CAYMAN) { 5154 /* tmp3.x = u2f(src2) */ 5155 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5156 alu.op = ALU_OP1_UINT_TO_FLT; 5157 5158 alu.dst.sel = tmp3; 5159 alu.dst.chan = 0; 5160 alu.dst.write = 1; 5161 5162 if (signed_op) { 5163 alu.src[0].sel = tmp2; 5164 alu.src[0].chan = 1; 5165 } else { 5166 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5167 } 5168 5169 alu.last = 1; 5170 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5171 return r; 5172 5173 /* tmp0.x = recip(tmp3.x) */ 5174 for (j = 0 ; j < 3; j++) { 5175 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5176 alu.op = ALU_OP1_RECIP_IEEE; 5177 5178 alu.dst.sel = tmp0; 5179 alu.dst.chan = j; 5180 alu.dst.write = (j == 0); 5181 5182 alu.src[0].sel = tmp3; 5183 alu.src[0].chan = 0; 5184 5185 if (j == 2) 5186 alu.last = 1; 5187 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5188 return r; 5189 } 5190 5191 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5192 alu.op = ALU_OP2_MUL; 5193 5194 alu.src[0].sel = tmp0; 5195 alu.src[0].chan = 0; 5196 5197 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 5198 alu.src[1].value = 0x4f800000; 5199 5200 alu.dst.sel = tmp3; 5201 alu.dst.write = 1; 5202 alu.last = 1; 5203 r = r600_bytecode_add_alu(ctx->bc, &alu); 5204 if (r) 5205 return r; 5206 5207 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5208 alu.op = ALU_OP1_FLT_TO_UINT; 5209 5210 alu.dst.sel = tmp0; 5211 alu.dst.chan = 0; 5212 alu.dst.write = 1; 5213 5214 alu.src[0].sel = tmp3; 5215 alu.src[0].chan = 0; 5216 5217 alu.last = 1; 5218 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5219 return r; 5220 5221 } else { 5222 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5223 alu.op = ALU_OP1_RECIP_UINT; 5224 5225 alu.dst.sel = tmp0; 5226 alu.dst.chan = 0; 5227 alu.dst.write = 1; 5228 5229 if (signed_op) { 5230 alu.src[0].sel = tmp2; 5231 alu.src[0].chan = 1; 5232 } else { 5233 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5234 } 5235 5236 alu.last = 1; 5237 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5238 return r; 5239 } 5240 5241 /* 2. tmp0.z = lo (tmp0.x * src2) */ 5242 if (ctx->bc->chip_class == CAYMAN) { 5243 for (j = 0 ; j < 4; j++) { 5244 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5245 alu.op = ALU_OP2_MULLO_UINT; 5246 5247 alu.dst.sel = tmp0; 5248 alu.dst.chan = j; 5249 alu.dst.write = (j == 2); 5250 5251 alu.src[0].sel = tmp0; 5252 alu.src[0].chan = 0; 5253 if (signed_op) { 5254 alu.src[1].sel = tmp2; 5255 alu.src[1].chan = 1; 5256 } else { 5257 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5258 } 5259 5260 alu.last = (j == 3); 5261 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5262 return r; 5263 } 5264 } else { 5265 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5266 alu.op = ALU_OP2_MULLO_UINT; 5267 5268 alu.dst.sel = tmp0; 5269 alu.dst.chan = 2; 5270 alu.dst.write = 1; 5271 5272 alu.src[0].sel = tmp0; 5273 alu.src[0].chan = 0; 5274 if (signed_op) { 5275 alu.src[1].sel = tmp2; 5276 alu.src[1].chan = 1; 5277 } else { 5278 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5279 } 5280 5281 alu.last = 1; 5282 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5283 return r; 5284 } 5285 5286 /* 3. tmp0.w = -tmp0.z */ 5287 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5288 alu.op = ALU_OP2_SUB_INT; 5289 5290 alu.dst.sel = tmp0; 5291 alu.dst.chan = 3; 5292 alu.dst.write = 1; 5293 5294 alu.src[0].sel = V_SQ_ALU_SRC_0; 5295 alu.src[1].sel = tmp0; 5296 alu.src[1].chan = 2; 5297 5298 alu.last = 1; 5299 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5300 return r; 5301 5302 /* 4. tmp0.y = hi (tmp0.x * src2) */ 5303 if (ctx->bc->chip_class == CAYMAN) { 5304 for (j = 0 ; j < 4; j++) { 5305 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5306 alu.op = ALU_OP2_MULHI_UINT; 5307 5308 alu.dst.sel = tmp0; 5309 alu.dst.chan = j; 5310 alu.dst.write = (j == 1); 5311 5312 alu.src[0].sel = tmp0; 5313 alu.src[0].chan = 0; 5314 5315 if (signed_op) { 5316 alu.src[1].sel = tmp2; 5317 alu.src[1].chan = 1; 5318 } else { 5319 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5320 } 5321 alu.last = (j == 3); 5322 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5323 return r; 5324 } 5325 } else { 5326 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5327 alu.op = ALU_OP2_MULHI_UINT; 5328 5329 alu.dst.sel = tmp0; 5330 alu.dst.chan = 1; 5331 alu.dst.write = 1; 5332 5333 alu.src[0].sel = tmp0; 5334 alu.src[0].chan = 0; 5335 5336 if (signed_op) { 5337 alu.src[1].sel = tmp2; 5338 alu.src[1].chan = 1; 5339 } else { 5340 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5341 } 5342 5343 alu.last = 1; 5344 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5345 return r; 5346 } 5347 5348 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 5349 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5350 alu.op = ALU_OP3_CNDE_INT; 5351 alu.is_op3 = 1; 5352 5353 alu.dst.sel = tmp0; 5354 alu.dst.chan = 2; 5355 alu.dst.write = 1; 5356 5357 alu.src[0].sel = tmp0; 5358 alu.src[0].chan = 1; 5359 alu.src[1].sel = tmp0; 5360 alu.src[1].chan = 3; 5361 alu.src[2].sel = tmp0; 5362 alu.src[2].chan = 2; 5363 5364 alu.last = 1; 5365 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5366 return r; 5367 5368 /* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 5369 if (ctx->bc->chip_class == CAYMAN) { 5370 for (j = 0 ; j < 4; j++) { 5371 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5372 alu.op = ALU_OP2_MULHI_UINT; 5373 5374 alu.dst.sel = tmp0; 5375 alu.dst.chan = j; 5376 alu.dst.write = (j == 3); 5377 5378 alu.src[0].sel = tmp0; 5379 alu.src[0].chan = 2; 5380 5381 alu.src[1].sel = tmp0; 5382 alu.src[1].chan = 0; 5383 5384 alu.last = (j == 3); 5385 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5386 return r; 5387 } 5388 } else { 5389 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5390 alu.op = ALU_OP2_MULHI_UINT; 5391 5392 alu.dst.sel = tmp0; 5393 alu.dst.chan = 3; 5394 alu.dst.write = 1; 5395 5396 alu.src[0].sel = tmp0; 5397 alu.src[0].chan = 2; 5398 5399 alu.src[1].sel = tmp0; 5400 alu.src[1].chan = 0; 5401 5402 alu.last = 1; 5403 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5404 return r; 5405 } 5406 5407 /* 7. tmp1.x = tmp0.x - tmp0.w */ 5408 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5409 alu.op = ALU_OP2_SUB_INT; 5410 5411 alu.dst.sel = tmp1; 5412 alu.dst.chan = 0; 5413 alu.dst.write = 1; 5414 5415 alu.src[0].sel = tmp0; 5416 alu.src[0].chan = 0; 5417 alu.src[1].sel = tmp0; 5418 alu.src[1].chan = 3; 5419 5420 alu.last = 1; 5421 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5422 return r; 5423 5424 /* 8. tmp1.y = tmp0.x + tmp0.w */ 5425 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5426 alu.op = ALU_OP2_ADD_INT; 5427 5428 alu.dst.sel = tmp1; 5429 alu.dst.chan = 1; 5430 alu.dst.write = 1; 5431 5432 alu.src[0].sel = tmp0; 5433 alu.src[0].chan = 0; 5434 alu.src[1].sel = tmp0; 5435 alu.src[1].chan = 3; 5436 5437 alu.last = 1; 5438 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5439 return r; 5440 5441 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */ 5442 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5443 alu.op = ALU_OP3_CNDE_INT; 5444 alu.is_op3 = 1; 5445 5446 alu.dst.sel = tmp0; 5447 alu.dst.chan = 0; 5448 alu.dst.write = 1; 5449 5450 alu.src[0].sel = tmp0; 5451 alu.src[0].chan = 1; 5452 alu.src[1].sel = tmp1; 5453 alu.src[1].chan = 1; 5454 alu.src[2].sel = tmp1; 5455 alu.src[2].chan = 0; 5456 5457 alu.last = 1; 5458 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5459 return r; 5460 5461 /* 10. tmp0.z = hi(tmp0.x * src1) = q */ 5462 if (ctx->bc->chip_class == CAYMAN) { 5463 for (j = 0 ; j < 4; j++) { 5464 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5465 alu.op = ALU_OP2_MULHI_UINT; 5466 5467 alu.dst.sel = tmp0; 5468 alu.dst.chan = j; 5469 alu.dst.write = (j == 2); 5470 5471 alu.src[0].sel = tmp0; 5472 alu.src[0].chan = 0; 5473 5474 if (signed_op) { 5475 alu.src[1].sel = tmp2; 5476 alu.src[1].chan = 0; 5477 } else { 5478 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5479 } 5480 5481 alu.last = (j == 3); 5482 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5483 return r; 5484 } 5485 } else { 5486 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5487 alu.op = ALU_OP2_MULHI_UINT; 5488 5489 alu.dst.sel = tmp0; 5490 alu.dst.chan = 2; 5491 alu.dst.write = 1; 5492 5493 alu.src[0].sel = tmp0; 5494 alu.src[0].chan = 0; 5495 5496 if (signed_op) { 5497 alu.src[1].sel = tmp2; 5498 alu.src[1].chan = 0; 5499 } else { 5500 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5501 } 5502 5503 alu.last = 1; 5504 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5505 return r; 5506 } 5507 5508 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 5509 if (ctx->bc->chip_class == CAYMAN) { 5510 for (j = 0 ; j < 4; j++) { 5511 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5512 alu.op = ALU_OP2_MULLO_UINT; 5513 5514 alu.dst.sel = tmp0; 5515 alu.dst.chan = j; 5516 alu.dst.write = (j == 1); 5517 5518 if (signed_op) { 5519 alu.src[0].sel = tmp2; 5520 alu.src[0].chan = 1; 5521 } else { 5522 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5523 } 5524 5525 alu.src[1].sel = tmp0; 5526 alu.src[1].chan = 2; 5527 5528 alu.last = (j == 3); 5529 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5530 return r; 5531 } 5532 } else { 5533 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5534 alu.op = ALU_OP2_MULLO_UINT; 5535 5536 alu.dst.sel = tmp0; 5537 alu.dst.chan = 1; 5538 alu.dst.write = 1; 5539 5540 if (signed_op) { 5541 alu.src[0].sel = tmp2; 5542 alu.src[0].chan = 1; 5543 } else { 5544 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5545 } 5546 5547 alu.src[1].sel = tmp0; 5548 alu.src[1].chan = 2; 5549 5550 alu.last = 1; 5551 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5552 return r; 5553 } 5554 5555 /* 12. tmp0.w = src1 - tmp0.y = r */ 5556 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5557 alu.op = ALU_OP2_SUB_INT; 5558 5559 alu.dst.sel = tmp0; 5560 alu.dst.chan = 3; 5561 alu.dst.write = 1; 5562 5563 if (signed_op) { 5564 alu.src[0].sel = tmp2; 5565 alu.src[0].chan = 0; 5566 } else { 5567 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5568 } 5569 5570 alu.src[1].sel = tmp0; 5571 alu.src[1].chan = 1; 5572 5573 alu.last = 1; 5574 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5575 return r; 5576 5577 /* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */ 5578 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5579 alu.op = ALU_OP2_SETGE_UINT; 5580 5581 alu.dst.sel = tmp1; 5582 alu.dst.chan = 0; 5583 alu.dst.write = 1; 5584 5585 alu.src[0].sel = tmp0; 5586 alu.src[0].chan = 3; 5587 if (signed_op) { 5588 alu.src[1].sel = tmp2; 5589 alu.src[1].chan = 1; 5590 } else { 5591 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5592 } 5593 5594 alu.last = 1; 5595 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5596 return r; 5597 5598 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 5599 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5600 alu.op = ALU_OP2_SETGE_UINT; 5601 5602 alu.dst.sel = tmp1; 5603 alu.dst.chan = 1; 5604 alu.dst.write = 1; 5605 5606 if (signed_op) { 5607 alu.src[0].sel = tmp2; 5608 alu.src[0].chan = 0; 5609 } else { 5610 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5611 } 5612 5613 alu.src[1].sel = tmp0; 5614 alu.src[1].chan = 1; 5615 5616 alu.last = 1; 5617 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5618 return r; 5619 5620 if (mod) { /* UMOD */ 5621 5622 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 5623 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5624 alu.op = ALU_OP2_SUB_INT; 5625 5626 alu.dst.sel = tmp1; 5627 alu.dst.chan = 2; 5628 alu.dst.write = 1; 5629 5630 alu.src[0].sel = tmp0; 5631 alu.src[0].chan = 3; 5632 5633 if (signed_op) { 5634 alu.src[1].sel = tmp2; 5635 alu.src[1].chan = 1; 5636 } else { 5637 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5638 } 5639 5640 alu.last = 1; 5641 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5642 return r; 5643 5644 /* 16. tmp1.w = tmp0.w + src2 = r + src2 */ 5645 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5646 alu.op = ALU_OP2_ADD_INT; 5647 5648 alu.dst.sel = tmp1; 5649 alu.dst.chan = 3; 5650 alu.dst.write = 1; 5651 5652 alu.src[0].sel = tmp0; 5653 alu.src[0].chan = 3; 5654 if (signed_op) { 5655 alu.src[1].sel = tmp2; 5656 alu.src[1].chan = 1; 5657 } else { 5658 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5659 } 5660 5661 alu.last = 1; 5662 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5663 return r; 5664 5665 } else { /* UDIV */ 5666 5667 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 5668 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5669 alu.op = ALU_OP2_ADD_INT; 5670 5671 alu.dst.sel = tmp1; 5672 alu.dst.chan = 2; 5673 alu.dst.write = 1; 5674 5675 alu.src[0].sel = tmp0; 5676 alu.src[0].chan = 2; 5677 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 5678 5679 alu.last = 1; 5680 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5681 return r; 5682 5683 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 5684 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5685 alu.op = ALU_OP2_ADD_INT; 5686 5687 alu.dst.sel = tmp1; 5688 alu.dst.chan = 3; 5689 alu.dst.write = 1; 5690 5691 alu.src[0].sel = tmp0; 5692 alu.src[0].chan = 2; 5693 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 5694 5695 alu.last = 1; 5696 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5697 return r; 5698 5699 } 5700 5701 /* 17. tmp1.x = tmp1.x & tmp1.y */ 5702 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5703 alu.op = ALU_OP2_AND_INT; 5704 5705 alu.dst.sel = tmp1; 5706 alu.dst.chan = 0; 5707 alu.dst.write = 1; 5708 5709 alu.src[0].sel = tmp1; 5710 alu.src[0].chan = 0; 5711 alu.src[1].sel = tmp1; 5712 alu.src[1].chan = 1; 5713 5714 alu.last = 1; 5715 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5716 return r; 5717 5718 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 5719 /* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */ 5720 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5721 alu.op = ALU_OP3_CNDE_INT; 5722 alu.is_op3 = 1; 5723 5724 alu.dst.sel = tmp0; 5725 alu.dst.chan = 2; 5726 alu.dst.write = 1; 5727 5728 alu.src[0].sel = tmp1; 5729 alu.src[0].chan = 0; 5730 alu.src[1].sel = tmp0; 5731 alu.src[1].chan = mod ? 3 : 2; 5732 alu.src[2].sel = tmp1; 5733 alu.src[2].chan = 2; 5734 5735 alu.last = 1; 5736 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5737 return r; 5738 5739 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 5740 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5741 alu.op = ALU_OP3_CNDE_INT; 5742 alu.is_op3 = 1; 5743 5744 if (signed_op) { 5745 alu.dst.sel = tmp0; 5746 alu.dst.chan = 2; 5747 alu.dst.write = 1; 5748 } else { 5749 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5750 } 5751 5752 alu.src[0].sel = tmp1; 5753 alu.src[0].chan = 1; 5754 alu.src[1].sel = tmp1; 5755 alu.src[1].chan = 3; 5756 alu.src[2].sel = tmp0; 5757 alu.src[2].chan = 2; 5758 5759 alu.last = 1; 5760 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5761 return r; 5762 5763 if (signed_op) { 5764 5765 /* fix the sign of the result */ 5766 5767 if (mod) { 5768 5769 /* tmp0.x = -tmp0.z */ 5770 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5771 alu.op = ALU_OP2_SUB_INT; 5772 5773 alu.dst.sel = tmp0; 5774 alu.dst.chan = 0; 5775 alu.dst.write = 1; 5776 5777 alu.src[0].sel = V_SQ_ALU_SRC_0; 5778 alu.src[1].sel = tmp0; 5779 alu.src[1].chan = 2; 5780 5781 alu.last = 1; 5782 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5783 return r; 5784 5785 /* sign of the remainder is the same as the sign of src0 */ 5786 /* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */ 5787 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5788 alu.op = ALU_OP3_CNDGE_INT; 5789 alu.is_op3 = 1; 5790 5791 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5792 5793 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5794 alu.src[1].sel = tmp0; 5795 alu.src[1].chan = 2; 5796 alu.src[2].sel = tmp0; 5797 alu.src[2].chan = 0; 5798 5799 alu.last = 1; 5800 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5801 return r; 5802 5803 } else { 5804 5805 /* tmp0.x = -tmp0.z */ 5806 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5807 alu.op = ALU_OP2_SUB_INT; 5808 5809 alu.dst.sel = tmp0; 5810 alu.dst.chan = 0; 5811 alu.dst.write = 1; 5812 5813 alu.src[0].sel = V_SQ_ALU_SRC_0; 5814 alu.src[1].sel = tmp0; 5815 alu.src[1].chan = 2; 5816 5817 alu.last = 1; 5818 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5819 return r; 5820 5821 /* fix the quotient sign (same as the sign of src0*src1) */ 5822 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */ 5823 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5824 alu.op = ALU_OP3_CNDGE_INT; 5825 alu.is_op3 = 1; 5826 5827 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5828 5829 alu.src[0].sel = tmp2; 5830 alu.src[0].chan = 2; 5831 alu.src[1].sel = tmp0; 5832 alu.src[1].chan = 2; 5833 alu.src[2].sel = tmp0; 5834 alu.src[2].chan = 0; 5835 5836 alu.last = 1; 5837 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5838 return r; 5839 } 5840 } 5841 } 5842 return 0; 5843} 5844 5845static int tgsi_udiv(struct r600_shader_ctx *ctx) 5846{ 5847 return tgsi_divmod(ctx, 0, 0); 5848} 5849 5850static int tgsi_umod(struct r600_shader_ctx *ctx) 5851{ 5852 return tgsi_divmod(ctx, 1, 0); 5853} 5854 5855static int tgsi_idiv(struct r600_shader_ctx *ctx) 5856{ 5857 return tgsi_divmod(ctx, 0, 1); 5858} 5859 5860static int tgsi_imod(struct r600_shader_ctx *ctx) 5861{ 5862 return tgsi_divmod(ctx, 1, 1); 5863} 5864 5865 5866static int tgsi_f2i(struct r600_shader_ctx *ctx) 5867{ 5868 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5869 struct r600_bytecode_alu alu; 5870 int i, r; 5871 unsigned write_mask = inst->Dst[0].Register.WriteMask; 5872 int last_inst = tgsi_last_instruction(write_mask); 5873 5874 for (i = 0; i < 4; i++) { 5875 if (!(write_mask & (1<<i))) 5876 continue; 5877 5878 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5879 alu.op = ALU_OP1_TRUNC; 5880 5881 alu.dst.sel = ctx->temp_reg; 5882 alu.dst.chan = i; 5883 alu.dst.write = 1; 5884 5885 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5886 if (i == last_inst) 5887 alu.last = 1; 5888 r = r600_bytecode_add_alu(ctx->bc, &alu); 5889 if (r) 5890 return r; 5891 } 5892 5893 for (i = 0; i < 4; i++) { 5894 if (!(write_mask & (1<<i))) 5895 continue; 5896 5897 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5898 alu.op = ctx->inst_info->op; 5899 5900 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5901 5902 alu.src[0].sel = ctx->temp_reg; 5903 alu.src[0].chan = i; 5904 5905 if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT) 5906 alu.last = 1; 5907 r = r600_bytecode_add_alu(ctx->bc, &alu); 5908 if (r) 5909 return r; 5910 } 5911 5912 return 0; 5913} 5914 5915static int tgsi_iabs(struct r600_shader_ctx *ctx) 5916{ 5917 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5918 struct r600_bytecode_alu alu; 5919 int i, r; 5920 unsigned write_mask = inst->Dst[0].Register.WriteMask; 5921 int last_inst = tgsi_last_instruction(write_mask); 5922 5923 /* tmp = -src */ 5924 for (i = 0; i < 4; i++) { 5925 if (!(write_mask & (1<<i))) 5926 continue; 5927 5928 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5929 alu.op = ALU_OP2_SUB_INT; 5930 5931 alu.dst.sel = ctx->temp_reg; 5932 alu.dst.chan = i; 5933 alu.dst.write = 1; 5934 5935 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5936 alu.src[0].sel = V_SQ_ALU_SRC_0; 5937 5938 if (i == last_inst) 5939 alu.last = 1; 5940 r = r600_bytecode_add_alu(ctx->bc, &alu); 5941 if (r) 5942 return r; 5943 } 5944 5945 /* dst = (src >= 0 ? src : tmp) */ 5946 for (i = 0; i < 4; i++) { 5947 if (!(write_mask & (1<<i))) 5948 continue; 5949 5950 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5951 alu.op = ALU_OP3_CNDGE_INT; 5952 alu.is_op3 = 1; 5953 alu.dst.write = 1; 5954 5955 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5956 5957 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5958 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5959 alu.src[2].sel = ctx->temp_reg; 5960 alu.src[2].chan = i; 5961 5962 if (i == last_inst) 5963 alu.last = 1; 5964 r = r600_bytecode_add_alu(ctx->bc, &alu); 5965 if (r) 5966 return r; 5967 } 5968 return 0; 5969} 5970 5971static int tgsi_issg(struct r600_shader_ctx *ctx) 5972{ 5973 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5974 struct r600_bytecode_alu alu; 5975 int i, r; 5976 unsigned write_mask = inst->Dst[0].Register.WriteMask; 5977 int last_inst = tgsi_last_instruction(write_mask); 5978 5979 /* tmp = (src >= 0 ? src : -1) */ 5980 for (i = 0; i < 4; i++) { 5981 if (!(write_mask & (1<<i))) 5982 continue; 5983 5984 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5985 alu.op = ALU_OP3_CNDGE_INT; 5986 alu.is_op3 = 1; 5987 5988 alu.dst.sel = ctx->temp_reg; 5989 alu.dst.chan = i; 5990 alu.dst.write = 1; 5991 5992 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5993 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5994 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT; 5995 5996 if (i == last_inst) 5997 alu.last = 1; 5998 r = r600_bytecode_add_alu(ctx->bc, &alu); 5999 if (r) 6000 return r; 6001 } 6002 6003 /* dst = (tmp > 0 ? 1 : tmp) */ 6004 for (i = 0; i < 4; i++) { 6005 if (!(write_mask & (1<<i))) 6006 continue; 6007 6008 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6009 alu.op = ALU_OP3_CNDGT_INT; 6010 alu.is_op3 = 1; 6011 alu.dst.write = 1; 6012 6013 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6014 6015 alu.src[0].sel = ctx->temp_reg; 6016 alu.src[0].chan = i; 6017 6018 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 6019 6020 alu.src[2].sel = ctx->temp_reg; 6021 alu.src[2].chan = i; 6022 6023 if (i == last_inst) 6024 alu.last = 1; 6025 r = r600_bytecode_add_alu(ctx->bc, &alu); 6026 if (r) 6027 return r; 6028 } 6029 return 0; 6030} 6031 6032 6033 6034static int tgsi_ssg(struct r600_shader_ctx *ctx) 6035{ 6036 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6037 struct r600_bytecode_alu alu; 6038 int i, r; 6039 6040 /* tmp = (src > 0 ? 1 : src) */ 6041 for (i = 0; i < 4; i++) { 6042 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6043 alu.op = ALU_OP3_CNDGT; 6044 alu.is_op3 = 1; 6045 6046 alu.dst.sel = ctx->temp_reg; 6047 alu.dst.chan = i; 6048 6049 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6050 alu.src[1].sel = V_SQ_ALU_SRC_1; 6051 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 6052 6053 if (i == 3) 6054 alu.last = 1; 6055 r = r600_bytecode_add_alu(ctx->bc, &alu); 6056 if (r) 6057 return r; 6058 } 6059 6060 /* dst = (-tmp > 0 ? -1 : tmp) */ 6061 for (i = 0; i < 4; i++) { 6062 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6063 alu.op = ALU_OP3_CNDGT; 6064 alu.is_op3 = 1; 6065 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6066 6067 alu.src[0].sel = ctx->temp_reg; 6068 alu.src[0].chan = i; 6069 alu.src[0].neg = 1; 6070 6071 alu.src[1].sel = V_SQ_ALU_SRC_1; 6072 alu.src[1].neg = 1; 6073 6074 alu.src[2].sel = ctx->temp_reg; 6075 alu.src[2].chan = i; 6076 6077 if (i == 3) 6078 alu.last = 1; 6079 r = r600_bytecode_add_alu(ctx->bc, &alu); 6080 if (r) 6081 return r; 6082 } 6083 return 0; 6084} 6085 6086static int tgsi_bfi(struct r600_shader_ctx *ctx) 6087{ 6088 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6089 struct r600_bytecode_alu alu; 6090 int i, r, t1, t2; 6091 6092 unsigned write_mask = inst->Dst[0].Register.WriteMask; 6093 int last_inst = tgsi_last_instruction(write_mask); 6094 6095 t1 = ctx->temp_reg; 6096 6097 for (i = 0; i < 4; i++) { 6098 if (!(write_mask & (1<<i))) 6099 continue; 6100 6101 /* create mask tmp */ 6102 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6103 alu.op = ALU_OP2_BFM_INT; 6104 alu.dst.sel = t1; 6105 alu.dst.chan = i; 6106 alu.dst.write = 1; 6107 alu.last = i == last_inst; 6108 6109 r600_bytecode_src(&alu.src[0], &ctx->src[3], i); 6110 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6111 6112 r = r600_bytecode_add_alu(ctx->bc, &alu); 6113 if (r) 6114 return r; 6115 } 6116 6117 t2 = r600_get_temp(ctx); 6118 6119 for (i = 0; i < 4; i++) { 6120 if (!(write_mask & (1<<i))) 6121 continue; 6122 6123 /* shift insert left */ 6124 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6125 alu.op = ALU_OP2_LSHL_INT; 6126 alu.dst.sel = t2; 6127 alu.dst.chan = i; 6128 alu.dst.write = 1; 6129 alu.last = i == last_inst; 6130 6131 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 6132 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6133 6134 r = r600_bytecode_add_alu(ctx->bc, &alu); 6135 if (r) 6136 return r; 6137 } 6138 6139 for (i = 0; i < 4; i++) { 6140 if (!(write_mask & (1<<i))) 6141 continue; 6142 6143 /* actual bitfield insert */ 6144 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6145 alu.op = ALU_OP3_BFI_INT; 6146 alu.is_op3 = 1; 6147 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6148 alu.dst.chan = i; 6149 alu.dst.write = 1; 6150 alu.last = i == last_inst; 6151 6152 alu.src[0].sel = t1; 6153 alu.src[0].chan = i; 6154 alu.src[1].sel = t2; 6155 alu.src[1].chan = i; 6156 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 6157 6158 r = r600_bytecode_add_alu(ctx->bc, &alu); 6159 if (r) 6160 return r; 6161 } 6162 6163 return 0; 6164} 6165 6166static int tgsi_msb(struct r600_shader_ctx *ctx) 6167{ 6168 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6169 struct r600_bytecode_alu alu; 6170 int i, r, t1, t2; 6171 6172 unsigned write_mask = inst->Dst[0].Register.WriteMask; 6173 int last_inst = tgsi_last_instruction(write_mask); 6174 6175 assert(ctx->inst_info->op == ALU_OP1_FFBH_INT || 6176 ctx->inst_info->op == ALU_OP1_FFBH_UINT); 6177 6178 t1 = ctx->temp_reg; 6179 6180 /* bit position is indexed from lsb by TGSI, and from msb by the hardware */ 6181 for (i = 0; i < 4; i++) { 6182 if (!(write_mask & (1<<i))) 6183 continue; 6184 6185 /* t1 = FFBH_INT / FFBH_UINT */ 6186 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6187 alu.op = ctx->inst_info->op; 6188 alu.dst.sel = t1; 6189 alu.dst.chan = i; 6190 alu.dst.write = 1; 6191 alu.last = i == last_inst; 6192 6193 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6194 6195 r = r600_bytecode_add_alu(ctx->bc, &alu); 6196 if (r) 6197 return r; 6198 } 6199 6200 t2 = r600_get_temp(ctx); 6201 6202 for (i = 0; i < 4; i++) { 6203 if (!(write_mask & (1<<i))) 6204 continue; 6205 6206 /* t2 = 31 - t1 */ 6207 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6208 alu.op = ALU_OP2_SUB_INT; 6209 alu.dst.sel = t2; 6210 alu.dst.chan = i; 6211 alu.dst.write = 1; 6212 alu.last = i == last_inst; 6213 6214 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 6215 alu.src[0].value = 31; 6216 alu.src[1].sel = t1; 6217 alu.src[1].chan = i; 6218 6219 r = r600_bytecode_add_alu(ctx->bc, &alu); 6220 if (r) 6221 return r; 6222 } 6223 6224 for (i = 0; i < 4; i++) { 6225 if (!(write_mask & (1<<i))) 6226 continue; 6227 6228 /* result = t1 >= 0 ? t2 : t1 */ 6229 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6230 alu.op = ALU_OP3_CNDGE_INT; 6231 alu.is_op3 = 1; 6232 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6233 alu.dst.chan = i; 6234 alu.dst.write = 1; 6235 alu.last = i == last_inst; 6236 6237 alu.src[0].sel = t1; 6238 alu.src[0].chan = i; 6239 alu.src[1].sel = t2; 6240 alu.src[1].chan = i; 6241 alu.src[2].sel = t1; 6242 alu.src[2].chan = i; 6243 6244 r = r600_bytecode_add_alu(ctx->bc, &alu); 6245 if (r) 6246 return r; 6247 } 6248 6249 return 0; 6250} 6251 6252static int tgsi_interp_egcm(struct r600_shader_ctx *ctx) 6253{ 6254 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6255 struct r600_bytecode_alu alu; 6256 int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti; 6257 unsigned location; 6258 int input; 6259 6260 assert(inst->Src[0].Register.File == TGSI_FILE_INPUT); 6261 6262 input = inst->Src[0].Register.Index; 6263 6264 /* Interpolators have been marked for use already by allocate_system_value_inputs */ 6265 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 6266 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6267 location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */ 6268 } 6269 else { 6270 location = TGSI_INTERPOLATE_LOC_CENTROID; 6271 } 6272 6273 k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location); 6274 if (k < 0) 6275 k = 0; 6276 interp_gpr = ctx->eg_interpolators[k].ij_index / 2; 6277 interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2); 6278 6279 /* NOTE: currently offset is not perspective correct */ 6280 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 6281 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6282 int sample_gpr = -1; 6283 int gradientsH, gradientsV; 6284 struct r600_bytecode_tex tex; 6285 6286 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6287 sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]); 6288 } 6289 6290 gradientsH = r600_get_temp(ctx); 6291 gradientsV = r600_get_temp(ctx); 6292 for (i = 0; i < 2; i++) { 6293 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6294 tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V; 6295 tex.src_gpr = interp_gpr; 6296 tex.src_sel_x = interp_base_chan + 0; 6297 tex.src_sel_y = interp_base_chan + 1; 6298 tex.src_sel_z = 0; 6299 tex.src_sel_w = 0; 6300 tex.dst_gpr = i == 0 ? gradientsH : gradientsV; 6301 tex.dst_sel_x = 0; 6302 tex.dst_sel_y = 1; 6303 tex.dst_sel_z = 7; 6304 tex.dst_sel_w = 7; 6305 tex.inst_mod = 1; // Use per pixel gradient calculation 6306 tex.sampler_id = 0; 6307 tex.resource_id = tex.sampler_id; 6308 r = r600_bytecode_add_tex(ctx->bc, &tex); 6309 if (r) 6310 return r; 6311 } 6312 6313 for (i = 0; i < 2; i++) { 6314 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6315 alu.op = ALU_OP3_MULADD; 6316 alu.is_op3 = 1; 6317 alu.src[0].sel = gradientsH; 6318 alu.src[0].chan = i; 6319 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6320 alu.src[1].sel = sample_gpr; 6321 alu.src[1].chan = 2; 6322 } 6323 else { 6324 r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 6325 } 6326 alu.src[2].sel = interp_gpr; 6327 alu.src[2].chan = interp_base_chan + i; 6328 alu.dst.sel = ctx->temp_reg; 6329 alu.dst.chan = i; 6330 alu.last = i == 1; 6331 6332 r = r600_bytecode_add_alu(ctx->bc, &alu); 6333 if (r) 6334 return r; 6335 } 6336 6337 for (i = 0; i < 2; i++) { 6338 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6339 alu.op = ALU_OP3_MULADD; 6340 alu.is_op3 = 1; 6341 alu.src[0].sel = gradientsV; 6342 alu.src[0].chan = i; 6343 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6344 alu.src[1].sel = sample_gpr; 6345 alu.src[1].chan = 3; 6346 } 6347 else { 6348 r600_bytecode_src(&alu.src[1], &ctx->src[1], 1); 6349 } 6350 alu.src[2].sel = ctx->temp_reg; 6351 alu.src[2].chan = i; 6352 alu.dst.sel = ctx->temp_reg; 6353 alu.dst.chan = i; 6354 alu.last = i == 1; 6355 6356 r = r600_bytecode_add_alu(ctx->bc, &alu); 6357 if (r) 6358 return r; 6359 } 6360 } 6361 6362 tmp = r600_get_temp(ctx); 6363 for (i = 0; i < 8; i++) { 6364 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6365 alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY; 6366 6367 alu.dst.sel = tmp; 6368 if ((i > 1 && i < 6)) { 6369 alu.dst.write = 1; 6370 } 6371 else { 6372 alu.dst.write = 0; 6373 } 6374 alu.dst.chan = i % 4; 6375 6376 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 6377 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6378 alu.src[0].sel = ctx->temp_reg; 6379 alu.src[0].chan = 1 - (i % 2); 6380 } else { 6381 alu.src[0].sel = interp_gpr; 6382 alu.src[0].chan = interp_base_chan + 1 - (i % 2); 6383 } 6384 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 6385 alu.src[1].chan = 0; 6386 6387 alu.last = i % 4 == 3; 6388 alu.bank_swizzle_force = SQ_ALU_VEC_210; 6389 6390 r = r600_bytecode_add_alu(ctx->bc, &alu); 6391 if (r) 6392 return r; 6393 } 6394 6395 // INTERP can't swizzle dst 6396 lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6397 for (i = 0; i <= lasti; i++) { 6398 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6399 continue; 6400 6401 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6402 alu.op = ALU_OP1_MOV; 6403 alu.src[0].sel = tmp; 6404 alu.src[0].chan = ctx->src[0].swizzle[i]; 6405 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6406 alu.dst.write = 1; 6407 alu.last = i == lasti; 6408 r = r600_bytecode_add_alu(ctx->bc, &alu); 6409 if (r) 6410 return r; 6411 } 6412 6413 return 0; 6414} 6415 6416 6417static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 6418{ 6419 struct r600_bytecode_alu alu; 6420 int i, r; 6421 6422 for (i = 0; i < 4; i++) { 6423 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6424 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 6425 alu.op = ALU_OP0_NOP; 6426 alu.dst.chan = i; 6427 } else { 6428 alu.op = ALU_OP1_MOV; 6429 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6430 alu.src[0].sel = ctx->temp_reg; 6431 alu.src[0].chan = i; 6432 } 6433 if (i == 3) { 6434 alu.last = 1; 6435 } 6436 r = r600_bytecode_add_alu(ctx->bc, &alu); 6437 if (r) 6438 return r; 6439 } 6440 return 0; 6441} 6442 6443static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, 6444 unsigned temp, int chan, 6445 struct r600_bytecode_alu_src *bc_src, 6446 const struct r600_shader_src *shader_src) 6447{ 6448 struct r600_bytecode_alu alu; 6449 int r; 6450 6451 r600_bytecode_src(bc_src, shader_src, chan); 6452 6453 /* op3 operands don't support abs modifier */ 6454 if (bc_src->abs) { 6455 assert(temp!=0); /* we actually need the extra register, make sure it is allocated. */ 6456 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6457 alu.op = ALU_OP1_MOV; 6458 alu.dst.sel = temp; 6459 alu.dst.chan = chan; 6460 alu.dst.write = 1; 6461 6462 alu.src[0] = *bc_src; 6463 alu.last = true; // sufficient? 6464 r = r600_bytecode_add_alu(ctx->bc, &alu); 6465 if (r) 6466 return r; 6467 6468 memset(bc_src, 0, sizeof(*bc_src)); 6469 bc_src->sel = temp; 6470 bc_src->chan = chan; 6471 } 6472 return 0; 6473} 6474 6475static int tgsi_op3(struct r600_shader_ctx *ctx) 6476{ 6477 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6478 struct r600_bytecode_alu alu; 6479 int i, j, r; 6480 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6481 int temp_regs[4]; 6482 6483 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 6484 temp_regs[j] = 0; 6485 if (ctx->src[j].abs) 6486 temp_regs[j] = r600_get_temp(ctx); 6487 } 6488 for (i = 0; i < lasti + 1; i++) { 6489 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6490 continue; 6491 6492 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6493 alu.op = ctx->inst_info->op; 6494 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 6495 r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]); 6496 if (r) 6497 return r; 6498 } 6499 6500 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6501 alu.dst.chan = i; 6502 alu.dst.write = 1; 6503 alu.is_op3 = 1; 6504 if (i == lasti) { 6505 alu.last = 1; 6506 } 6507 r = r600_bytecode_add_alu(ctx->bc, &alu); 6508 if (r) 6509 return r; 6510 } 6511 return 0; 6512} 6513 6514static int tgsi_dp(struct r600_shader_ctx *ctx) 6515{ 6516 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6517 struct r600_bytecode_alu alu; 6518 int i, j, r; 6519 6520 for (i = 0; i < 4; i++) { 6521 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6522 alu.op = ctx->inst_info->op; 6523 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 6524 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 6525 } 6526 6527 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6528 alu.dst.chan = i; 6529 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 6530 /* handle some special cases */ 6531 switch (inst->Instruction.Opcode) { 6532 case TGSI_OPCODE_DP2: 6533 if (i > 1) { 6534 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 6535 alu.src[0].chan = alu.src[1].chan = 0; 6536 } 6537 break; 6538 case TGSI_OPCODE_DP3: 6539 if (i > 2) { 6540 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 6541 alu.src[0].chan = alu.src[1].chan = 0; 6542 } 6543 break; 6544 case TGSI_OPCODE_DPH: 6545 if (i == 3) { 6546 alu.src[0].sel = V_SQ_ALU_SRC_1; 6547 alu.src[0].chan = 0; 6548 alu.src[0].neg = 0; 6549 } 6550 break; 6551 default: 6552 break; 6553 } 6554 if (i == 3) { 6555 alu.last = 1; 6556 } 6557 r = r600_bytecode_add_alu(ctx->bc, &alu); 6558 if (r) 6559 return r; 6560 } 6561 return 0; 6562} 6563 6564static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 6565 unsigned index) 6566{ 6567 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6568 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 6569 inst->Src[index].Register.File != TGSI_FILE_INPUT && 6570 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) || 6571 ctx->src[index].neg || ctx->src[index].abs || 6572 (inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == PIPE_SHADER_GEOMETRY); 6573} 6574 6575static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 6576 unsigned index) 6577{ 6578 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6579 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 6580} 6581 6582static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading) 6583{ 6584 struct r600_bytecode_vtx vtx; 6585 struct r600_bytecode_alu alu; 6586 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6587 int src_gpr, r, i; 6588 int id = tgsi_tex_get_src_gpr(ctx, 1); 6589 6590 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 6591 if (src_requires_loading) { 6592 for (i = 0; i < 4; i++) { 6593 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6594 alu.op = ALU_OP1_MOV; 6595 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6596 alu.dst.sel = ctx->temp_reg; 6597 alu.dst.chan = i; 6598 if (i == 3) 6599 alu.last = 1; 6600 alu.dst.write = 1; 6601 r = r600_bytecode_add_alu(ctx->bc, &alu); 6602 if (r) 6603 return r; 6604 } 6605 src_gpr = ctx->temp_reg; 6606 } 6607 6608 memset(&vtx, 0, sizeof(vtx)); 6609 vtx.op = FETCH_OP_VFETCH; 6610 vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; 6611 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 6612 vtx.src_gpr = src_gpr; 6613 vtx.mega_fetch_count = 16; 6614 vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 6615 vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 6616 vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ 6617 vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ 6618 vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */ 6619 vtx.use_const_fields = 1; 6620 6621 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 6622 return r; 6623 6624 if (ctx->bc->chip_class >= EVERGREEN) 6625 return 0; 6626 6627 for (i = 0; i < 4; i++) { 6628 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6629 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6630 continue; 6631 6632 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6633 alu.op = ALU_OP2_AND_INT; 6634 6635 alu.dst.chan = i; 6636 alu.dst.sel = vtx.dst_gpr; 6637 alu.dst.write = 1; 6638 6639 alu.src[0].sel = vtx.dst_gpr; 6640 alu.src[0].chan = i; 6641 6642 alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL; 6643 alu.src[1].sel += (id * 2); 6644 alu.src[1].chan = i % 4; 6645 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6646 6647 if (i == lasti) 6648 alu.last = 1; 6649 r = r600_bytecode_add_alu(ctx->bc, &alu); 6650 if (r) 6651 return r; 6652 } 6653 6654 if (inst->Dst[0].Register.WriteMask & 3) { 6655 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6656 alu.op = ALU_OP2_OR_INT; 6657 6658 alu.dst.chan = 3; 6659 alu.dst.sel = vtx.dst_gpr; 6660 alu.dst.write = 1; 6661 6662 alu.src[0].sel = vtx.dst_gpr; 6663 alu.src[0].chan = 3; 6664 6665 alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1; 6666 alu.src[1].chan = 0; 6667 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6668 6669 alu.last = 1; 6670 r = r600_bytecode_add_alu(ctx->bc, &alu); 6671 if (r) 6672 return r; 6673 } 6674 return 0; 6675} 6676 6677static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) 6678{ 6679 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6680 struct r600_bytecode_alu alu; 6681 int r; 6682 int id = tgsi_tex_get_src_gpr(ctx, 1); 6683 6684 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6685 alu.op = ALU_OP1_MOV; 6686 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 6687 if (ctx->bc->chip_class >= EVERGREEN) { 6688 /* channel 0 or 2 of each word */ 6689 alu.src[0].sel += (id / 2); 6690 alu.src[0].chan = (id % 2) * 2; 6691 } else { 6692 /* r600 we have them at channel 2 of the second dword */ 6693 alu.src[0].sel += (id * 2) + 1; 6694 alu.src[0].chan = 1; 6695 } 6696 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6697 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 6698 alu.last = 1; 6699 r = r600_bytecode_add_alu(ctx->bc, &alu); 6700 if (r) 6701 return r; 6702 return 0; 6703} 6704 6705static int tgsi_tex(struct r600_shader_ctx *ctx) 6706{ 6707 static float one_point_five = 1.5f; 6708 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6709 struct r600_bytecode_tex tex; 6710 struct r600_bytecode_alu alu; 6711 unsigned src_gpr; 6712 int r, i, j; 6713 int opcode; 6714 bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing && 6715 inst->Instruction.Opcode == TGSI_OPCODE_TXF && 6716 (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || 6717 inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); 6718 6719 bool txf_add_offsets = inst->Texture.NumOffsets && 6720 inst->Instruction.Opcode == TGSI_OPCODE_TXF && 6721 inst->Texture.Texture != TGSI_TEXTURE_BUFFER; 6722 6723 /* Texture fetch instructions can only use gprs as source. 6724 * Also they cannot negate the source or take the absolute value */ 6725 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 6726 inst->Instruction.Opcode != TGSI_OPCODE_TXQS && 6727 tgsi_tex_src_requires_loading(ctx, 0)) || 6728 read_compressed_msaa || txf_add_offsets; 6729 6730 boolean src_loaded = FALSE; 6731 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; 6732 int8_t offset_x = 0, offset_y = 0, offset_z = 0; 6733 boolean has_txq_cube_array_z = false; 6734 unsigned sampler_index_mode; 6735 6736 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 6737 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6738 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 6739 if (inst->Dst[0].Register.WriteMask & 4) { 6740 ctx->shader->has_txq_cube_array_z_comp = true; 6741 has_txq_cube_array_z = true; 6742 } 6743 6744 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 6745 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6746 inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 6747 inst->Instruction.Opcode == TGSI_OPCODE_TG4) 6748 sampler_src_reg = 2; 6749 6750 /* TGSI moves the sampler to src reg 3 for TXD */ 6751 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 6752 sampler_src_reg = 3; 6753 6754 sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 6755 6756 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 6757 6758 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 6759 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 6760 ctx->shader->uses_tex_buffers = true; 6761 return r600_do_buffer_txq(ctx); 6762 } 6763 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 6764 if (ctx->bc->chip_class < EVERGREEN) 6765 ctx->shader->uses_tex_buffers = true; 6766 return do_vtx_fetch_inst(ctx, src_requires_loading); 6767 } 6768 } 6769 6770 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 6771 int out_chan; 6772 /* Add perspective divide */ 6773 if (ctx->bc->chip_class == CAYMAN) { 6774 out_chan = 2; 6775 for (i = 0; i < 3; i++) { 6776 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6777 alu.op = ALU_OP1_RECIP_IEEE; 6778 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6779 6780 alu.dst.sel = ctx->temp_reg; 6781 alu.dst.chan = i; 6782 if (i == 2) 6783 alu.last = 1; 6784 if (out_chan == i) 6785 alu.dst.write = 1; 6786 r = r600_bytecode_add_alu(ctx->bc, &alu); 6787 if (r) 6788 return r; 6789 } 6790 6791 } else { 6792 out_chan = 3; 6793 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6794 alu.op = ALU_OP1_RECIP_IEEE; 6795 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6796 6797 alu.dst.sel = ctx->temp_reg; 6798 alu.dst.chan = out_chan; 6799 alu.last = 1; 6800 alu.dst.write = 1; 6801 r = r600_bytecode_add_alu(ctx->bc, &alu); 6802 if (r) 6803 return r; 6804 } 6805 6806 for (i = 0; i < 3; i++) { 6807 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6808 alu.op = ALU_OP2_MUL; 6809 alu.src[0].sel = ctx->temp_reg; 6810 alu.src[0].chan = out_chan; 6811 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6812 alu.dst.sel = ctx->temp_reg; 6813 alu.dst.chan = i; 6814 alu.dst.write = 1; 6815 r = r600_bytecode_add_alu(ctx->bc, &alu); 6816 if (r) 6817 return r; 6818 } 6819 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6820 alu.op = ALU_OP1_MOV; 6821 alu.src[0].sel = V_SQ_ALU_SRC_1; 6822 alu.src[0].chan = 0; 6823 alu.dst.sel = ctx->temp_reg; 6824 alu.dst.chan = 3; 6825 alu.last = 1; 6826 alu.dst.write = 1; 6827 r = r600_bytecode_add_alu(ctx->bc, &alu); 6828 if (r) 6829 return r; 6830 src_loaded = TRUE; 6831 src_gpr = ctx->temp_reg; 6832 } 6833 6834 6835 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 6836 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6837 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6838 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 6839 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 6840 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 6841 6842 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 6843 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 6844 6845 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 6846 for (i = 0; i < 4; i++) { 6847 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6848 alu.op = ALU_OP2_CUBE; 6849 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 6850 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 6851 alu.dst.sel = ctx->temp_reg; 6852 alu.dst.chan = i; 6853 if (i == 3) 6854 alu.last = 1; 6855 alu.dst.write = 1; 6856 r = r600_bytecode_add_alu(ctx->bc, &alu); 6857 if (r) 6858 return r; 6859 } 6860 6861 /* tmp1.z = RCP_e(|tmp1.z|) */ 6862 if (ctx->bc->chip_class == CAYMAN) { 6863 for (i = 0; i < 3; i++) { 6864 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6865 alu.op = ALU_OP1_RECIP_IEEE; 6866 alu.src[0].sel = ctx->temp_reg; 6867 alu.src[0].chan = 2; 6868 alu.src[0].abs = 1; 6869 alu.dst.sel = ctx->temp_reg; 6870 alu.dst.chan = i; 6871 if (i == 2) 6872 alu.dst.write = 1; 6873 if (i == 2) 6874 alu.last = 1; 6875 r = r600_bytecode_add_alu(ctx->bc, &alu); 6876 if (r) 6877 return r; 6878 } 6879 } else { 6880 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6881 alu.op = ALU_OP1_RECIP_IEEE; 6882 alu.src[0].sel = ctx->temp_reg; 6883 alu.src[0].chan = 2; 6884 alu.src[0].abs = 1; 6885 alu.dst.sel = ctx->temp_reg; 6886 alu.dst.chan = 2; 6887 alu.dst.write = 1; 6888 alu.last = 1; 6889 r = r600_bytecode_add_alu(ctx->bc, &alu); 6890 if (r) 6891 return r; 6892 } 6893 6894 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 6895 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 6896 * muladd has no writemask, have to use another temp 6897 */ 6898 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6899 alu.op = ALU_OP3_MULADD; 6900 alu.is_op3 = 1; 6901 6902 alu.src[0].sel = ctx->temp_reg; 6903 alu.src[0].chan = 0; 6904 alu.src[1].sel = ctx->temp_reg; 6905 alu.src[1].chan = 2; 6906 6907 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6908 alu.src[2].chan = 0; 6909 alu.src[2].value = *(uint32_t *)&one_point_five; 6910 6911 alu.dst.sel = ctx->temp_reg; 6912 alu.dst.chan = 0; 6913 alu.dst.write = 1; 6914 6915 r = r600_bytecode_add_alu(ctx->bc, &alu); 6916 if (r) 6917 return r; 6918 6919 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6920 alu.op = ALU_OP3_MULADD; 6921 alu.is_op3 = 1; 6922 6923 alu.src[0].sel = ctx->temp_reg; 6924 alu.src[0].chan = 1; 6925 alu.src[1].sel = ctx->temp_reg; 6926 alu.src[1].chan = 2; 6927 6928 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6929 alu.src[2].chan = 0; 6930 alu.src[2].value = *(uint32_t *)&one_point_five; 6931 6932 alu.dst.sel = ctx->temp_reg; 6933 alu.dst.chan = 1; 6934 alu.dst.write = 1; 6935 6936 alu.last = 1; 6937 r = r600_bytecode_add_alu(ctx->bc, &alu); 6938 if (r) 6939 return r; 6940 /* write initial compare value into Z component 6941 - W src 0 for shadow cube 6942 - X src 1 for shadow cube array */ 6943 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6944 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6945 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6946 alu.op = ALU_OP1_MOV; 6947 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 6948 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 6949 else 6950 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6951 alu.dst.sel = ctx->temp_reg; 6952 alu.dst.chan = 2; 6953 alu.dst.write = 1; 6954 alu.last = 1; 6955 r = r600_bytecode_add_alu(ctx->bc, &alu); 6956 if (r) 6957 return r; 6958 } 6959 6960 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6961 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6962 if (ctx->bc->chip_class >= EVERGREEN) { 6963 int mytmp = r600_get_temp(ctx); 6964 static const float eight = 8.0f; 6965 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6966 alu.op = ALU_OP1_MOV; 6967 alu.src[0].sel = ctx->temp_reg; 6968 alu.src[0].chan = 3; 6969 alu.dst.sel = mytmp; 6970 alu.dst.chan = 0; 6971 alu.dst.write = 1; 6972 alu.last = 1; 6973 r = r600_bytecode_add_alu(ctx->bc, &alu); 6974 if (r) 6975 return r; 6976 6977 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 6978 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6979 alu.op = ALU_OP3_MULADD; 6980 alu.is_op3 = 1; 6981 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6982 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6983 alu.src[1].chan = 0; 6984 alu.src[1].value = *(uint32_t *)&eight; 6985 alu.src[2].sel = mytmp; 6986 alu.src[2].chan = 0; 6987 alu.dst.sel = ctx->temp_reg; 6988 alu.dst.chan = 3; 6989 alu.dst.write = 1; 6990 alu.last = 1; 6991 r = r600_bytecode_add_alu(ctx->bc, &alu); 6992 if (r) 6993 return r; 6994 } else if (ctx->bc->chip_class < EVERGREEN) { 6995 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6996 tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 6997 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6998 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6999 tex.src_gpr = r600_get_temp(ctx); 7000 tex.src_sel_x = 0; 7001 tex.src_sel_y = 0; 7002 tex.src_sel_z = 0; 7003 tex.src_sel_w = 0; 7004 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 7005 tex.coord_type_x = 1; 7006 tex.coord_type_y = 1; 7007 tex.coord_type_z = 1; 7008 tex.coord_type_w = 1; 7009 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7010 alu.op = ALU_OP1_MOV; 7011 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7012 alu.dst.sel = tex.src_gpr; 7013 alu.dst.chan = 0; 7014 alu.last = 1; 7015 alu.dst.write = 1; 7016 r = r600_bytecode_add_alu(ctx->bc, &alu); 7017 if (r) 7018 return r; 7019 7020 r = r600_bytecode_add_tex(ctx->bc, &tex); 7021 if (r) 7022 return r; 7023 } 7024 7025 } 7026 7027 /* for cube forms of lod and bias we need to route things */ 7028 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 7029 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 7030 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7031 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 7032 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7033 alu.op = ALU_OP1_MOV; 7034 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7035 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 7036 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 7037 else 7038 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7039 alu.dst.sel = ctx->temp_reg; 7040 alu.dst.chan = 2; 7041 alu.last = 1; 7042 alu.dst.write = 1; 7043 r = r600_bytecode_add_alu(ctx->bc, &alu); 7044 if (r) 7045 return r; 7046 } 7047 7048 src_loaded = TRUE; 7049 src_gpr = ctx->temp_reg; 7050 } 7051 7052 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 7053 int temp_h = 0, temp_v = 0; 7054 int start_val = 0; 7055 7056 /* if we've already loaded the src (i.e. CUBE don't reload it). */ 7057 if (src_loaded == TRUE) 7058 start_val = 1; 7059 else 7060 src_loaded = TRUE; 7061 for (i = start_val; i < 3; i++) { 7062 int treg = r600_get_temp(ctx); 7063 7064 if (i == 0) 7065 src_gpr = treg; 7066 else if (i == 1) 7067 temp_h = treg; 7068 else 7069 temp_v = treg; 7070 7071 for (j = 0; j < 4; j++) { 7072 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7073 alu.op = ALU_OP1_MOV; 7074 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 7075 alu.dst.sel = treg; 7076 alu.dst.chan = j; 7077 if (j == 3) 7078 alu.last = 1; 7079 alu.dst.write = 1; 7080 r = r600_bytecode_add_alu(ctx->bc, &alu); 7081 if (r) 7082 return r; 7083 } 7084 } 7085 for (i = 1; i < 3; i++) { 7086 /* set gradients h/v */ 7087 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7088 tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : 7089 FETCH_OP_SET_GRADIENTS_V; 7090 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7091 tex.sampler_index_mode = sampler_index_mode; 7092 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7093 tex.resource_index_mode = sampler_index_mode; 7094 7095 tex.src_gpr = (i == 1) ? temp_h : temp_v; 7096 tex.src_sel_x = 0; 7097 tex.src_sel_y = 1; 7098 tex.src_sel_z = 2; 7099 tex.src_sel_w = 3; 7100 7101 tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */ 7102 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 7103 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 7104 tex.coord_type_x = 1; 7105 tex.coord_type_y = 1; 7106 tex.coord_type_z = 1; 7107 tex.coord_type_w = 1; 7108 } 7109 r = r600_bytecode_add_tex(ctx->bc, &tex); 7110 if (r) 7111 return r; 7112 } 7113 } 7114 7115 if (src_requires_loading && !src_loaded) { 7116 for (i = 0; i < 4; i++) { 7117 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7118 alu.op = ALU_OP1_MOV; 7119 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7120 alu.dst.sel = ctx->temp_reg; 7121 alu.dst.chan = i; 7122 if (i == 3) 7123 alu.last = 1; 7124 alu.dst.write = 1; 7125 r = r600_bytecode_add_alu(ctx->bc, &alu); 7126 if (r) 7127 return r; 7128 } 7129 src_loaded = TRUE; 7130 src_gpr = ctx->temp_reg; 7131 } 7132 7133 /* get offset values */ 7134 if (inst->Texture.NumOffsets) { 7135 assert(inst->Texture.NumOffsets == 1); 7136 7137 /* The texture offset feature doesn't work with the TXF instruction 7138 * and must be emulated by adding the offset to the texture coordinates. */ 7139 if (txf_add_offsets) { 7140 const struct tgsi_texture_offset *off = inst->TexOffsets; 7141 7142 switch (inst->Texture.Texture) { 7143 case TGSI_TEXTURE_3D: 7144 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7145 alu.op = ALU_OP2_ADD_INT; 7146 alu.src[0].sel = src_gpr; 7147 alu.src[0].chan = 2; 7148 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7149 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 7150 alu.dst.sel = src_gpr; 7151 alu.dst.chan = 2; 7152 alu.dst.write = 1; 7153 alu.last = 1; 7154 r = r600_bytecode_add_alu(ctx->bc, &alu); 7155 if (r) 7156 return r; 7157 /* fall through */ 7158 7159 case TGSI_TEXTURE_2D: 7160 case TGSI_TEXTURE_SHADOW2D: 7161 case TGSI_TEXTURE_RECT: 7162 case TGSI_TEXTURE_SHADOWRECT: 7163 case TGSI_TEXTURE_2D_ARRAY: 7164 case TGSI_TEXTURE_SHADOW2D_ARRAY: 7165 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7166 alu.op = ALU_OP2_ADD_INT; 7167 alu.src[0].sel = src_gpr; 7168 alu.src[0].chan = 1; 7169 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7170 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 7171 alu.dst.sel = src_gpr; 7172 alu.dst.chan = 1; 7173 alu.dst.write = 1; 7174 alu.last = 1; 7175 r = r600_bytecode_add_alu(ctx->bc, &alu); 7176 if (r) 7177 return r; 7178 /* fall through */ 7179 7180 case TGSI_TEXTURE_1D: 7181 case TGSI_TEXTURE_SHADOW1D: 7182 case TGSI_TEXTURE_1D_ARRAY: 7183 case TGSI_TEXTURE_SHADOW1D_ARRAY: 7184 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7185 alu.op = ALU_OP2_ADD_INT; 7186 alu.src[0].sel = src_gpr; 7187 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7188 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 7189 alu.dst.sel = src_gpr; 7190 alu.dst.write = 1; 7191 alu.last = 1; 7192 r = r600_bytecode_add_alu(ctx->bc, &alu); 7193 if (r) 7194 return r; 7195 break; 7196 /* texture offsets do not apply to other texture targets */ 7197 } 7198 } else { 7199 switch (inst->Texture.Texture) { 7200 case TGSI_TEXTURE_3D: 7201 offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 7202 /* fallthrough */ 7203 case TGSI_TEXTURE_2D: 7204 case TGSI_TEXTURE_SHADOW2D: 7205 case TGSI_TEXTURE_RECT: 7206 case TGSI_TEXTURE_SHADOWRECT: 7207 case TGSI_TEXTURE_2D_ARRAY: 7208 case TGSI_TEXTURE_SHADOW2D_ARRAY: 7209 offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 7210 /* fallthrough */ 7211 case TGSI_TEXTURE_1D: 7212 case TGSI_TEXTURE_SHADOW1D: 7213 case TGSI_TEXTURE_1D_ARRAY: 7214 case TGSI_TEXTURE_SHADOW1D_ARRAY: 7215 offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 7216 } 7217 } 7218 } 7219 7220 /* Obtain the sample index for reading a compressed MSAA color texture. 7221 * To read the FMASK, we use the ldfptr instruction, which tells us 7222 * where the samples are stored. 7223 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 7224 * which is the identity mapping. Each nibble says which physical sample 7225 * should be fetched to get that sample. 7226 * 7227 * Assume src.z contains the sample index. It should be modified like this: 7228 * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 7229 * Then fetch the texel with src. 7230 */ 7231 if (read_compressed_msaa) { 7232 unsigned sample_chan = 3; 7233 unsigned temp = r600_get_temp(ctx); 7234 assert(src_loaded); 7235 7236 /* temp.w = ldfptr() */ 7237 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7238 tex.op = FETCH_OP_LD; 7239 tex.inst_mod = 1; /* to indicate this is ldfptr */ 7240 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7241 tex.sampler_index_mode = sampler_index_mode; 7242 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7243 tex.resource_index_mode = sampler_index_mode; 7244 tex.src_gpr = src_gpr; 7245 tex.dst_gpr = temp; 7246 tex.dst_sel_x = 7; /* mask out these components */ 7247 tex.dst_sel_y = 7; 7248 tex.dst_sel_z = 7; 7249 tex.dst_sel_w = 0; /* store X */ 7250 tex.src_sel_x = 0; 7251 tex.src_sel_y = 1; 7252 tex.src_sel_z = 2; 7253 tex.src_sel_w = 3; 7254 tex.offset_x = offset_x; 7255 tex.offset_y = offset_y; 7256 tex.offset_z = offset_z; 7257 r = r600_bytecode_add_tex(ctx->bc, &tex); 7258 if (r) 7259 return r; 7260 7261 /* temp.x = sample_index*4 */ 7262 if (ctx->bc->chip_class == CAYMAN) { 7263 for (i = 0 ; i < 4; i++) { 7264 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7265 alu.op = ALU_OP2_MULLO_INT; 7266 alu.src[0].sel = src_gpr; 7267 alu.src[0].chan = sample_chan; 7268 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7269 alu.src[1].value = 4; 7270 alu.dst.sel = temp; 7271 alu.dst.chan = i; 7272 alu.dst.write = i == 0; 7273 if (i == 3) 7274 alu.last = 1; 7275 r = r600_bytecode_add_alu(ctx->bc, &alu); 7276 if (r) 7277 return r; 7278 } 7279 } else { 7280 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7281 alu.op = ALU_OP2_MULLO_INT; 7282 alu.src[0].sel = src_gpr; 7283 alu.src[0].chan = sample_chan; 7284 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7285 alu.src[1].value = 4; 7286 alu.dst.sel = temp; 7287 alu.dst.chan = 0; 7288 alu.dst.write = 1; 7289 alu.last = 1; 7290 r = r600_bytecode_add_alu(ctx->bc, &alu); 7291 if (r) 7292 return r; 7293 } 7294 7295 /* sample_index = temp.w >> temp.x */ 7296 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7297 alu.op = ALU_OP2_LSHR_INT; 7298 alu.src[0].sel = temp; 7299 alu.src[0].chan = 3; 7300 alu.src[1].sel = temp; 7301 alu.src[1].chan = 0; 7302 alu.dst.sel = src_gpr; 7303 alu.dst.chan = sample_chan; 7304 alu.dst.write = 1; 7305 alu.last = 1; 7306 r = r600_bytecode_add_alu(ctx->bc, &alu); 7307 if (r) 7308 return r; 7309 7310 /* sample_index & 0xF */ 7311 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7312 alu.op = ALU_OP2_AND_INT; 7313 alu.src[0].sel = src_gpr; 7314 alu.src[0].chan = sample_chan; 7315 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7316 alu.src[1].value = 0xF; 7317 alu.dst.sel = src_gpr; 7318 alu.dst.chan = sample_chan; 7319 alu.dst.write = 1; 7320 alu.last = 1; 7321 r = r600_bytecode_add_alu(ctx->bc, &alu); 7322 if (r) 7323 return r; 7324#if 0 7325 /* visualize the FMASK */ 7326 for (i = 0; i < 4; i++) { 7327 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7328 alu.op = ALU_OP1_INT_TO_FLT; 7329 alu.src[0].sel = src_gpr; 7330 alu.src[0].chan = sample_chan; 7331 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7332 alu.dst.chan = i; 7333 alu.dst.write = 1; 7334 alu.last = 1; 7335 r = r600_bytecode_add_alu(ctx->bc, &alu); 7336 if (r) 7337 return r; 7338 } 7339 return 0; 7340#endif 7341 } 7342 7343 /* does this shader want a num layers from TXQ for a cube array? */ 7344 if (has_txq_cube_array_z) { 7345 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7346 7347 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7348 alu.op = ALU_OP1_MOV; 7349 7350 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 7351 if (ctx->bc->chip_class >= EVERGREEN) { 7352 /* channel 1 or 3 of each word */ 7353 alu.src[0].sel += (id / 2); 7354 alu.src[0].chan = ((id % 2) * 2) + 1; 7355 } else { 7356 /* r600 we have them at channel 2 of the second dword */ 7357 alu.src[0].sel += (id * 2) + 1; 7358 alu.src[0].chan = 2; 7359 } 7360 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 7361 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 7362 alu.last = 1; 7363 r = r600_bytecode_add_alu(ctx->bc, &alu); 7364 if (r) 7365 return r; 7366 /* disable writemask from texture instruction */ 7367 inst->Dst[0].Register.WriteMask &= ~4; 7368 } 7369 7370 opcode = ctx->inst_info->op; 7371 if (opcode == FETCH_OP_GATHER4 && 7372 inst->TexOffsets[0].File != TGSI_FILE_NULL && 7373 inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { 7374 opcode = FETCH_OP_GATHER4_O; 7375 7376 /* GATHER4_O/GATHER4_C_O use offset values loaded by 7377 SET_TEXTURE_OFFSETS instruction. The immediate offset values 7378 encoded in the instruction are ignored. */ 7379 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7380 tex.op = FETCH_OP_SET_TEXTURE_OFFSETS; 7381 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7382 tex.sampler_index_mode = sampler_index_mode; 7383 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7384 tex.resource_index_mode = sampler_index_mode; 7385 7386 tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; 7387 tex.src_sel_x = inst->TexOffsets[0].SwizzleX; 7388 tex.src_sel_y = inst->TexOffsets[0].SwizzleY; 7389 tex.src_sel_z = inst->TexOffsets[0].SwizzleZ; 7390 tex.src_sel_w = 4; 7391 7392 tex.dst_sel_x = 7; 7393 tex.dst_sel_y = 7; 7394 tex.dst_sel_z = 7; 7395 tex.dst_sel_w = 7; 7396 7397 r = r600_bytecode_add_tex(ctx->bc, &tex); 7398 if (r) 7399 return r; 7400 } 7401 7402 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 7403 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 7404 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 7405 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7406 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 7407 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 7408 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7409 switch (opcode) { 7410 case FETCH_OP_SAMPLE: 7411 opcode = FETCH_OP_SAMPLE_C; 7412 break; 7413 case FETCH_OP_SAMPLE_L: 7414 opcode = FETCH_OP_SAMPLE_C_L; 7415 break; 7416 case FETCH_OP_SAMPLE_LB: 7417 opcode = FETCH_OP_SAMPLE_C_LB; 7418 break; 7419 case FETCH_OP_SAMPLE_G: 7420 opcode = FETCH_OP_SAMPLE_C_G; 7421 break; 7422 /* Texture gather variants */ 7423 case FETCH_OP_GATHER4: 7424 opcode = FETCH_OP_GATHER4_C; 7425 break; 7426 case FETCH_OP_GATHER4_O: 7427 opcode = FETCH_OP_GATHER4_C_O; 7428 break; 7429 } 7430 } 7431 7432 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7433 tex.op = opcode; 7434 7435 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7436 tex.sampler_index_mode = sampler_index_mode; 7437 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7438 tex.resource_index_mode = sampler_index_mode; 7439 tex.src_gpr = src_gpr; 7440 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7441 7442 if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE || 7443 inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) { 7444 tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */ 7445 } 7446 7447 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 7448 int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]; 7449 tex.inst_mod = texture_component_select; 7450 7451 if (ctx->bc->chip_class == CAYMAN) { 7452 /* GATHER4 result order is different from TGSI TG4 */ 7453 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7; 7454 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7; 7455 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7; 7456 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7457 } else { 7458 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7459 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 7460 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7461 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7462 } 7463 } 7464 else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) { 7465 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7466 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7467 tex.dst_sel_z = 7; 7468 tex.dst_sel_w = 7; 7469 } 7470 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 7471 tex.dst_sel_x = 3; 7472 tex.dst_sel_y = 7; 7473 tex.dst_sel_z = 7; 7474 tex.dst_sel_w = 7; 7475 } 7476 else { 7477 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7478 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7479 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 7480 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7481 } 7482 7483 7484 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ || 7485 inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 7486 tex.src_sel_x = 4; 7487 tex.src_sel_y = 4; 7488 tex.src_sel_z = 4; 7489 tex.src_sel_w = 4; 7490 } else if (src_loaded) { 7491 tex.src_sel_x = 0; 7492 tex.src_sel_y = 1; 7493 tex.src_sel_z = 2; 7494 tex.src_sel_w = 3; 7495 } else { 7496 tex.src_sel_x = ctx->src[0].swizzle[0]; 7497 tex.src_sel_y = ctx->src[0].swizzle[1]; 7498 tex.src_sel_z = ctx->src[0].swizzle[2]; 7499 tex.src_sel_w = ctx->src[0].swizzle[3]; 7500 tex.src_rel = ctx->src[0].rel; 7501 } 7502 7503 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 7504 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7505 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7506 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7507 tex.src_sel_x = 1; 7508 tex.src_sel_y = 0; 7509 tex.src_sel_z = 3; 7510 tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 7511 } 7512 7513 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 7514 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 7515 tex.coord_type_x = 1; 7516 tex.coord_type_y = 1; 7517 } 7518 tex.coord_type_z = 1; 7519 tex.coord_type_w = 1; 7520 7521 tex.offset_x = offset_x; 7522 tex.offset_y = offset_y; 7523 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && 7524 (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 7525 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { 7526 tex.offset_z = 0; 7527 } 7528 else { 7529 tex.offset_z = offset_z; 7530 } 7531 7532 /* Put the depth for comparison in W. 7533 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 7534 * Some instructions expect the depth in Z. */ 7535 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 7536 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 7537 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 7538 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 7539 opcode != FETCH_OP_SAMPLE_C_L && 7540 opcode != FETCH_OP_SAMPLE_C_LB) { 7541 tex.src_sel_w = tex.src_sel_z; 7542 } 7543 7544 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 7545 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 7546 if (opcode == FETCH_OP_SAMPLE_C_L || 7547 opcode == FETCH_OP_SAMPLE_C_LB) { 7548 /* the array index is read from Y */ 7549 tex.coord_type_y = 0; 7550 } else { 7551 /* the array index is read from Z */ 7552 tex.coord_type_z = 0; 7553 tex.src_sel_z = tex.src_sel_y; 7554 } 7555 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 7556 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 7557 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7558 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 7559 (ctx->bc->chip_class >= EVERGREEN))) 7560 /* the array index is read from Z */ 7561 tex.coord_type_z = 0; 7562 7563 /* mask unused source components */ 7564 if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) { 7565 switch (inst->Texture.Texture) { 7566 case TGSI_TEXTURE_2D: 7567 case TGSI_TEXTURE_RECT: 7568 tex.src_sel_z = 7; 7569 tex.src_sel_w = 7; 7570 break; 7571 case TGSI_TEXTURE_1D_ARRAY: 7572 tex.src_sel_y = 7; 7573 tex.src_sel_w = 7; 7574 break; 7575 case TGSI_TEXTURE_1D: 7576 tex.src_sel_y = 7; 7577 tex.src_sel_z = 7; 7578 tex.src_sel_w = 7; 7579 break; 7580 } 7581 } 7582 7583 r = r600_bytecode_add_tex(ctx->bc, &tex); 7584 if (r) 7585 return r; 7586 7587 /* add shadow ambient support - gallium doesn't do it yet */ 7588 return 0; 7589} 7590 7591static int tgsi_lrp(struct r600_shader_ctx *ctx) 7592{ 7593 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7594 struct r600_bytecode_alu alu; 7595 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7596 unsigned i, temp_regs[2]; 7597 int r; 7598 7599 /* optimize if it's just an equal balance */ 7600 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 7601 for (i = 0; i < lasti + 1; i++) { 7602 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7603 continue; 7604 7605 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7606 alu.op = ALU_OP2_ADD; 7607 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 7608 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 7609 alu.omod = 3; 7610 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7611 alu.dst.chan = i; 7612 if (i == lasti) { 7613 alu.last = 1; 7614 } 7615 r = r600_bytecode_add_alu(ctx->bc, &alu); 7616 if (r) 7617 return r; 7618 } 7619 return 0; 7620 } 7621 7622 /* 1 - src0 */ 7623 for (i = 0; i < lasti + 1; i++) { 7624 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7625 continue; 7626 7627 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7628 alu.op = ALU_OP2_ADD; 7629 alu.src[0].sel = V_SQ_ALU_SRC_1; 7630 alu.src[0].chan = 0; 7631 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 7632 r600_bytecode_src_toggle_neg(&alu.src[1]); 7633 alu.dst.sel = ctx->temp_reg; 7634 alu.dst.chan = i; 7635 if (i == lasti) { 7636 alu.last = 1; 7637 } 7638 alu.dst.write = 1; 7639 r = r600_bytecode_add_alu(ctx->bc, &alu); 7640 if (r) 7641 return r; 7642 } 7643 7644 /* (1 - src0) * src2 */ 7645 for (i = 0; i < lasti + 1; i++) { 7646 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7647 continue; 7648 7649 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7650 alu.op = ALU_OP2_MUL; 7651 alu.src[0].sel = ctx->temp_reg; 7652 alu.src[0].chan = i; 7653 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 7654 alu.dst.sel = ctx->temp_reg; 7655 alu.dst.chan = i; 7656 if (i == lasti) { 7657 alu.last = 1; 7658 } 7659 alu.dst.write = 1; 7660 r = r600_bytecode_add_alu(ctx->bc, &alu); 7661 if (r) 7662 return r; 7663 } 7664 7665 /* src0 * src1 + (1 - src0) * src2 */ 7666 if (ctx->src[0].abs) 7667 temp_regs[0] = r600_get_temp(ctx); 7668 else 7669 temp_regs[0] = 0; 7670 if (ctx->src[1].abs) 7671 temp_regs[1] = r600_get_temp(ctx); 7672 else 7673 temp_regs[1] = 0; 7674 7675 for (i = 0; i < lasti + 1; i++) { 7676 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7677 continue; 7678 7679 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7680 alu.op = ALU_OP3_MULADD; 7681 alu.is_op3 = 1; 7682 r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); 7683 if (r) 7684 return r; 7685 r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]); 7686 if (r) 7687 return r; 7688 alu.src[2].sel = ctx->temp_reg; 7689 alu.src[2].chan = i; 7690 7691 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7692 alu.dst.chan = i; 7693 if (i == lasti) { 7694 alu.last = 1; 7695 } 7696 r = r600_bytecode_add_alu(ctx->bc, &alu); 7697 if (r) 7698 return r; 7699 } 7700 return 0; 7701} 7702 7703static int tgsi_cmp(struct r600_shader_ctx *ctx) 7704{ 7705 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7706 struct r600_bytecode_alu alu; 7707 int i, r, j; 7708 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7709 int temp_regs[3]; 7710 7711 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 7712 temp_regs[j] = 0; 7713 if (ctx->src[j].abs) 7714 temp_regs[j] = r600_get_temp(ctx); 7715 } 7716 7717 for (i = 0; i < lasti + 1; i++) { 7718 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7719 continue; 7720 7721 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7722 alu.op = ALU_OP3_CNDGE; 7723 r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); 7724 if (r) 7725 return r; 7726 r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]); 7727 if (r) 7728 return r; 7729 r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]); 7730 if (r) 7731 return r; 7732 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7733 alu.dst.chan = i; 7734 alu.dst.write = 1; 7735 alu.is_op3 = 1; 7736 if (i == lasti) 7737 alu.last = 1; 7738 r = r600_bytecode_add_alu(ctx->bc, &alu); 7739 if (r) 7740 return r; 7741 } 7742 return 0; 7743} 7744 7745static int tgsi_ucmp(struct r600_shader_ctx *ctx) 7746{ 7747 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7748 struct r600_bytecode_alu alu; 7749 int i, r; 7750 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7751 7752 for (i = 0; i < lasti + 1; i++) { 7753 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7754 continue; 7755 7756 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7757 alu.op = ALU_OP3_CNDE_INT; 7758 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7759 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 7760 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 7761 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7762 alu.dst.chan = i; 7763 alu.dst.write = 1; 7764 alu.is_op3 = 1; 7765 if (i == lasti) 7766 alu.last = 1; 7767 r = r600_bytecode_add_alu(ctx->bc, &alu); 7768 if (r) 7769 return r; 7770 } 7771 return 0; 7772} 7773 7774static int tgsi_xpd(struct r600_shader_ctx *ctx) 7775{ 7776 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7777 static const unsigned int src0_swizzle[] = {2, 0, 1}; 7778 static const unsigned int src1_swizzle[] = {1, 2, 0}; 7779 struct r600_bytecode_alu alu; 7780 uint32_t use_temp = 0; 7781 int i, r; 7782 7783 if (inst->Dst[0].Register.WriteMask != 0xf) 7784 use_temp = 1; 7785 7786 for (i = 0; i < 4; i++) { 7787 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7788 alu.op = ALU_OP2_MUL; 7789 if (i < 3) { 7790 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 7791 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 7792 } else { 7793 alu.src[0].sel = V_SQ_ALU_SRC_0; 7794 alu.src[0].chan = i; 7795 alu.src[1].sel = V_SQ_ALU_SRC_0; 7796 alu.src[1].chan = i; 7797 } 7798 7799 alu.dst.sel = ctx->temp_reg; 7800 alu.dst.chan = i; 7801 alu.dst.write = 1; 7802 7803 if (i == 3) 7804 alu.last = 1; 7805 r = r600_bytecode_add_alu(ctx->bc, &alu); 7806 if (r) 7807 return r; 7808 } 7809 7810 for (i = 0; i < 4; i++) { 7811 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7812 alu.op = ALU_OP3_MULADD; 7813 7814 if (i < 3) { 7815 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 7816 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 7817 } else { 7818 alu.src[0].sel = V_SQ_ALU_SRC_0; 7819 alu.src[0].chan = i; 7820 alu.src[1].sel = V_SQ_ALU_SRC_0; 7821 alu.src[1].chan = i; 7822 } 7823 7824 alu.src[2].sel = ctx->temp_reg; 7825 alu.src[2].neg = 1; 7826 alu.src[2].chan = i; 7827 7828 if (use_temp) 7829 alu.dst.sel = ctx->temp_reg; 7830 else 7831 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7832 alu.dst.chan = i; 7833 alu.dst.write = 1; 7834 alu.is_op3 = 1; 7835 if (i == 3) 7836 alu.last = 1; 7837 r = r600_bytecode_add_alu(ctx->bc, &alu); 7838 if (r) 7839 return r; 7840 } 7841 if (use_temp) 7842 return tgsi_helper_copy(ctx, inst); 7843 return 0; 7844} 7845 7846static int tgsi_exp(struct r600_shader_ctx *ctx) 7847{ 7848 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7849 struct r600_bytecode_alu alu; 7850 int r; 7851 unsigned i; 7852 7853 /* result.x = 2^floor(src); */ 7854 if (inst->Dst[0].Register.WriteMask & 1) { 7855 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7856 7857 alu.op = ALU_OP1_FLOOR; 7858 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7859 7860 alu.dst.sel = ctx->temp_reg; 7861 alu.dst.chan = 0; 7862 alu.dst.write = 1; 7863 alu.last = 1; 7864 r = r600_bytecode_add_alu(ctx->bc, &alu); 7865 if (r) 7866 return r; 7867 7868 if (ctx->bc->chip_class == CAYMAN) { 7869 for (i = 0; i < 3; i++) { 7870 alu.op = ALU_OP1_EXP_IEEE; 7871 alu.src[0].sel = ctx->temp_reg; 7872 alu.src[0].chan = 0; 7873 7874 alu.dst.sel = ctx->temp_reg; 7875 alu.dst.chan = i; 7876 alu.dst.write = i == 0; 7877 alu.last = i == 2; 7878 r = r600_bytecode_add_alu(ctx->bc, &alu); 7879 if (r) 7880 return r; 7881 } 7882 } else { 7883 alu.op = ALU_OP1_EXP_IEEE; 7884 alu.src[0].sel = ctx->temp_reg; 7885 alu.src[0].chan = 0; 7886 7887 alu.dst.sel = ctx->temp_reg; 7888 alu.dst.chan = 0; 7889 alu.dst.write = 1; 7890 alu.last = 1; 7891 r = r600_bytecode_add_alu(ctx->bc, &alu); 7892 if (r) 7893 return r; 7894 } 7895 } 7896 7897 /* result.y = tmp - floor(tmp); */ 7898 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 7899 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7900 7901 alu.op = ALU_OP1_FRACT; 7902 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7903 7904 alu.dst.sel = ctx->temp_reg; 7905#if 0 7906 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7907 if (r) 7908 return r; 7909#endif 7910 alu.dst.write = 1; 7911 alu.dst.chan = 1; 7912 7913 alu.last = 1; 7914 7915 r = r600_bytecode_add_alu(ctx->bc, &alu); 7916 if (r) 7917 return r; 7918 } 7919 7920 /* result.z = RoughApprox2ToX(tmp);*/ 7921 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 7922 if (ctx->bc->chip_class == CAYMAN) { 7923 for (i = 0; i < 3; i++) { 7924 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7925 alu.op = ALU_OP1_EXP_IEEE; 7926 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7927 7928 alu.dst.sel = ctx->temp_reg; 7929 alu.dst.chan = i; 7930 if (i == 2) { 7931 alu.dst.write = 1; 7932 alu.last = 1; 7933 } 7934 7935 r = r600_bytecode_add_alu(ctx->bc, &alu); 7936 if (r) 7937 return r; 7938 } 7939 } else { 7940 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7941 alu.op = ALU_OP1_EXP_IEEE; 7942 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7943 7944 alu.dst.sel = ctx->temp_reg; 7945 alu.dst.write = 1; 7946 alu.dst.chan = 2; 7947 7948 alu.last = 1; 7949 7950 r = r600_bytecode_add_alu(ctx->bc, &alu); 7951 if (r) 7952 return r; 7953 } 7954 } 7955 7956 /* result.w = 1.0;*/ 7957 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 7958 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7959 7960 alu.op = ALU_OP1_MOV; 7961 alu.src[0].sel = V_SQ_ALU_SRC_1; 7962 alu.src[0].chan = 0; 7963 7964 alu.dst.sel = ctx->temp_reg; 7965 alu.dst.chan = 3; 7966 alu.dst.write = 1; 7967 alu.last = 1; 7968 r = r600_bytecode_add_alu(ctx->bc, &alu); 7969 if (r) 7970 return r; 7971 } 7972 return tgsi_helper_copy(ctx, inst); 7973} 7974 7975static int tgsi_log(struct r600_shader_ctx *ctx) 7976{ 7977 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7978 struct r600_bytecode_alu alu; 7979 int r; 7980 unsigned i; 7981 7982 /* result.x = floor(log2(|src|)); */ 7983 if (inst->Dst[0].Register.WriteMask & 1) { 7984 if (ctx->bc->chip_class == CAYMAN) { 7985 for (i = 0; i < 3; i++) { 7986 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7987 7988 alu.op = ALU_OP1_LOG_IEEE; 7989 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7990 r600_bytecode_src_set_abs(&alu.src[0]); 7991 7992 alu.dst.sel = ctx->temp_reg; 7993 alu.dst.chan = i; 7994 if (i == 0) 7995 alu.dst.write = 1; 7996 if (i == 2) 7997 alu.last = 1; 7998 r = r600_bytecode_add_alu(ctx->bc, &alu); 7999 if (r) 8000 return r; 8001 } 8002 8003 } else { 8004 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8005 8006 alu.op = ALU_OP1_LOG_IEEE; 8007 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8008 r600_bytecode_src_set_abs(&alu.src[0]); 8009 8010 alu.dst.sel = ctx->temp_reg; 8011 alu.dst.chan = 0; 8012 alu.dst.write = 1; 8013 alu.last = 1; 8014 r = r600_bytecode_add_alu(ctx->bc, &alu); 8015 if (r) 8016 return r; 8017 } 8018 8019 alu.op = ALU_OP1_FLOOR; 8020 alu.src[0].sel = ctx->temp_reg; 8021 alu.src[0].chan = 0; 8022 8023 alu.dst.sel = ctx->temp_reg; 8024 alu.dst.chan = 0; 8025 alu.dst.write = 1; 8026 alu.last = 1; 8027 8028 r = r600_bytecode_add_alu(ctx->bc, &alu); 8029 if (r) 8030 return r; 8031 } 8032 8033 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 8034 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 8035 8036 if (ctx->bc->chip_class == CAYMAN) { 8037 for (i = 0; i < 3; i++) { 8038 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8039 8040 alu.op = ALU_OP1_LOG_IEEE; 8041 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8042 r600_bytecode_src_set_abs(&alu.src[0]); 8043 8044 alu.dst.sel = ctx->temp_reg; 8045 alu.dst.chan = i; 8046 if (i == 1) 8047 alu.dst.write = 1; 8048 if (i == 2) 8049 alu.last = 1; 8050 8051 r = r600_bytecode_add_alu(ctx->bc, &alu); 8052 if (r) 8053 return r; 8054 } 8055 } else { 8056 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8057 8058 alu.op = ALU_OP1_LOG_IEEE; 8059 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8060 r600_bytecode_src_set_abs(&alu.src[0]); 8061 8062 alu.dst.sel = ctx->temp_reg; 8063 alu.dst.chan = 1; 8064 alu.dst.write = 1; 8065 alu.last = 1; 8066 8067 r = r600_bytecode_add_alu(ctx->bc, &alu); 8068 if (r) 8069 return r; 8070 } 8071 8072 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8073 8074 alu.op = ALU_OP1_FLOOR; 8075 alu.src[0].sel = ctx->temp_reg; 8076 alu.src[0].chan = 1; 8077 8078 alu.dst.sel = ctx->temp_reg; 8079 alu.dst.chan = 1; 8080 alu.dst.write = 1; 8081 alu.last = 1; 8082 8083 r = r600_bytecode_add_alu(ctx->bc, &alu); 8084 if (r) 8085 return r; 8086 8087 if (ctx->bc->chip_class == CAYMAN) { 8088 for (i = 0; i < 3; i++) { 8089 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8090 alu.op = ALU_OP1_EXP_IEEE; 8091 alu.src[0].sel = ctx->temp_reg; 8092 alu.src[0].chan = 1; 8093 8094 alu.dst.sel = ctx->temp_reg; 8095 alu.dst.chan = i; 8096 if (i == 1) 8097 alu.dst.write = 1; 8098 if (i == 2) 8099 alu.last = 1; 8100 8101 r = r600_bytecode_add_alu(ctx->bc, &alu); 8102 if (r) 8103 return r; 8104 } 8105 } else { 8106 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8107 alu.op = ALU_OP1_EXP_IEEE; 8108 alu.src[0].sel = ctx->temp_reg; 8109 alu.src[0].chan = 1; 8110 8111 alu.dst.sel = ctx->temp_reg; 8112 alu.dst.chan = 1; 8113 alu.dst.write = 1; 8114 alu.last = 1; 8115 8116 r = r600_bytecode_add_alu(ctx->bc, &alu); 8117 if (r) 8118 return r; 8119 } 8120 8121 if (ctx->bc->chip_class == CAYMAN) { 8122 for (i = 0; i < 3; i++) { 8123 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8124 alu.op = ALU_OP1_RECIP_IEEE; 8125 alu.src[0].sel = ctx->temp_reg; 8126 alu.src[0].chan = 1; 8127 8128 alu.dst.sel = ctx->temp_reg; 8129 alu.dst.chan = i; 8130 if (i == 1) 8131 alu.dst.write = 1; 8132 if (i == 2) 8133 alu.last = 1; 8134 8135 r = r600_bytecode_add_alu(ctx->bc, &alu); 8136 if (r) 8137 return r; 8138 } 8139 } else { 8140 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8141 alu.op = ALU_OP1_RECIP_IEEE; 8142 alu.src[0].sel = ctx->temp_reg; 8143 alu.src[0].chan = 1; 8144 8145 alu.dst.sel = ctx->temp_reg; 8146 alu.dst.chan = 1; 8147 alu.dst.write = 1; 8148 alu.last = 1; 8149 8150 r = r600_bytecode_add_alu(ctx->bc, &alu); 8151 if (r) 8152 return r; 8153 } 8154 8155 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8156 8157 alu.op = ALU_OP2_MUL; 8158 8159 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8160 r600_bytecode_src_set_abs(&alu.src[0]); 8161 8162 alu.src[1].sel = ctx->temp_reg; 8163 alu.src[1].chan = 1; 8164 8165 alu.dst.sel = ctx->temp_reg; 8166 alu.dst.chan = 1; 8167 alu.dst.write = 1; 8168 alu.last = 1; 8169 8170 r = r600_bytecode_add_alu(ctx->bc, &alu); 8171 if (r) 8172 return r; 8173 } 8174 8175 /* result.z = log2(|src|);*/ 8176 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 8177 if (ctx->bc->chip_class == CAYMAN) { 8178 for (i = 0; i < 3; i++) { 8179 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8180 8181 alu.op = ALU_OP1_LOG_IEEE; 8182 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8183 r600_bytecode_src_set_abs(&alu.src[0]); 8184 8185 alu.dst.sel = ctx->temp_reg; 8186 if (i == 2) 8187 alu.dst.write = 1; 8188 alu.dst.chan = i; 8189 if (i == 2) 8190 alu.last = 1; 8191 8192 r = r600_bytecode_add_alu(ctx->bc, &alu); 8193 if (r) 8194 return r; 8195 } 8196 } else { 8197 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8198 8199 alu.op = ALU_OP1_LOG_IEEE; 8200 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8201 r600_bytecode_src_set_abs(&alu.src[0]); 8202 8203 alu.dst.sel = ctx->temp_reg; 8204 alu.dst.write = 1; 8205 alu.dst.chan = 2; 8206 alu.last = 1; 8207 8208 r = r600_bytecode_add_alu(ctx->bc, &alu); 8209 if (r) 8210 return r; 8211 } 8212 } 8213 8214 /* result.w = 1.0; */ 8215 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 8216 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8217 8218 alu.op = ALU_OP1_MOV; 8219 alu.src[0].sel = V_SQ_ALU_SRC_1; 8220 alu.src[0].chan = 0; 8221 8222 alu.dst.sel = ctx->temp_reg; 8223 alu.dst.chan = 3; 8224 alu.dst.write = 1; 8225 alu.last = 1; 8226 8227 r = r600_bytecode_add_alu(ctx->bc, &alu); 8228 if (r) 8229 return r; 8230 } 8231 8232 return tgsi_helper_copy(ctx, inst); 8233} 8234 8235static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 8236{ 8237 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8238 struct r600_bytecode_alu alu; 8239 int r; 8240 int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 8241 unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index); 8242 8243 assert(inst->Dst[0].Register.Index < 3); 8244 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8245 8246 switch (inst->Instruction.Opcode) { 8247 case TGSI_OPCODE_ARL: 8248 alu.op = ALU_OP1_FLT_TO_INT_FLOOR; 8249 break; 8250 case TGSI_OPCODE_ARR: 8251 alu.op = ALU_OP1_FLT_TO_INT; 8252 break; 8253 case TGSI_OPCODE_UARL: 8254 alu.op = ALU_OP1_MOV; 8255 break; 8256 default: 8257 assert(0); 8258 return -1; 8259 } 8260 8261 for (i = 0; i <= lasti; ++i) { 8262 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 8263 continue; 8264 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 8265 alu.last = i == lasti; 8266 alu.dst.sel = reg; 8267 alu.dst.chan = i; 8268 alu.dst.write = 1; 8269 r = r600_bytecode_add_alu(ctx->bc, &alu); 8270 if (r) 8271 return r; 8272 } 8273 8274 if (inst->Dst[0].Register.Index > 0) 8275 ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0; 8276 else 8277 ctx->bc->ar_loaded = 0; 8278 8279 return 0; 8280} 8281static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 8282{ 8283 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8284 struct r600_bytecode_alu alu; 8285 int r; 8286 int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 8287 8288 switch (inst->Instruction.Opcode) { 8289 case TGSI_OPCODE_ARL: 8290 memset(&alu, 0, sizeof(alu)); 8291 alu.op = ALU_OP1_FLOOR; 8292 alu.dst.sel = ctx->bc->ar_reg; 8293 alu.dst.write = 1; 8294 for (i = 0; i <= lasti; ++i) { 8295 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 8296 alu.dst.chan = i; 8297 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 8298 alu.last = i == lasti; 8299 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 8300 return r; 8301 } 8302 } 8303 8304 memset(&alu, 0, sizeof(alu)); 8305 alu.op = ALU_OP1_FLT_TO_INT; 8306 alu.src[0].sel = ctx->bc->ar_reg; 8307 alu.dst.sel = ctx->bc->ar_reg; 8308 alu.dst.write = 1; 8309 /* FLT_TO_INT is trans-only on r600/r700 */ 8310 alu.last = TRUE; 8311 for (i = 0; i <= lasti; ++i) { 8312 alu.dst.chan = i; 8313 alu.src[0].chan = i; 8314 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 8315 return r; 8316 } 8317 break; 8318 case TGSI_OPCODE_ARR: 8319 memset(&alu, 0, sizeof(alu)); 8320 alu.op = ALU_OP1_FLT_TO_INT; 8321 alu.dst.sel = ctx->bc->ar_reg; 8322 alu.dst.write = 1; 8323 /* FLT_TO_INT is trans-only on r600/r700 */ 8324 alu.last = TRUE; 8325 for (i = 0; i <= lasti; ++i) { 8326 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 8327 alu.dst.chan = i; 8328 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 8329 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 8330 return r; 8331 } 8332 } 8333 break; 8334 case TGSI_OPCODE_UARL: 8335 memset(&alu, 0, sizeof(alu)); 8336 alu.op = ALU_OP1_MOV; 8337 alu.dst.sel = ctx->bc->ar_reg; 8338 alu.dst.write = 1; 8339 for (i = 0; i <= lasti; ++i) { 8340 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 8341 alu.dst.chan = i; 8342 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 8343 alu.last = i == lasti; 8344 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 8345 return r; 8346 } 8347 } 8348 break; 8349 default: 8350 assert(0); 8351 return -1; 8352 } 8353 8354 ctx->bc->ar_loaded = 0; 8355 return 0; 8356} 8357 8358static int tgsi_opdst(struct r600_shader_ctx *ctx) 8359{ 8360 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8361 struct r600_bytecode_alu alu; 8362 int i, r = 0; 8363 8364 for (i = 0; i < 4; i++) { 8365 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8366 8367 alu.op = ALU_OP2_MUL; 8368 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 8369 8370 if (i == 0 || i == 3) { 8371 alu.src[0].sel = V_SQ_ALU_SRC_1; 8372 } else { 8373 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 8374 } 8375 8376 if (i == 0 || i == 2) { 8377 alu.src[1].sel = V_SQ_ALU_SRC_1; 8378 } else { 8379 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 8380 } 8381 if (i == 3) 8382 alu.last = 1; 8383 r = r600_bytecode_add_alu(ctx->bc, &alu); 8384 if (r) 8385 return r; 8386 } 8387 return 0; 8388} 8389 8390static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type) 8391{ 8392 struct r600_bytecode_alu alu; 8393 int r; 8394 8395 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8396 alu.op = opcode; 8397 alu.execute_mask = 1; 8398 alu.update_pred = 1; 8399 8400 alu.dst.sel = ctx->temp_reg; 8401 alu.dst.write = 1; 8402 alu.dst.chan = 0; 8403 8404 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8405 alu.src[1].sel = V_SQ_ALU_SRC_0; 8406 alu.src[1].chan = 0; 8407 8408 alu.last = 1; 8409 8410 r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type); 8411 if (r) 8412 return r; 8413 return 0; 8414} 8415 8416static int pops(struct r600_shader_ctx *ctx, int pops) 8417{ 8418 unsigned force_pop = ctx->bc->force_add_cf; 8419 8420 if (!force_pop) { 8421 int alu_pop = 3; 8422 if (ctx->bc->cf_last) { 8423 if (ctx->bc->cf_last->op == CF_OP_ALU) 8424 alu_pop = 0; 8425 else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER) 8426 alu_pop = 1; 8427 } 8428 alu_pop += pops; 8429 if (alu_pop == 1) { 8430 ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER; 8431 ctx->bc->force_add_cf = 1; 8432 } else if (alu_pop == 2) { 8433 ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER; 8434 ctx->bc->force_add_cf = 1; 8435 } else { 8436 force_pop = 1; 8437 } 8438 } 8439 8440 if (force_pop) { 8441 r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); 8442 ctx->bc->cf_last->pop_count = pops; 8443 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 8444 } 8445 8446 return 0; 8447} 8448 8449static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, 8450 unsigned reason) 8451{ 8452 struct r600_stack_info *stack = &ctx->bc->stack; 8453 unsigned elements, entries; 8454 8455 unsigned entry_size = stack->entry_size; 8456 8457 elements = (stack->loop + stack->push_wqm ) * entry_size; 8458 elements += stack->push; 8459 8460 switch (ctx->bc->chip_class) { 8461 case R600: 8462 case R700: 8463 /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on 8464 * the stack must be reserved to hold the current active/continue 8465 * masks */ 8466 if (reason == FC_PUSH_VPM) { 8467 elements += 2; 8468 } 8469 break; 8470 8471 case CAYMAN: 8472 /* r9xx: any stack operation on empty stack consumes 2 additional 8473 * elements */ 8474 elements += 2; 8475 8476 /* fallthrough */ 8477 /* FIXME: do the two elements added above cover the cases for the 8478 * r8xx+ below? */ 8479 8480 case EVERGREEN: 8481 /* r8xx+: 2 extra elements are not always required, but one extra 8482 * element must be added for each of the following cases: 8483 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest 8484 * stack usage. 8485 * (Currently we don't use ALU_ELSE_AFTER.) 8486 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM 8487 * PUSH instruction executed. 8488 * 8489 * NOTE: it seems we also need to reserve additional element in some 8490 * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader, 8491 * then STACK_SIZE should be 2 instead of 1 */ 8492 if (reason == FC_PUSH_VPM) { 8493 elements += 1; 8494 } 8495 break; 8496 8497 default: 8498 assert(0); 8499 break; 8500 } 8501 8502 /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 8503 * for all chips, so we use 4 in the final formula, not the real entry_size 8504 * for the chip */ 8505 entry_size = 4; 8506 8507 entries = (elements + (entry_size - 1)) / entry_size; 8508 8509 if (entries > stack->max_entries) 8510 stack->max_entries = entries; 8511} 8512 8513static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 8514{ 8515 switch(reason) { 8516 case FC_PUSH_VPM: 8517 --ctx->bc->stack.push; 8518 assert(ctx->bc->stack.push >= 0); 8519 break; 8520 case FC_PUSH_WQM: 8521 --ctx->bc->stack.push_wqm; 8522 assert(ctx->bc->stack.push_wqm >= 0); 8523 break; 8524 case FC_LOOP: 8525 --ctx->bc->stack.loop; 8526 assert(ctx->bc->stack.loop >= 0); 8527 break; 8528 default: 8529 assert(0); 8530 break; 8531 } 8532} 8533 8534static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) 8535{ 8536 switch (reason) { 8537 case FC_PUSH_VPM: 8538 ++ctx->bc->stack.push; 8539 break; 8540 case FC_PUSH_WQM: 8541 ++ctx->bc->stack.push_wqm; 8542 case FC_LOOP: 8543 ++ctx->bc->stack.loop; 8544 break; 8545 default: 8546 assert(0); 8547 } 8548 8549 callstack_update_max_depth(ctx, reason); 8550} 8551 8552static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 8553{ 8554 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 8555 8556 sp->mid = realloc((void *)sp->mid, 8557 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 8558 sp->mid[sp->num_mid] = ctx->bc->cf_last; 8559 sp->num_mid++; 8560} 8561 8562static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 8563{ 8564 ctx->bc->fc_sp++; 8565 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 8566 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 8567} 8568 8569static void fc_poplevel(struct r600_shader_ctx *ctx) 8570{ 8571 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 8572 free(sp->mid); 8573 sp->mid = NULL; 8574 sp->num_mid = 0; 8575 sp->start = NULL; 8576 sp->type = 0; 8577 ctx->bc->fc_sp--; 8578} 8579 8580#if 0 8581static int emit_return(struct r600_shader_ctx *ctx) 8582{ 8583 r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN)); 8584 return 0; 8585} 8586 8587static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 8588{ 8589 8590 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP)); 8591 ctx->bc->cf_last->pop_count = pops; 8592 /* XXX work out offset */ 8593 return 0; 8594} 8595 8596static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 8597{ 8598 return 0; 8599} 8600 8601static void emit_testflag(struct r600_shader_ctx *ctx) 8602{ 8603 8604} 8605 8606static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 8607{ 8608 emit_testflag(ctx); 8609 emit_jump_to_offset(ctx, 1, 4); 8610 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 8611 pops(ctx, ifidx + 1); 8612 emit_return(ctx); 8613} 8614 8615static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 8616{ 8617 emit_testflag(ctx); 8618 8619 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 8620 ctx->bc->cf_last->pop_count = 1; 8621 8622 fc_set_mid(ctx, fc_sp); 8623 8624 pops(ctx, 1); 8625} 8626#endif 8627 8628static int emit_if(struct r600_shader_ctx *ctx, int opcode) 8629{ 8630 int alu_type = CF_OP_ALU_PUSH_BEFORE; 8631 8632 /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by 8633 * LOOP_STARTxxx for nested loops may put the branch stack into a state 8634 * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this 8635 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */ 8636 if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) { 8637 r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH); 8638 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 8639 alu_type = CF_OP_ALU; 8640 } 8641 8642 emit_logic_pred(ctx, opcode, alu_type); 8643 8644 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); 8645 8646 fc_pushlevel(ctx, FC_IF); 8647 8648 callstack_push(ctx, FC_PUSH_VPM); 8649 return 0; 8650} 8651 8652static int tgsi_if(struct r600_shader_ctx *ctx) 8653{ 8654 return emit_if(ctx, ALU_OP2_PRED_SETNE); 8655} 8656 8657static int tgsi_uif(struct r600_shader_ctx *ctx) 8658{ 8659 return emit_if(ctx, ALU_OP2_PRED_SETNE_INT); 8660} 8661 8662static int tgsi_else(struct r600_shader_ctx *ctx) 8663{ 8664 r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE); 8665 ctx->bc->cf_last->pop_count = 1; 8666 8667 fc_set_mid(ctx, ctx->bc->fc_sp); 8668 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 8669 return 0; 8670} 8671 8672static int tgsi_endif(struct r600_shader_ctx *ctx) 8673{ 8674 pops(ctx, 1); 8675 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 8676 R600_ERR("if/endif unbalanced in shader\n"); 8677 return -1; 8678 } 8679 8680 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 8681 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 8682 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 8683 } else { 8684 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 8685 } 8686 fc_poplevel(ctx); 8687 8688 callstack_pop(ctx, FC_PUSH_VPM); 8689 return 0; 8690} 8691 8692static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 8693{ 8694 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not 8695 * limited to 4096 iterations, like the other LOOP_* instructions. */ 8696 r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10); 8697 8698 fc_pushlevel(ctx, FC_LOOP); 8699 8700 /* check stack depth */ 8701 callstack_push(ctx, FC_LOOP); 8702 return 0; 8703} 8704 8705static int tgsi_endloop(struct r600_shader_ctx *ctx) 8706{ 8707 unsigned i; 8708 8709 r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END); 8710 8711 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 8712 R600_ERR("loop/endloop in shader code are not paired.\n"); 8713 return -EINVAL; 8714 } 8715 8716 /* fixup loop pointers - from r600isa 8717 LOOP END points to CF after LOOP START, 8718 LOOP START point to CF after LOOP END 8719 BRK/CONT point to LOOP END CF 8720 */ 8721 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 8722 8723 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 8724 8725 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 8726 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 8727 } 8728 /* XXX add LOOPRET support */ 8729 fc_poplevel(ctx); 8730 callstack_pop(ctx, FC_LOOP); 8731 return 0; 8732} 8733 8734static int tgsi_loop_breakc(struct r600_shader_ctx *ctx) 8735{ 8736 int r; 8737 unsigned int fscp; 8738 8739 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 8740 { 8741 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 8742 break; 8743 } 8744 if (fscp == 0) { 8745 R600_ERR("BREAKC not inside loop/endloop pair\n"); 8746 return -EINVAL; 8747 } 8748 8749 if (ctx->bc->chip_class == EVERGREEN && 8750 ctx->bc->family != CHIP_CYPRESS && 8751 ctx->bc->family != CHIP_JUNIPER) { 8752 /* HW bug: ALU_BREAK does not save the active mask correctly */ 8753 r = tgsi_uif(ctx); 8754 if (r) 8755 return r; 8756 8757 r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK); 8758 if (r) 8759 return r; 8760 fc_set_mid(ctx, fscp); 8761 8762 return tgsi_endif(ctx); 8763 } else { 8764 r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK); 8765 if (r) 8766 return r; 8767 fc_set_mid(ctx, fscp); 8768 } 8769 8770 return 0; 8771} 8772 8773static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 8774{ 8775 unsigned int fscp; 8776 8777 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 8778 { 8779 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 8780 break; 8781 } 8782 8783 if (fscp == 0) { 8784 R600_ERR("Break not inside loop/endloop pair\n"); 8785 return -EINVAL; 8786 } 8787 8788 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 8789 8790 fc_set_mid(ctx, fscp); 8791 8792 return 0; 8793} 8794 8795static int tgsi_gs_emit(struct r600_shader_ctx *ctx) 8796{ 8797 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8798 int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX]; 8799 int r; 8800 8801 if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) 8802 emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE); 8803 8804 r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 8805 if (!r) { 8806 ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream 8807 if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) 8808 return emit_inc_ring_offset(ctx, stream, TRUE); 8809 } 8810 return r; 8811} 8812 8813static int tgsi_umad(struct r600_shader_ctx *ctx) 8814{ 8815 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8816 struct r600_bytecode_alu alu; 8817 int i, j, k, r; 8818 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 8819 8820 /* src0 * src1 */ 8821 for (i = 0; i < lasti + 1; i++) { 8822 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 8823 continue; 8824 8825 if (ctx->bc->chip_class == CAYMAN) { 8826 for (j = 0 ; j < 4; j++) { 8827 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8828 8829 alu.op = ALU_OP2_MULLO_UINT; 8830 for (k = 0; k < inst->Instruction.NumSrcRegs; k++) { 8831 r600_bytecode_src(&alu.src[k], &ctx->src[k], i); 8832 } 8833 alu.dst.chan = j; 8834 alu.dst.sel = ctx->temp_reg; 8835 alu.dst.write = (j == i); 8836 if (j == 3) 8837 alu.last = 1; 8838 r = r600_bytecode_add_alu(ctx->bc, &alu); 8839 if (r) 8840 return r; 8841 } 8842 } else { 8843 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8844 8845 alu.dst.chan = i; 8846 alu.dst.sel = ctx->temp_reg; 8847 alu.dst.write = 1; 8848 8849 alu.op = ALU_OP2_MULLO_UINT; 8850 for (j = 0; j < 2; j++) { 8851 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 8852 } 8853 8854 alu.last = 1; 8855 r = r600_bytecode_add_alu(ctx->bc, &alu); 8856 if (r) 8857 return r; 8858 } 8859 } 8860 8861 8862 for (i = 0; i < lasti + 1; i++) { 8863 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 8864 continue; 8865 8866 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8867 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 8868 8869 alu.op = ALU_OP2_ADD_INT; 8870 8871 alu.src[0].sel = ctx->temp_reg; 8872 alu.src[0].chan = i; 8873 8874 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 8875 if (i == lasti) { 8876 alu.last = 1; 8877 } 8878 r = r600_bytecode_add_alu(ctx->bc, &alu); 8879 if (r) 8880 return r; 8881 } 8882 return 0; 8883} 8884 8885static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 8886 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl}, 8887 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 8888 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 8889 8890 /* XXX: 8891 * For state trackers other than OpenGL, we'll want to use 8892 * _RECIP_IEEE instead. 8893 */ 8894 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 8895 8896 [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, 8897 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 8898 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 8899 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 8900 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 8901 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 8902 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 8903 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 8904 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 8905 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 8906 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 8907 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 8908 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 8909 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 8910 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 8911 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 8912 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 8913 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 8914 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 8915 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 8916 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 8917 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 8918 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 8919 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 8920 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 8921 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 8922 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 8923 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 8924 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 8925 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 8926 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 8927 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 8928 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 8929 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8930 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 8931 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 8932 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8933 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8934 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8935 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8936 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 8937 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 8938 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 8939 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 8940 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 8941 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 8942 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 8943 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 8944 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 8945 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 8946 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 8947 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8948 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8949 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8950 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8951 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 8952 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 8953 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl}, 8954 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 8955 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 8956 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 8957 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 8958 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 8959 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 8960 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8961 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 8962 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 8963 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 8964 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8965 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 8966 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 8967 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 8968 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 8969 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 8970 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 8971 [TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 8972 [TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 8973 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 8974 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 8975 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 8976 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 8977 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 8978 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 8979 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans}, 8980 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 8981 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 8982 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 8983 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 8984 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 8985 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8986 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 8987 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8988 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 8989 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 8990 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 8991 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 8992 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8993 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 8994 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8995 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8996 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 8997 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 8998 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 8999 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 9000 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 9001 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 9002 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 9003 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 9004 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 9005 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 9006 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 9007 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc}, 9008 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 9009 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 9010 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 9011 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, 9012 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 9013 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 9014 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 9015 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 9016 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 9017 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans}, 9018 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 9019 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, 9020 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 9021 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 9022 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 9023 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 9024 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 9025 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 9026 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 9027 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 9028 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 9029 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 9030 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans}, 9031 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 9032 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap}, 9033 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9034 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 9035 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 9036 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9037 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 9038 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 9039 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 9040 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 9041 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 9042 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 9043 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 9044 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 9045 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 9046 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 9047 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 9048 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 9049 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl}, 9050 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 9051 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 9052 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 9053 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9054 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 9055 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9056 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9057 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9058 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 9059 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 9060 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 9061 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 9062 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 9063 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9064 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9065 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9066 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9067 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9068 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9069 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 9070 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9071 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9072 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 9073 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 9074 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported}, 9075 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported}, 9076 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported}, 9077 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported}, 9078 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported}, 9079 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported}, 9080 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported}, 9081 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported}, 9082 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported}, 9083 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_unsupported}, 9084 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported}, 9085 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported}, 9086 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported}, 9087 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 9088}; 9089 9090static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 9091 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 9092 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 9093 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 9094 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 9095 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, 9096 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 9097 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 9098 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 9099 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 9100 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 9101 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 9102 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 9103 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 9104 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 9105 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 9106 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 9107 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 9108 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 9109 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 9110 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 9111 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 9112 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 9113 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 9114 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 9115 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 9116 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 9117 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 9118 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 9119 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 9120 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 9121 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 9122 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 9123 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 9124 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 9125 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 9126 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 9127 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 9128 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9129 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9130 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 9131 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9132 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9133 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9134 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9135 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 9136 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 9137 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 9138 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 9139 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 9140 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 9141 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 9142 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 9143 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 9144 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 9145 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 9146 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9147 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9148 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9149 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9150 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 9151 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 9152 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 9153 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 9154 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 9155 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 9156 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 9157 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 9158 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 9159 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9160 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 9161 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 9162 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 9163 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9164 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 9165 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 9166 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 9167 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 9168 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 9169 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 9170 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9171 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9172 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 9173 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 9174 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 9175 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 9176 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 9177 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 9178 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 9179 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 9180 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 9181 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 9182 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 9183 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 9184 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9185 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 9186 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9187 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 9188 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 9189 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 9190 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 9191 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9192 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 9193 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9194 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9195 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 9196 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 9197 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 9198 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 9199 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 9200 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 9201 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 9202 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 9203 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 9204 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 9205 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 9206 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 9207 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 9208 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 9209 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 9210 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i}, 9211 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 9212 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 9213 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 9214 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 9215 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 9216 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 9217 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 9218 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_f2i}, 9219 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 9220 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 9221 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 9222 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 9223 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 9224 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 9225 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 9226 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 9227 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 9228 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 9229 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 9230 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 9231 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 9232 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9233 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 9234 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 9235 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9236 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 9237 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 9238 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 9239 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 9240 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 9241 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 9242 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 9243 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 9244 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 9245 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 9246 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 9247 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 9248 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 9249 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 9250 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 9251 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 9252 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9253 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 9254 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9255 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9256 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9257 [TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 9258 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 9259 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 9260 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 9261 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 9262 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9263 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9264 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9265 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9266 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9267 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9268 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 9269 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9270 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9271 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 9272 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 9273 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 9274 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 9275 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 9276 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 9277 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 9278 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 9279 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 9280 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 9281 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 9282 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 9283 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9284 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9285 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9286 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 9287 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 9288 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 9289 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 9290 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 9291 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 9292 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 9293 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 9294 [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 9295 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 9296 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 9297 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 9298 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 9299 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 9300 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 9301 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 9302 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 9303 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 9304 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 9305 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 9306 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 9307 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 9308 [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 9309 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 9310}; 9311 9312static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 9313 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 9314 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 9315 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 9316 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, 9317 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, 9318 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 9319 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 9320 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 9321 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 9322 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 9323 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 9324 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 9325 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 9326 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 9327 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 9328 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 9329 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 9330 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 9331 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 9332 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 9333 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, 9334 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 9335 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 9336 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 9337 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 9338 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 9339 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 9340 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 9341 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, 9342 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, 9343 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow}, 9344 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 9345 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 9346 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 9347 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 9348 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 9349 [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig}, 9350 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9351 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9352 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 9353 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9354 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9355 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9356 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9357 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 9358 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 9359 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 9360 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 9361 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, cayman_trig}, 9362 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 9363 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 9364 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 9365 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 9366 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 9367 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 9368 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9369 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9370 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9371 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9372 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 9373 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 9374 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 9375 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 9376 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 9377 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 9378 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 9379 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 9380 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 9381 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9382 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 9383 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 9384 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 9385 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9386 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 9387 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 9388 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 9389 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 9390 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 9391 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 9392 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9393 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9394 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 9395 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 9396 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 9397 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2}, 9398 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 9399 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 9400 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 9401 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 9402 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 9403 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 9404 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 9405 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 9406 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9407 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 9408 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9409 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 9410 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 9411 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 9412 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 9413 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9414 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 9415 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9416 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9417 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 9418 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 9419 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 9420 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 9421 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 9422 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 9423 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 9424 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 9425 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 9426 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 9427 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 9428 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 9429 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 9430 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 9431 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 9432 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2}, 9433 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 9434 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 9435 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 9436 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 9437 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 9438 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 9439 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 9440 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2}, 9441 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2}, 9442 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 9443 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 9444 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 9445 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 9446 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 9447 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 9448 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr}, 9449 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 9450 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 9451 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 9452 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 9453 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 9454 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9455 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 9456 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 9457 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9458 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 9459 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 9460 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 9461 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 9462 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 9463 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 9464 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 9465 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 9466 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 9467 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 9468 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 9469 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 9470 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 9471 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 9472 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 9473 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 9474 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9475 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 9476 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9477 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9478 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9479 [TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 9480 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 9481 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 9482 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 9483 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 9484 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9485 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9486 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9487 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9488 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9489 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9490 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 9491 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9492 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9493 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr}, 9494 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, 9495 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 9496 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 9497 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 9498 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 9499 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 9500 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 9501 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 9502 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 9503 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 9504 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 9505 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9506 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9507 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9508 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 9509 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 9510 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 9511 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 9512 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 9513 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 9514 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 9515 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 9516 [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 9517 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 9518 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 9519 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 9520 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 9521 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 9522 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 9523 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 9524 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 9525 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 9526 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 9527 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 9528 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 9529 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 9530 [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 9531 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 9532}; 9533