r600_shader.c revision 9662a43d23c0ae46b4294561476b57e22e76ae04
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "r600_sq.h" 24#include "r600_llvm.h" 25#include "r600_formats.h" 26#include "r600_opcodes.h" 27#include "r600_shader.h" 28#include "r600d.h" 29 30#include "sb/sb_public.h" 31 32#include "pipe/p_shader_tokens.h" 33#include "tgsi/tgsi_info.h" 34#include "tgsi/tgsi_parse.h" 35#include "tgsi/tgsi_scan.h" 36#include "tgsi/tgsi_dump.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39#include <stdio.h> 40#include <errno.h> 41 42/* CAYMAN notes 43Why CAYMAN got loops for lots of instructions is explained here. 44 45-These 8xx t-slot only ops are implemented in all vector slots. 46MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 47These 8xx t-slot only opcodes become vector ops, with all four 48slots expecting the arguments on sources a and b. 
Result is 49broadcast to all channels. 50MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64 51These 8xx t-slot only opcodes become vector ops in the z, y, and 52x slots. 53EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 54RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 55SQRT_IEEE/_64 56SIN/COS 57The w slot may have an independent co-issued operation, or if the 58result is required to be in the w slot, the opcode above may be 59issued in the w slot as well. 60The compiler must issue the source argument to slots z, y, and x 61*/ 62 63/* Contents of r0 on entry to various shaders 64 65 VS - .x = VertexID 66 .y = RelVertexID (??) 67 .w = InstanceID 68 69 GS - r0.xyw, r1.xyz = per-vertex offsets 70 r0.z = PrimitiveID 71 72 TCS - .x = PatchID 73 .y = RelPatchID (??) 74 .z = InvocationID 75 .w = tess factor base. 76 77 TES - .x = TessCoord.x 78 - .y = TessCoord.y 79 - .z = RelPatchID (??) 80 - .w = PrimitiveID 81 82 PS - face_gpr.z = SampleMask 83 face_gpr.w = SampleID 84*/ 85#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) 86static int r600_shader_from_tgsi(struct r600_context *rctx, 87 struct r600_pipe_shader *pipeshader, 88 union r600_shader_key key); 89 90static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, 91 int size, unsigned comp_mask) { 92 93 if (!size) 94 return; 95 96 if (ps->num_arrays == ps->max_arrays) { 97 ps->max_arrays += 64; 98 ps->arrays = realloc(ps->arrays, ps->max_arrays * 99 sizeof(struct r600_shader_array)); 100 } 101 102 int n = ps->num_arrays; 103 ++ps->num_arrays; 104 105 ps->arrays[n].comp_mask = comp_mask; 106 ps->arrays[n].gpr_start = start_gpr; 107 ps->arrays[n].gpr_count = size; 108} 109 110static void r600_dump_streamout(struct pipe_stream_output_info *so) 111{ 112 unsigned i; 113 114 fprintf(stderr, "STREAMOUT\n"); 115 for (i = 0; i < so->num_outputs; i++) { 116 unsigned mask = ((1 << so->output[i].num_components) - 1) << 117 so->output[i].start_component; 118 fprintf(stderr, 
" %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n", 119 i, 120 so->output[i].stream, 121 so->output[i].output_buffer, 122 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, 123 so->output[i].register_index, 124 mask & 1 ? "x" : "", 125 mask & 2 ? "y" : "", 126 mask & 4 ? "z" : "", 127 mask & 8 ? "w" : "", 128 so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : ""); 129 } 130} 131 132static int store_shader(struct pipe_context *ctx, 133 struct r600_pipe_shader *shader) 134{ 135 struct r600_context *rctx = (struct r600_context *)ctx; 136 uint32_t *ptr, i; 137 138 if (shader->bo == NULL) { 139 shader->bo = (struct r600_resource*) 140 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); 141 if (shader->bo == NULL) { 142 return -ENOMEM; 143 } 144 ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); 145 if (R600_BIG_ENDIAN) { 146 for (i = 0; i < shader->shader.bc.ndw; ++i) { 147 ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); 148 } 149 } else { 150 memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); 151 } 152 rctx->b.ws->buffer_unmap(shader->bo->cs_buf); 153 } 154 155 return 0; 156} 157 158int r600_pipe_shader_create(struct pipe_context *ctx, 159 struct r600_pipe_shader *shader, 160 union r600_shader_key key) 161{ 162 struct r600_context *rctx = (struct r600_context *)ctx; 163 struct r600_pipe_shader_selector *sel = shader->selector; 164 int r; 165 bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens); 166 unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); 167 unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); 168 unsigned export_shader; 169 170 shader->shader.bc.isa = rctx->isa; 171 172 if (dump) { 173 fprintf(stderr, "--------------------------------------------------------------\n"); 174 tgsi_dump(sel->tokens, 0); 175 176 if 
(sel->so.num_outputs) {
			r600_dump_streamout(&sel->so);
		}
	}
	r = r600_shader_from_tgsi(rctx, shader, key);
	if (r) {
		R600_ERR("translation from TGSI failed !\n");
		goto error;
	}
	if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
		/* only disable for vertex shaders in tess paths */
		if (key.vs.as_ls)
			use_sb = 0;
	}
	/* SB is not used for the tessellation stages at all. */
	use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_TESS_CTRL);
	use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_TESS_EVAL);

	/* disable SB for shaders using doubles */
	use_sb &= !shader->shader.uses_doubles;

	/* Check if the bytecode has already been built. When using the llvm
	 * backend, r600_shader_from_tgsi() will take care of building the
	 * bytecode.
	 */
	if (!shader->shader.bc.bytecode) {
		r = r600_bytecode_build(&shader->shader.bc);
		if (r) {
			R600_ERR("building bytecode failed !\n");
			goto error;
		}
	}

	/* Either plain disassembly, or run the SB optimizer (which can also
	 * disassemble when dump is set).
	 */
	if (dump && !sb_disasm) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		r600_bytecode_disasm(&shader->shader.bc);
		fprintf(stderr, "______________________________________________________________\n");
	} else if ((dump && sb_disasm) || use_sb) {
		r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
					     dump, use_sb);
		if (r) {
			R600_ERR("r600_sb_bytecode_process failed !\n");
			goto error;
		}
	}

	if (shader->gs_copy_shader) {
		if (dump) {
			// dump copy shader
			r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
						     &shader->gs_copy_shader->shader, dump, 0);
			if (r)
				goto error;
		}

		if ((r = store_shader(ctx, shader->gs_copy_shader)))
			goto error;
	}

	/* Store the shader in a buffer. */
	if ((r = store_shader(ctx, shader)))
		goto error;

	/* Build state.  Dispatch on shader stage; evergreen+ chips have their
	 * own state builders.
	 */
	switch (shader->shader.processor_type) {
	case TGSI_PROCESSOR_TESS_CTRL:
		evergreen_update_hs_state(ctx, shader);
		break;
	case TGSI_PROCESSOR_TESS_EVAL:
		if (key.tes.as_es)
			evergreen_update_es_state(ctx, shader);
		else
			evergreen_update_vs_state(ctx, shader);
		break;
	case TGSI_PROCESSOR_GEOMETRY:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_gs_state(ctx, shader);
			evergreen_update_vs_state(ctx, shader->gs_copy_shader);
		} else {
			r600_update_gs_state(ctx, shader);
			r600_update_vs_state(ctx, shader->gs_copy_shader);
		}
		break;
	case TGSI_PROCESSOR_VERTEX:
		export_shader = key.vs.as_es;
		if (rctx->b.chip_class >= EVERGREEN) {
			/* VS can be compiled as LS (tess), ES (geometry) or
			 * a real VS depending on the key.
			 */
			if (key.vs.as_ls)
				evergreen_update_ls_state(ctx, shader);
			else if (key.vs.as_es)
				evergreen_update_es_state(ctx, shader);
			else
				evergreen_update_vs_state(ctx, shader);
		} else {
			if (export_shader)
				r600_update_es_state(ctx, shader);
			else
				r600_update_vs_state(ctx, shader);
		}
		break;
	case TGSI_PROCESSOR_FRAGMENT:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_ps_state(ctx, shader);
		} else {
			r600_update_ps_state(ctx, shader);
		}
		break;
	default:
		r = -EINVAL;
		goto error;
	}
	return 0;

error:
	r600_pipe_shader_destroy(ctx, shader);
	return r;
}

/* Release all resources owned by a compiled shader: buffer object,
 * bytecode, and the start-up command buffer.
 */
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
	r600_bytecode_clear(&shader->shader.bc);
	r600_release_command_buffer(&shader->command_buffer);
}

/*
 * tgsi -> r600 shader
 */
struct r600_shader_tgsi_instruction;

/* One translated TGSI source operand (selector, swizzle and modifiers). */
struct r600_shader_src {
	unsigned			sel;
	unsigned			swizzle[4];
	unsigned			neg;
	unsigned			abs;
	unsigned			rel;
	unsigned			kc_bank;
	boolean				kc_rel; /* true if cache bank is indexed */
	uint32_t			value[4];
};

struct
eg_interp {
	boolean		enabled;
	unsigned	ij_index; /* which barycentric (i,j) pair this input uses */
};

/* All state carried across the TGSI -> r600 translation of one shader. */
struct r600_shader_ctx {
	struct tgsi_shader_info			info;
	struct tgsi_parse_context		parse;
	const struct tgsi_token			*tokens;
	unsigned				type;      /* TGSI_PROCESSOR_* stage */
	unsigned				file_offset[TGSI_FILE_COUNT]; /* GPR base per register file */
	unsigned				temp_reg;
	const struct r600_shader_tgsi_instruction	*inst_info;
	struct r600_bytecode			*bc;
	struct r600_shader			*shader;
	struct r600_shader_src			src[4];
	uint32_t				*literals;
	uint32_t				nliterals;
	uint32_t				max_driver_temp_used;
	boolean					use_llvm;
	/* needed for evergreen interpolation */
	struct eg_interp			eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
	/* evergreen/cayman also store sample mask in face register */
	int					face_gpr;
	/* sample id is .w component stored in fixed point position register */
	int					fixed_pt_position_gpr;
	int					colors_used;
	boolean					clip_vertex_write;
	unsigned				cv_output;
	unsigned				edgeflag_output;
	int					fragcoord_input;
	int					native_integers;
	int					next_ring_offset;
	int					gs_out_ring_offset;
	int					gs_next_vertex;
	struct r600_shader			*gs_for_vs;
	int					gs_export_gpr_tregs[4];
	const struct pipe_stream_output_info	*gs_stream_output_info;
	unsigned				enabled_stream_buffers_mask;
	unsigned				tess_input_info; /* temp with tess input offsets */
	unsigned				tess_output_info; /* temp with tess input offsets */
};

/* Maps one TGSI opcode to its emit callback. */
struct r600_shader_tgsi_instruction {
	unsigned	op;
	int (*process)(struct r600_shader_ctx *ctx);
};

static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
				unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
				unsigned int dst_reg);
static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
			      const struct r600_shader_src *shader_src,
			      unsigned chan);
static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
			       unsigned dst_reg);

/* Return the index of the highest channel set in a 4-bit writemask
 * (0 when the mask is empty).
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int i, lasti = 0;

	for (i = 0; i < 4; i++) {
		if (writemask & (1 << i)) {
			lasti = i;
		}
	}
	return lasti;
}

/* Reject TGSI instructions this backend cannot translate.
 * Returns 0 if supported, -EINVAL otherwise.
 */
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
	int j;

	if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
		return -EINVAL;
	}
	if (i->Instruction.Predicate) {
		R600_ERR("predicate unsupported\n");
		return -EINVAL;
	}
#if 0
	if (i->Instruction.Label) {
		R600_ERR("label unsupported\n");
		return -EINVAL;
	}
#endif
	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
		if (i->Src[j].Register.Dimension) {
			switch (i->Src[j].Register.File) {
			case TGSI_FILE_CONSTANT:
				break;
			case TGSI_FILE_INPUT:
				if (ctx->type == TGSI_PROCESSOR_GEOMETRY ||
				    ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
				    ctx->type == TGSI_PROCESSOR_TESS_EVAL)
					break;
				/* fallthrough - 2D inputs are only valid for the stages above */
			case TGSI_FILE_OUTPUT:
				if (ctx->type == TGSI_PROCESSOR_TESS_CTRL)
					break;
				/* fallthrough - 2D outputs only valid for TCS */
			default:
				R600_ERR("unsupported src %d (file
%d, dimension %d)\n", j,
					 i->Src[j].Register.File,
					 i->Src[j].Register.Dimension);
				return -EINVAL;
			}
		}
	}
	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
		if (i->Dst[j].Register.Dimension) {
			/* 2D destinations are only supported for TCS. */
			if (ctx->type == TGSI_PROCESSOR_TESS_CTRL)
				continue;
			R600_ERR("unsupported dst (dimension)\n");
			return -EINVAL;
		}
	}
	return 0;
}

/* Map (interpolate mode, location) onto the 0..5 index used for
 * ctx->eg_interpolators: Persp/Linear * 3 + sample/center/centroid.
 * Returns -1 for modes that need no interpolator (e.g. constant).
 */
int eg_get_interpolator_index(unsigned interpolate, unsigned location)
{
	if (interpolate == TGSI_INTERPOLATE_COLOR ||
	    interpolate == TGSI_INTERPOLATE_LINEAR ||
	    interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
	{
		int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
		int loc;

		switch(location) {
		case TGSI_INTERPOLATE_LOC_CENTER:
			loc = 1;
			break;
		case TGSI_INTERPOLATE_LOC_CENTROID:
			loc = 2;
			break;
		case TGSI_INTERPOLATE_LOC_SAMPLE:
		default:
			loc = 0; break;
		}

		return is_linear * 3 + loc;
	}

	return -1;
}

/* Copy the previously-allocated ij_index of the matching interpolator
 * onto a fragment shader input.
 */
static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
					     int input)
{
	int i = eg_get_interpolator_index(
		ctx->shader->input[input].interpolate,
		ctx->shader->input[input].interpolate_location);
	assert(i >= 0);
	ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
}

/* Emit the INTERP_ZW/INTERP_XY ALU pairs that interpolate one fragment
 * shader input from its barycentric (i,j) pair.  Eight ALU ops are emitted;
 * only the middle four actually write the destination GPR channels.
 */
static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;
	int gpr = 0, base_chan = 0;
	int ij_index = ctx->shader->input[input].ij_index;

	/* work out gpr and base_chan from index */
	gpr = ij_index / 2;
	base_chan = (2 * (ij_index % 2)) + 1;

	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		if (i < 4)
			alu.op = ALU_OP2_INTERP_ZW;
		else
			alu.op = ALU_OP2_INTERP_XY;

		/* only slots 2..5 write result channels */
		if ((i > 1) && (i < 6)) {
			alu.dst.sel = ctx->shader->input[input].gpr;
			alu.dst.write = 1;
		}

		alu.dst.chan = i % 4;

		alu.src[0].sel = gpr;
		alu.src[0].chan = (base_chan - (i % 2));

		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;

		alu.bank_swizzle_force = SQ_ALU_VEC_210;
		if ((i % 4) == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Load a flat (non-interpolated) fragment shader input via INTERP_LOAD_P0,
 * one ALU op per channel.
 */
static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_INTERP_LOAD_P0;

		alu.dst.sel = ctx->shader->input[input].gpr;
		alu.dst.write = 1;

		alu.dst.chan = i;

		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[0].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/*
 * Special export handling in shaders
 *
 * shader export ARRAY_BASE for EXPORT_POS:
 * 60 is position
 * 61 is misc vector
 * 62, 63 are clip distance vectors
 *
 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
 * exclusive from render target index)
 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
 *
 *
 * shader export ARRAY_BASE for EXPORT_PIXEL:
 * 0-7 CB targets
 * 61 computed Z vector
 *
 * The use of the values exported in the computed Z vector are controlled
 * by
DB_SHADER_CONTROL:
 * Z_EXPORT_ENABLE - Z as a float in RED
 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
 * DB_SOURCE_FORMAT - export control restrictions
 *
 */


/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
static int r600_spi_sid(struct r600_shader_io * io)
{
	int index, name = io->name;

	/* These params are handled differently, they don't need
	 * semantic indices, so we'll use 0 for them.
	 */
	if (name == TGSI_SEMANTIC_POSITION ||
	    name == TGSI_SEMANTIC_PSIZE ||
	    name == TGSI_SEMANTIC_EDGEFLAG ||
	    name == TGSI_SEMANTIC_FACE ||
	    name == TGSI_SEMANTIC_SAMPLEMASK)
		index = 0;
	else {
		if (name == TGSI_SEMANTIC_GENERIC) {
			/* For generic params simply use sid from tgsi */
			index = io->sid;
		} else {
			/* For non-generic params - pack name and sid into 8 bits */
			index = 0x80 | (name<<3) | (io->sid);
		}

		/* Make sure that all really used indices have nonzero value, so
		 * we can just compare it to 0 later instead of comparing the name
		 * with different values to detect special cases. */
		index++;
	}

	return index;
};

/* we need this to get a common lds index for vs/tcs/tes input/outputs */
int r600_get_lds_unique_index(unsigned semantic_name, unsigned index)
{
	switch (semantic_name) {
	case TGSI_SEMANTIC_POSITION:
		return 0;
	case TGSI_SEMANTIC_PSIZE:
		return 1;
	case TGSI_SEMANTIC_CLIPDIST:
		assert(index <= 1);
		return 2 + index;
	case TGSI_SEMANTIC_GENERIC:
		if (index <= 63-4)
			return 4 + index - 9;
		else
			/* same explanation as in the default statement,
			 * the only user hitting this is st/nine.
			 */
			return 0;

	/* patch indices are completely separate and thus start from 0 */
	case TGSI_SEMANTIC_TESSOUTER:
		return 0;
	case TGSI_SEMANTIC_TESSINNER:
		return 1;
	case TGSI_SEMANTIC_PATCH:
		return 2 + index;

	default:
		/* Don't fail here. The result of this function is only used
		 * for LS, TCS, TES, and GS, where legacy GL semantics can't
		 * occur, but this function is called for all vertex shaders
		 * before it's known whether LS will be compiled or not.
		 */
		return 0;
	}
}

/* turn input into interpolate on EG.  Allocates an LDS slot for the input
 * and emits the interpolation (or flat load) ALU ops unless the LLVM
 * backend is handling code generation.
 */
static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
{
	int r = 0;

	if (ctx->shader->input[index].spi_sid) {
		ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
		if (ctx->shader->input[index].interpolate > 0) {
			evergreen_interp_assign_ij_index(ctx, index);
			if (!ctx->use_llvm)
				r = evergreen_interp_alu(ctx, index);
		} else {
			if (!ctx->use_llvm)
				r = evergreen_interp_flat(ctx, index);
		}
	}
	return r;
}

/* Emit CNDGT ops selecting between the front and back color inputs based
 * on the face register; the result overwrites the front color GPR.
 */
static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
{
	struct r600_bytecode_alu alu;
	int i, r;
	int gpr_front = ctx->shader->input[front].gpr;
	int gpr_back = ctx->shader->input[back].gpr;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;
		alu.dst.write = 1;
		alu.dst.sel = gpr_front;
		alu.src[0].sel = ctx->face_gpr;
		alu.src[1].sel = gpr_front;
		alu.src[2].sel = gpr_back;

		alu.dst.chan = i;
		alu.src[1].chan = i;
		alu.src[2].chan = i;
		alu.last = (i==3);

		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;
	}

	return 0;
}

/* execute a single slot ALU calculation */
static int single_alu_op2(struct r600_shader_ctx *ctx, int op,
			  int dst_sel, int dst_chan,
			  int src0_sel, unsigned src0_chan_val,
			  int src1_sel,
unsigned src1_chan_val)
{
	struct r600_bytecode_alu alu;
	int r, i;

	/* MULLO_INT is a t-slot-only op on pre-Cayman; on Cayman it must be
	 * replicated across the vector slots, with only the requested
	 * channel actually written.
	 */
	if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) {
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = op;
			alu.src[0].sel = src0_sel;
			if (src0_sel == V_SQ_ALU_SRC_LITERAL)
				alu.src[0].value = src0_chan_val;
			else
				alu.src[0].chan = src0_chan_val;
			alu.src[1].sel = src1_sel;
			if (src1_sel == V_SQ_ALU_SRC_LITERAL)
				alu.src[1].value = src1_chan_val;
			else
				alu.src[1].chan = src1_chan_val;
			alu.dst.sel = dst_sel;
			alu.dst.chan = i;
			alu.dst.write = i == dst_chan;
			alu.last = (i == 3);
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		return 0;
	}

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = op;
	alu.src[0].sel = src0_sel;
	if (src0_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[0].value = src0_chan_val;
	else
		alu.src[0].chan = src0_chan_val;
	alu.src[1].sel = src1_sel;
	if (src1_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[1].value = src1_chan_val;
	else
		alu.src[1].chan = src1_chan_val;
	alu.dst.sel = dst_sel;
	alu.dst.chan = dst_chan;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* execute a single slot ALU calculation */
static int single_alu_op3(struct r600_shader_ctx *ctx, int op,
			  int dst_sel, int dst_chan,
			  int src0_sel, unsigned src0_chan_val,
			  int src1_sel, unsigned src1_chan_val,
			  int src2_sel, unsigned src2_chan_val)
{
	struct r600_bytecode_alu alu;
	int r;

	/* validate this for other ops */
	assert(op == ALU_OP3_MULADD_UINT24);
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = op;
	alu.src[0].sel = src0_sel;
	if (src0_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[0].value = src0_chan_val;
	else
		alu.src[0].chan = src0_chan_val;
	alu.src[1].sel = src1_sel;
	if (src1_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[1].value = src1_chan_val;
	else
		alu.src[1].chan = src1_chan_val;
	alu.src[2].sel = src2_sel;
	if (src2_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[2].value = src2_chan_val;
	else
		alu.src[2].chan = src2_chan_val;
	alu.dst.sel = dst_sel;
	alu.dst.chan = dst_chan;
	/* NOTE(review): alu.dst.write is not set here, unlike single_alu_op2
	 * above — confirm whether op3 instructions imply a write or whether
	 * this relies on the bytecode builder's defaults.
	 */
	alu.is_op3 = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* put it in temp_reg.x */
static int get_lds_offset0(struct r600_shader_ctx *ctx,
			   int rel_patch_chan,
			   int temp_reg, bool is_patch_var)
{
	int r;

	/* MUL temp.x, patch_stride (input_vals.x), rel_patch_id (r0.y (tcs)) */
	/* ADD
	   Dimension - patch0_offset (input_vals.z),
	   Non-dim - patch0_data_offset (input_vals.w)
	*/
	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
			   temp_reg, 0,
			   ctx->tess_output_info, 0,
			   0, rel_patch_chan,
			   ctx->tess_output_info, is_patch_var ? 3 : 2);
	if (r)
		return r;
	return 0;
}

/* Return the GPR used for indirect addressing: index 0 is the AR register,
 * higher indices use the dedicated index registers.
 */
static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index)
{
	return index > 0 ?
ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg;
}

/* Allocate the next free driver temp GPR for the current instruction. */
static int r600_get_temp(struct r600_shader_ctx *ctx)
{
	return ctx->temp_reg + ctx->max_driver_temp_used++;
}

/* Append a PRIMID output (constant-interpolated, z channel only) to the
 * vertex shader's output list.
 */
static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
{
	int i;
	i = ctx->shader->noutput++;
	ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID;
	ctx->shader->output[i].sid = 0;
	ctx->shader->output[i].gpr = 0;
	ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT;
	ctx->shader->output[i].write_mask = 0x4;
	ctx->shader->output[i].spi_sid = prim_id_sid;

	return 0;
}

/* Process one TGSI declaration token: record inputs/outputs on the shader,
 * track indirect temp arrays, and emit setup code for system values.
 * Returns 0 or -EINVAL for unsupported files.
 */
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
	int r, i, j, count = d->Range.Last - d->Range.First + 1;

	switch (d->Declaration.File) {
	case TGSI_FILE_INPUT:
		for (j = 0; j < count; j++) {
			i = ctx->shader->ninput + j;
			assert(i < Elements(ctx->shader->input));
			ctx->shader->input[i].name = d->Semantic.Name;
			ctx->shader->input[i].sid = d->Semantic.Index + j;
			ctx->shader->input[i].interpolate = d->Interp.Interpolate;
			ctx->shader->input[i].interpolate_location = d->Interp.Location;
			ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j;
			if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
				ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
				switch (ctx->shader->input[i].name) {
				case TGSI_SEMANTIC_FACE:
					if (ctx->face_gpr != -1)
						ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
					else
						ctx->face_gpr = ctx->shader->input[i].gpr;
					break;
				case TGSI_SEMANTIC_COLOR:
					ctx->colors_used++;
					break;
				case TGSI_SEMANTIC_POSITION:
					ctx->fragcoord_input = i;
					break;
				case TGSI_SEMANTIC_PRIMID:
					/* set this for now */
					ctx->shader->gs_prim_id_input = true;
					ctx->shader->ps_prim_id_input = i;
					break;
				}
				if (ctx->bc->chip_class >= EVERGREEN) {
					if ((r = evergreen_interp_input(ctx, i)))
						return r;
				}
			} else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
				/* FIXME probably skip inputs if they aren't passed in the ring */
				ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
				ctx->next_ring_offset += 16;
				if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
					ctx->shader->gs_prim_id_input = true;
			}
		}
		ctx->shader->ninput += count;
		break;
	case TGSI_FILE_OUTPUT:
		for (j = 0; j < count; j++) {
			i = ctx->shader->noutput + j;
			assert(i < Elements(ctx->shader->output));
			ctx->shader->output[i].name = d->Semantic.Name;
			ctx->shader->output[i].sid = d->Semantic.Index + j;
			ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j;
			ctx->shader->output[i].interpolate = d->Interp.Interpolate;
			ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
			if (ctx->type == TGSI_PROCESSOR_VERTEX ||
			    ctx->type == TGSI_PROCESSOR_GEOMETRY ||
			    ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
				ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
				switch (d->Semantic.Name) {
				case TGSI_SEMANTIC_CLIPDIST:
					/* four mask bits per clip-dist vector */
					ctx->shader->clip_dist_write |= d->Declaration.UsageMask <<
									((d->Semantic.Index + j) << 2);
					break;
				case TGSI_SEMANTIC_PSIZE:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_point_size = 1;
					break;
				case TGSI_SEMANTIC_EDGEFLAG:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_edgeflag = 1;
					ctx->edgeflag_output = i;
					break;
				case TGSI_SEMANTIC_VIEWPORT_INDEX:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_viewport = 1;
					break;
				case TGSI_SEMANTIC_LAYER:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_layer = 1;
					break;
				case TGSI_SEMANTIC_CLIPVERTEX:
					ctx->clip_vertex_write = TRUE;
					ctx->cv_output = i;
					break;
				}
				if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
					ctx->gs_out_ring_offset += 16;
				}
			} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
				switch (d->Semantic.Name) {
				case TGSI_SEMANTIC_COLOR:
					ctx->shader->nr_ps_max_color_exports++;
					break;
				}
			}
		}
		ctx->shader->noutput += count;
		break;
	case TGSI_FILE_TEMPORARY:
		if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
			if (d->Array.ArrayID) {
				r600_add_gpr_array(ctx->shader,
						   ctx->file_offset[TGSI_FILE_TEMPORARY] +
						   d->Range.First,
						   d->Range.Last - d->Range.First + 1, 0x0F);
			}
		}
		break;

	case TGSI_FILE_CONSTANT:
	case TGSI_FILE_SAMPLER:
	case TGSI_FILE_SAMPLER_VIEW:
	case TGSI_FILE_ADDRESS:
		break;

	case TGSI_FILE_SYSTEM_VALUE:
		if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
		    d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
		    d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
			break; /* Already handled from allocate_system_value_inputs */
		} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
			if (!ctx->native_integers) {
				/* convert the int InstanceID in r0.w to float in place */
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP1_INT_TO_FLT;
				alu.src[0].sel = 0;
				alu.src[0].chan = 3;

				alu.dst.sel = 0;
				alu.dst.chan = 3;
				alu.dst.write = 1;
				alu.last = 1;

				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
			break;
		} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
			break;
		else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID)
			break;
		else if (d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ||
			 d->Semantic.Name == TGSI_SEMANTIC_TESSOUTER) {
			int param = r600_get_lds_unique_index(d->Semantic.Name, 0);
			int dreg = d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ? 3 : 2;
			unsigned temp_reg = r600_get_temp(ctx);

			/* compute LDS address of the tess factor and fetch it */
			r = get_lds_offset0(ctx, 2, temp_reg, true);
			if (r)
				return r;

			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
					   temp_reg, 0,
					   temp_reg, 0,
					   V_SQ_ALU_SRC_LITERAL, param * 16);
			if (r)
				return r;

			do_lds_fetch_values(ctx, temp_reg, dreg);
		}
		else if (d->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) {
			/* MOV r1.x, r0.x;
			   MOV r1.y, r0.y;
			*/
			for (i = 0; i < 2; i++) {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_MOV;
				alu.src[0].sel = 0;
				alu.src[0].chan = 0 + i;
				alu.dst.sel = 1;
				alu.dst.chan = 0 + i;
				alu.dst.write = 1;
				alu.last = (i == 1) ? 1 : 0;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
			/* ADD r1.z, 1.0f, -r0.x */
			struct r600_bytecode_alu alu;
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD;
			alu.src[0].sel = V_SQ_ALU_SRC_1;
			alu.src[1].sel = 1;
			alu.src[1].chan = 0;
			alu.src[1].neg = 1;
			alu.dst.sel = 1;
			alu.dst.chan = 2;
			alu.dst.write = 1;
			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* ADD r1.z, r1.z, -r1.y */
			alu.op = ALU_OP2_ADD;
			alu.src[0].sel = 1;
			alu.src[0].chan = 2;
			alu.src[1].sel = 1;
			alu.src[1].chan = 1;
			alu.src[1].neg = 1;
			alu.dst.sel = 1;
			alu.dst.chan = 2;
			alu.dst.write = 1;
			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
			break;
		}
		break;
	default:
		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
		return -EINVAL;
	}
	return 0;
}

/* Scan the shader for system values (sample mask / sample id / sample pos)
 * and interpolateAt* usage, allocating GPRs for the ones that are needed.
 * Returns the first GPR index after the allocations.
 */
static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset)
{
	struct tgsi_parse_context parse;
	struct {
		boolean enabled;
		int *reg;
		unsigned name,
alternate_name; 1071 } inputs[2] = { 1072 { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */ 1073 1074 { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */ 1075 }; 1076 int i, k, num_regs = 0; 1077 1078 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 1079 return 0; 1080 } 1081 1082 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 1083 while (!tgsi_parse_end_of_tokens(&parse)) { 1084 tgsi_parse_token(&parse); 1085 1086 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 1087 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 1088 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 1089 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 1090 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 1091 { 1092 int interpolate, location, k; 1093 1094 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 1095 location = TGSI_INTERPOLATE_LOC_CENTER; 1096 inputs[1].enabled = true; /* needs SAMPLEID */ 1097 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 1098 location = TGSI_INTERPOLATE_LOC_CENTER; 1099 /* Needs sample positions, currently those are always available */ 1100 } else { 1101 location = TGSI_INTERPOLATE_LOC_CENTROID; 1102 } 1103 1104 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 1105 k = eg_get_interpolator_index(interpolate, location); 1106 ctx->eg_interpolators[k].enabled = true; 1107 } 1108 } else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { 1109 struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration; 1110 if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1111 for (k = 0; k < Elements(inputs); k++) { 1112 if (d->Semantic.Name == inputs[k].name || 1113 d->Semantic.Name == inputs[k].alternate_name) { 1114 inputs[k].enabled = true; 1115 } 1116 } 1117 } 
1118 } 1119 } 1120 1121 tgsi_parse_free(&parse); 1122 1123 for (i = 0; i < Elements(inputs); i++) { 1124 boolean enabled = inputs[i].enabled; 1125 int *reg = inputs[i].reg; 1126 unsigned name = inputs[i].name; 1127 1128 if (enabled) { 1129 int gpr = gpr_offset + num_regs++; 1130 1131 // add to inputs, allocate a gpr 1132 k = ctx->shader->ninput ++; 1133 ctx->shader->input[k].name = name; 1134 ctx->shader->input[k].sid = 0; 1135 ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT; 1136 ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER; 1137 *reg = ctx->shader->input[k].gpr = gpr; 1138 } 1139 } 1140 1141 return gpr_offset + num_regs; 1142} 1143 1144/* 1145 * for evergreen we need to scan the shader to find the number of GPRs we need to 1146 * reserve for interpolation and system values 1147 * 1148 * we need to know if we are going to emit 1149 * any sample or centroid inputs 1150 * if perspective and linear are required 1151*/ 1152static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 1153{ 1154 int i; 1155 int num_baryc; 1156 struct tgsi_parse_context parse; 1157 1158 memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators)); 1159 1160 for (i = 0; i < ctx->info.num_inputs; i++) { 1161 int k; 1162 /* skip position/face/mask/sampleid */ 1163 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 1164 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE || 1165 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK || 1166 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID) 1167 continue; 1168 1169 k = eg_get_interpolator_index( 1170 ctx->info.input_interpolate[i], 1171 ctx->info.input_interpolate_loc[i]); 1172 if (k >= 0) 1173 ctx->eg_interpolators[k].enabled = TRUE; 1174 } 1175 1176 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 1177 return 0; 1178 } 1179 1180 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 1181 while 
(!tgsi_parse_end_of_tokens(&parse)) { 1182 tgsi_parse_token(&parse); 1183 1184 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 1185 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 1186 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 1187 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 1188 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 1189 { 1190 int interpolate, location, k; 1191 1192 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 1193 location = TGSI_INTERPOLATE_LOC_CENTER; 1194 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 1195 location = TGSI_INTERPOLATE_LOC_CENTER; 1196 } else { 1197 location = TGSI_INTERPOLATE_LOC_CENTROID; 1198 } 1199 1200 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 1201 k = eg_get_interpolator_index(interpolate, location); 1202 ctx->eg_interpolators[k].enabled = true; 1203 } 1204 } 1205 } 1206 1207 tgsi_parse_free(&parse); 1208 1209 /* assign gpr to each interpolator according to priority */ 1210 num_baryc = 0; 1211 for (i = 0; i < Elements(ctx->eg_interpolators); i++) { 1212 if (ctx->eg_interpolators[i].enabled) { 1213 ctx->eg_interpolators[i].ij_index = num_baryc; 1214 num_baryc ++; 1215 } 1216 } 1217 1218 /* XXX PULL MODEL and LINE STIPPLE */ 1219 1220 num_baryc = (num_baryc + 1) >> 1; 1221 return allocate_system_value_inputs(ctx, num_baryc); 1222} 1223 1224/* sample_id_sel == NULL means fetch for current sample */ 1225static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel) 1226{ 1227 struct r600_bytecode_vtx vtx; 1228 int r, t1; 1229 1230 assert(ctx->fixed_pt_position_gpr != -1); 1231 1232 t1 = r600_get_temp(ctx); 1233 1234 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 1235 vtx.op = FETCH_OP_VFETCH; 1236 vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 1237 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1238 if (sample_id == NULL) { 1239 
		vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
		vtx.src_sel_x = 3;
	}
	else {
		struct r600_bytecode_alu alu;

		/* Copy the caller-supplied sample id into t1.x so it can be
		 * used as the fetch index. */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		r600_bytecode_src(&alu.src[0], sample_id, chan_sel);
		alu.dst.sel = t1;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		vtx.src_gpr = t1;
		vtx.src_sel_x = 0;
	}
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = t1;
	vtx.dst_sel_x = 0;
	vtx.dst_sel_y = 1;
	vtx.dst_sel_z = 2;
	vtx.dst_sel_w = 3;
	vtx.data_format = FMT_32_32_32_32_FLOAT;
	vtx.num_format_all = 2;
	vtx.format_comp_all = 1;
	vtx.use_const_fields = 0;
	vtx.offset = 1; // first element is size of buffer
	vtx.endian = r600_endian_swap(32);
	vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */

	r = r600_bytecode_add_vtx(ctx->bc, &vtx);
	if (r)
		return r;

	/* On success, return the temp GPR holding the fetched position. */
	return t1;
}

/* Translate a TGSI source operand into the r600 source description
 * (r600_src): swizzles, negate/absolute modifiers, register select and
 * constant-buffer bank.  System values are remapped onto the GPRs/channels
 * the hardware delivers them in. */
static void tgsi_src(struct r600_shader_ctx *ctx,
		     const struct tgsi_full_src_register *tgsi_src,
		     struct r600_shader_src *r600_src)
{
	memset(r600_src, 0, sizeof(*r600_src));
	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
	r600_src->neg = tgsi_src->Register.Negate;
	r600_src->abs = tgsi_src->Register.Absolute;

	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
		int index;
		/* If all four channels read the same immediate, try to fold it
		 * into an inline hardware constant (0, 1, 0.5, ...). */
		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {

			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel,
				&r600_src->neg, r600_src->abs);
			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
				return;
		}
		index = tgsi_src->Register.Index;
		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
		if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) {
			r600_src->swizzle[0] = 2; // Z value
			r600_src->swizzle[1] = 2;
			r600_src->swizzle[2] = 2;
			r600_src->swizzle[3] = 2;
			r600_src->sel = ctx->face_gpr;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
			r600_src->swizzle[0] = 3; // W value
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = ctx->fixed_pt_position_gpr;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
			/* sample position is fetched from the buffer-info
			 * constant buffer; .zw read inline 0 (swizzle 4). */
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 1;
			r600_src->swizzle[2] = 4;
			r600_src->swizzle[3] = 4;
			r600_src->sel = load_sample_position(ctx, NULL, -1);
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 0;
			r600_src->swizzle[2] = 0;
			r600_src->swizzle[3] = 0;
			r600_src->sel = 0;
		} else if (ctx->type != TGSI_PROCESSOR_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 1;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
			/* TESS_CTRL case (the non-TESS_CTRL case was handled
			 * by the preceding branch). */
			r600_src->swizzle[0] = 2;
			r600_src->swizzle[1] = 2;
			r600_src->swizzle[2] = 2;
			r600_src->swizzle[3] = 2;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSCOORD) {
			r600_src->sel = 1;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSINNER) {
			r600_src->sel = 3;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) {
			r600_src->sel = 2;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) {
			/* vertex count lives in a different channel of the
			 * tess input info register depending on stage */
			if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) {
				r600_src->sel = ctx->tess_input_info;
				r600_src->swizzle[0] = 2;
				r600_src->swizzle[1] = 2;
				r600_src->swizzle[2] = 2;
				r600_src->swizzle[3] = 2;
			} else {
				r600_src->sel = ctx->tess_input_info;
				r600_src->swizzle[0] = 3;
				r600_src->swizzle[1] = 3;
				r600_src->swizzle[2] = 3;
				r600_src->swizzle[3] = 3;
			}
		} else if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) {
			r600_src->sel = 0;
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 0;
			r600_src->swizzle[2] = 0;
			r600_src->swizzle[3] = 0;
		} else if (ctx->type == TGSI_PROCESSOR_TESS_EVAL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) {
			r600_src->sel = 0;
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
		}
	} else {
		if (tgsi_src->Register.Indirect)
			r600_src->rel = V_SQ_REL_RELATIVE;
		r600_src->sel = tgsi_src->Register.Index;
		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
	}
	if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) {
		if (tgsi_src->Register.Dimension) {
			r600_src->kc_bank = tgsi_src->Dimension.Index;
			if (tgsi_src->Dimension.Indirect) {
				r600_src->kc_rel = 1;
			}
		}
	}
}

/* Fetch a relatively-addressed constant through a vertex fetch from constant
 * buffer cb_idx: the address in AR reg channel ar_chan (plus optional literal
 * offset) indexes the buffer, and the four fetched dwords land in dst_reg. */
static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
				unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
				unsigned int dst_reg)
{
	struct r600_bytecode_vtx vtx;
	unsigned int ar_reg;
	int r;

	if (offset) {
		struct r600_bytecode_alu alu;

		memset(&alu, 0, sizeof(alu));

		/* dst_reg.ar_chan = ar_reg.ar_chan + offset */
		alu.op = ALU_OP2_ADD_INT;
		alu.src[0].sel = ctx->bc->ar_reg;
		alu.src[0].chan = ar_chan;

		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = offset;

		alu.dst.sel = dst_reg;
		alu.dst.chan = ar_chan;
		alu.dst.write = 1;
		alu.last = 1;

		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		ar_reg = dst_reg;
	} else {
		ar_reg = ctx->bc->ar_reg;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.buffer_id = cb_idx;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = ar_reg;
	vtx.src_sel_x = ar_chan;
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = dst_reg;
	vtx.dst_sel_x = 0;		/* SEL_X */
	vtx.dst_sel_y = 1;		/* SEL_Y */
	vtx.dst_sel_z = 2;		/* SEL_Z */
	vtx.dst_sel_w = 3;		/* SEL_W */
	vtx.data_format = FMT_32_32_32_32_FLOAT;
	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
	vtx.endian = r600_endian_swap(32);
	vtx.buffer_index_mode = cb_rel; // cb_rel ? 
V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE; 1446 1447 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1448 return r; 1449 1450 return 0; 1451} 1452 1453static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1454{ 1455 struct r600_bytecode_vtx vtx; 1456 int r; 1457 unsigned index = src->Register.Index; 1458 unsigned vtx_id = src->Dimension.Index; 1459 int offset_reg = vtx_id / 3; 1460 int offset_chan = vtx_id % 3; 1461 1462 /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, 1463 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ 1464 1465 if (offset_reg == 0 && offset_chan == 2) 1466 offset_chan = 3; 1467 1468 if (src->Dimension.Indirect) { 1469 int treg[3]; 1470 int t2; 1471 struct r600_bytecode_alu alu; 1472 int r, i; 1473 1474 /* you have got to be shitting me - 1475 we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt. 1476 at least this is what fglrx seems to do. */ 1477 for (i = 0; i < 3; i++) { 1478 treg[i] = r600_get_temp(ctx); 1479 } 1480 r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F); 1481 1482 t2 = r600_get_temp(ctx); 1483 for (i = 0; i < 3; i++) { 1484 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1485 alu.op = ALU_OP1_MOV; 1486 alu.src[0].sel = 0; 1487 alu.src[0].chan = i == 2 ? 
3 : i; 1488 alu.dst.sel = treg[i]; 1489 alu.dst.chan = 0; 1490 alu.dst.write = 1; 1491 alu.last = 1; 1492 r = r600_bytecode_add_alu(ctx->bc, &alu); 1493 if (r) 1494 return r; 1495 } 1496 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1497 alu.op = ALU_OP1_MOV; 1498 alu.src[0].sel = treg[0]; 1499 alu.src[0].rel = 1; 1500 alu.dst.sel = t2; 1501 alu.dst.write = 1; 1502 alu.last = 1; 1503 r = r600_bytecode_add_alu(ctx->bc, &alu); 1504 if (r) 1505 return r; 1506 offset_reg = t2; 1507 } 1508 1509 1510 memset(&vtx, 0, sizeof(vtx)); 1511 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 1512 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1513 vtx.src_gpr = offset_reg; 1514 vtx.src_sel_x = offset_chan; 1515 vtx.offset = index * 16; /*bytes*/ 1516 vtx.mega_fetch_count = 16; 1517 vtx.dst_gpr = dst_reg; 1518 vtx.dst_sel_x = 0; /* SEL_X */ 1519 vtx.dst_sel_y = 1; /* SEL_Y */ 1520 vtx.dst_sel_z = 2; /* SEL_Z */ 1521 vtx.dst_sel_w = 3; /* SEL_W */ 1522 if (ctx->bc->chip_class >= EVERGREEN) { 1523 vtx.use_const_fields = 1; 1524 } else { 1525 vtx.data_format = FMT_32_32_32_32_FLOAT; 1526 } 1527 1528 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1529 return r; 1530 1531 return 0; 1532} 1533 1534static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) 1535{ 1536 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1537 int i; 1538 1539 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1540 struct tgsi_full_src_register *src = &inst->Src[i]; 1541 1542 if (src->Register.File == TGSI_FILE_INPUT) { 1543 if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) { 1544 /* primitive id is in R0.z */ 1545 ctx->src[i].sel = 0; 1546 ctx->src[i].swizzle[0] = 2; 1547 } 1548 } 1549 if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { 1550 int treg = r600_get_temp(ctx); 1551 1552 fetch_gs_input(ctx, src, treg); 1553 ctx->src[i].sel = treg; 1554 } 1555 } 1556 return 0; 1557} 1558 1559 1560/* Tessellation shaders pass outputs to the 
next shader using LDS. 1561 * 1562 * LS outputs = TCS(HS) inputs 1563 * TCS(HS) outputs = TES(DS) inputs 1564 * 1565 * The LDS layout is: 1566 * - TCS inputs for patch 0 1567 * - TCS inputs for patch 1 1568 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) 1569 * - ... 1570 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset 1571 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset 1572 * - TCS outputs for patch 1 1573 * - Per-patch TCS outputs for patch 1 1574 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) 1575 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) 1576 * - ... 1577 * 1578 * All three shaders VS(LS), TCS, TES share the same LDS space. 1579 */ 1580/* this will return with the dw address in temp_reg.x */ 1581static int r600_get_byte_address(struct r600_shader_ctx *ctx, int temp_reg, 1582 const struct tgsi_full_dst_register *dst, 1583 const struct tgsi_full_src_register *src, 1584 int stride_bytes_reg, int stride_bytes_chan) 1585{ 1586 struct tgsi_full_dst_register reg; 1587 ubyte *name, *index, *array_first; 1588 int r; 1589 int param; 1590 struct tgsi_shader_info *info = &ctx->info; 1591 /* Set the register description. The address computation is the same 1592 * for sources and destinations. */ 1593 if (src) { 1594 reg.Register.File = src->Register.File; 1595 reg.Register.Index = src->Register.Index; 1596 reg.Register.Indirect = src->Register.Indirect; 1597 reg.Register.Dimension = src->Register.Dimension; 1598 reg.Indirect = src->Indirect; 1599 reg.Dimension = src->Dimension; 1600 reg.DimIndirect = src->DimIndirect; 1601 } else 1602 reg = *dst; 1603 1604 /* If the register is 2-dimensional (e.g. an array of vertices 1605 * in a primitive), calculate the base address of the vertex. 
*/ 1606 if (reg.Register.Dimension) { 1607 int sel, chan; 1608 if (reg.Dimension.Indirect) { 1609 unsigned addr_reg; 1610 assert (reg.DimIndirect.File == TGSI_FILE_ADDRESS); 1611 1612 addr_reg = get_address_file_reg(ctx, reg.DimIndirect.Index); 1613 /* pull the value from index_reg */ 1614 sel = addr_reg; 1615 chan = 0; 1616 } else { 1617 sel = V_SQ_ALU_SRC_LITERAL; 1618 chan = reg.Dimension.Index; 1619 } 1620 1621 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 1622 temp_reg, 0, 1623 stride_bytes_reg, stride_bytes_chan, 1624 sel, chan, 1625 temp_reg, 0); 1626 if (r) 1627 return r; 1628 } 1629 1630 if (reg.Register.File == TGSI_FILE_INPUT) { 1631 name = info->input_semantic_name; 1632 index = info->input_semantic_index; 1633 array_first = info->input_array_first; 1634 } else if (reg.Register.File == TGSI_FILE_OUTPUT) { 1635 name = info->output_semantic_name; 1636 index = info->output_semantic_index; 1637 array_first = info->output_array_first; 1638 } else { 1639 assert(0); 1640 return -1; 1641 } 1642 if (reg.Register.Indirect) { 1643 int addr_reg; 1644 int first; 1645 /* Add the relative address of the element. 
*/ 1646 if (reg.Indirect.ArrayID) 1647 first = array_first[reg.Indirect.ArrayID]; 1648 else 1649 first = reg.Register.Index; 1650 1651 addr_reg = get_address_file_reg(ctx, reg.Indirect.Index); 1652 1653 /* pull the value from index_reg */ 1654 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 1655 temp_reg, 0, 1656 V_SQ_ALU_SRC_LITERAL, 16, 1657 addr_reg, 0, 1658 temp_reg, 0); 1659 if (r) 1660 return r; 1661 1662 param = r600_get_lds_unique_index(name[first], 1663 index[first]); 1664 1665 } else { 1666 param = r600_get_lds_unique_index(name[reg.Register.Index], 1667 index[reg.Register.Index]); 1668 } 1669 1670 /* add to base_addr - passed in temp_reg.x */ 1671 if (param) { 1672 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1673 temp_reg, 0, 1674 temp_reg, 0, 1675 V_SQ_ALU_SRC_LITERAL, param * 16); 1676 if (r) 1677 return r; 1678 1679 } 1680 return 0; 1681} 1682 1683static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, 1684 unsigned dst_reg) 1685{ 1686 struct r600_bytecode_alu alu; 1687 int r, i; 1688 1689 if ((ctx->bc->cf_last->ndw>>1) >= 0x60) 1690 ctx->bc->force_add_cf = 1; 1691 for (i = 1; i < 4; i++) { 1692 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1693 temp_reg, i, 1694 temp_reg, 0, 1695 V_SQ_ALU_SRC_LITERAL, 4 * i); 1696 } 1697 for (i = 0; i < 4; i++) { 1698 /* emit an LDS_READ_RET */ 1699 memset(&alu, 0, sizeof(alu)); 1700 alu.op = LDS_OP1_LDS_READ_RET; 1701 alu.src[0].sel = temp_reg; 1702 alu.src[0].chan = i; 1703 alu.src[1].sel = V_SQ_ALU_SRC_0; 1704 alu.src[2].sel = V_SQ_ALU_SRC_0; 1705 alu.dst.chan = 0; 1706 alu.is_lds_idx_op = true; 1707 alu.last = 1; 1708 r = r600_bytecode_add_alu(ctx->bc, &alu); 1709 if (r) 1710 return r; 1711 } 1712 for (i = 0; i < 4; i++) { 1713 /* then read from LDS_OQ_A_POP */ 1714 memset(&alu, 0, sizeof(alu)); 1715 1716 alu.op = ALU_OP1_MOV; 1717 alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; 1718 alu.src[0].chan = 0; 1719 alu.dst.sel = dst_reg; 1720 alu.dst.chan = i; 1721 alu.dst.write = 1; 1722 alu.last = 1; 
1723 r = r600_bytecode_add_alu(ctx->bc, &alu); 1724 if (r) 1725 return r; 1726 } 1727 return 0; 1728} 1729 1730static int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1731{ 1732 int r; 1733 unsigned temp_reg = r600_get_temp(ctx); 1734 1735 r = get_lds_offset0(ctx, 2, temp_reg, 1736 src->Register.Dimension ? false : true); 1737 if (r) 1738 return r; 1739 1740 /* the base address is now in temp.x */ 1741 r = r600_get_byte_address(ctx, temp_reg, 1742 NULL, src, ctx->tess_output_info, 1); 1743 if (r) 1744 return r; 1745 1746 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1747 if (r) 1748 return r; 1749 return 0; 1750} 1751 1752static int fetch_tcs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1753{ 1754 int r; 1755 unsigned temp_reg = r600_get_temp(ctx); 1756 1757 /* t.x = ips * r0.y */ 1758 r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24, 1759 temp_reg, 0, 1760 ctx->tess_input_info, 0, 1761 0, 1); 1762 1763 if (r) 1764 return r; 1765 1766 /* the base address is now in temp.x */ 1767 r = r600_get_byte_address(ctx, temp_reg, 1768 NULL, src, ctx->tess_input_info, 1); 1769 if (r) 1770 return r; 1771 1772 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1773 if (r) 1774 return r; 1775 return 0; 1776} 1777 1778static int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1779{ 1780 int r; 1781 unsigned temp_reg = r600_get_temp(ctx); 1782 1783 r = get_lds_offset0(ctx, 1, temp_reg, 1784 src->Register.Dimension ? 
false : true); 1785 if (r) 1786 return r; 1787 /* the base address is now in temp.x */ 1788 r = r600_get_byte_address(ctx, temp_reg, 1789 NULL, src, 1790 ctx->tess_output_info, 1); 1791 if (r) 1792 return r; 1793 1794 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1795 if (r) 1796 return r; 1797 return 0; 1798} 1799 1800static int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx) 1801{ 1802 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1803 int i; 1804 1805 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1806 struct tgsi_full_src_register *src = &inst->Src[i]; 1807 1808 if (ctx->type == TGSI_PROCESSOR_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) { 1809 int treg = r600_get_temp(ctx); 1810 fetch_tes_input(ctx, src, treg); 1811 ctx->src[i].sel = treg; 1812 ctx->src[i].rel = 0; 1813 } 1814 if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) { 1815 int treg = r600_get_temp(ctx); 1816 fetch_tcs_input(ctx, src, treg); 1817 ctx->src[i].sel = treg; 1818 ctx->src[i].rel = 0; 1819 } 1820 if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) { 1821 int treg = r600_get_temp(ctx); 1822 fetch_tcs_output(ctx, src, treg); 1823 ctx->src[i].sel = treg; 1824 ctx->src[i].rel = 0; 1825 } 1826 } 1827 return 0; 1828} 1829 1830static int tgsi_split_constant(struct r600_shader_ctx *ctx) 1831{ 1832 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1833 struct r600_bytecode_alu alu; 1834 int i, j, k, nconst, r; 1835 1836 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 1837 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 1838 nconst++; 1839 } 1840 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 1841 } 1842 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 1843 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 1844 continue; 1845 } 1846 1847 if (ctx->src[i].rel) { 1848 int chan = inst->Src[i].Indirect.Swizzle; 1849 
int treg = r600_get_temp(ctx); 1850 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg))) 1851 return r; 1852 1853 ctx->src[i].kc_bank = 0; 1854 ctx->src[i].kc_rel = 0; 1855 ctx->src[i].sel = treg; 1856 ctx->src[i].rel = 0; 1857 j--; 1858 } else if (j > 0) { 1859 int treg = r600_get_temp(ctx); 1860 for (k = 0; k < 4; k++) { 1861 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1862 alu.op = ALU_OP1_MOV; 1863 alu.src[0].sel = ctx->src[i].sel; 1864 alu.src[0].chan = k; 1865 alu.src[0].rel = ctx->src[i].rel; 1866 alu.src[0].kc_bank = ctx->src[i].kc_bank; 1867 alu.src[0].kc_rel = ctx->src[i].kc_rel; 1868 alu.dst.sel = treg; 1869 alu.dst.chan = k; 1870 alu.dst.write = 1; 1871 if (k == 3) 1872 alu.last = 1; 1873 r = r600_bytecode_add_alu(ctx->bc, &alu); 1874 if (r) 1875 return r; 1876 } 1877 ctx->src[i].sel = treg; 1878 ctx->src[i].rel =0; 1879 j--; 1880 } 1881 } 1882 return 0; 1883} 1884 1885/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 1886static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 1887{ 1888 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1889 struct r600_bytecode_alu alu; 1890 int i, j, k, nliteral, r; 1891 1892 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 1893 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1894 nliteral++; 1895 } 1896 } 1897 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 1898 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1899 int treg = r600_get_temp(ctx); 1900 for (k = 0; k < 4; k++) { 1901 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1902 alu.op = ALU_OP1_MOV; 1903 alu.src[0].sel = ctx->src[i].sel; 1904 alu.src[0].chan = k; 1905 alu.src[0].value = ctx->src[i].value[k]; 1906 alu.dst.sel = treg; 1907 alu.dst.chan = k; 1908 alu.dst.write = 1; 1909 if (k == 3) 1910 alu.last = 1; 1911 r = r600_bytecode_add_alu(ctx->bc, &alu); 
1912 if (r) 1913 return r; 1914 } 1915 ctx->src[i].sel = treg; 1916 j--; 1917 } 1918 } 1919 return 0; 1920} 1921 1922static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 1923{ 1924 int i, r, count = ctx->shader->ninput; 1925 1926 for (i = 0; i < count; i++) { 1927 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 1928 r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input); 1929 if (r) 1930 return r; 1931 } 1932 } 1933 return 0; 1934} 1935 1936static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so, 1937 int stream, unsigned *stream_item_size) 1938{ 1939 unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; 1940 unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; 1941 int i, j, r; 1942 1943 /* Sanity checking. */ 1944 if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) { 1945 R600_ERR("Too many stream outputs: %d\n", so->num_outputs); 1946 r = -EINVAL; 1947 goto out_err; 1948 } 1949 for (i = 0; i < so->num_outputs; i++) { 1950 if (so->output[i].output_buffer >= 4) { 1951 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", 1952 so->output[i].output_buffer); 1953 r = -EINVAL; 1954 goto out_err; 1955 } 1956 } 1957 1958 /* Initialize locations where the outputs are stored. */ 1959 for (i = 0; i < so->num_outputs; i++) { 1960 1961 so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr; 1962 start_comp[i] = so->output[i].start_component; 1963 /* Lower outputs with dst_offset < start_component. 1964 * 1965 * We can only output 4D vectors with a write mask, e.g. we can 1966 * only output the W component at offset 3, etc. If we want 1967 * to store Y, Z, or W at buffer offset 0, we need to use MOV 1968 * to move it to X and output X. 
*/ 1969 if (so->output[i].dst_offset < so->output[i].start_component) { 1970 unsigned tmp = r600_get_temp(ctx); 1971 1972 for (j = 0; j < so->output[i].num_components; j++) { 1973 struct r600_bytecode_alu alu; 1974 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1975 alu.op = ALU_OP1_MOV; 1976 alu.src[0].sel = so_gpr[i]; 1977 alu.src[0].chan = so->output[i].start_component + j; 1978 1979 alu.dst.sel = tmp; 1980 alu.dst.chan = j; 1981 alu.dst.write = 1; 1982 if (j == so->output[i].num_components - 1) 1983 alu.last = 1; 1984 r = r600_bytecode_add_alu(ctx->bc, &alu); 1985 if (r) 1986 return r; 1987 } 1988 start_comp[i] = 0; 1989 so_gpr[i] = tmp; 1990 } 1991 } 1992 1993 /* Write outputs to buffers. */ 1994 for (i = 0; i < so->num_outputs; i++) { 1995 struct r600_bytecode_output output; 1996 1997 if (stream != -1 && stream != so->output[i].output_buffer) 1998 continue; 1999 2000 memset(&output, 0, sizeof(struct r600_bytecode_output)); 2001 output.gpr = so_gpr[i]; 2002 output.elem_size = so->output[i].num_components - 1; 2003 if (output.elem_size == 2) 2004 output.elem_size = 3; // 3 not supported, write 4 with junk at end 2005 output.array_base = so->output[i].dst_offset - start_comp[i]; 2006 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 2007 output.burst_count = 1; 2008 /* array_size is an upper limit for the burst_count 2009 * with MEM_STREAM instructions */ 2010 output.array_size = 0xFFF; 2011 output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i]; 2012 2013 if (ctx->bc->chip_class >= EVERGREEN) { 2014 switch (so->output[i].output_buffer) { 2015 case 0: 2016 output.op = CF_OP_MEM_STREAM0_BUF0; 2017 break; 2018 case 1: 2019 output.op = CF_OP_MEM_STREAM0_BUF1; 2020 break; 2021 case 2: 2022 output.op = CF_OP_MEM_STREAM0_BUF2; 2023 break; 2024 case 3: 2025 output.op = CF_OP_MEM_STREAM0_BUF3; 2026 break; 2027 } 2028 output.op += so->output[i].stream * 4; 2029 assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= 
CF_OP_MEM_STREAM3_BUF3);
			ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4;
		} else {
			/* pre-Evergreen: one MEM_STREAM op per buffer, no stream encoding */
			switch (so->output[i].output_buffer) {
			case 0:
				output.op = CF_OP_MEM_STREAM0;
				break;
			case 1:
				output.op = CF_OP_MEM_STREAM1;
				break;
			case 2:
				output.op = CF_OP_MEM_STREAM2;
				break;
			case 3:
				output.op = CF_OP_MEM_STREAM3;
				break;
			}
			ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer;
		}
		r = r600_bytecode_add_output(ctx->bc, &output);
		if (r)
			goto out_err;
	}
	return 0;
out_err:
	return r;
}

/* Convert the vertex shader's edge-flag output from float to integer,
 * in place: a MOV with dst.clamp forces the value into [0,1], then
 * FLT_TO_INT rewrites channel x of the same GPR.  No-op unless the
 * shader actually writes an edge flag. */
static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx)
{
	struct r600_bytecode_alu alu;
	unsigned reg;

	if (!ctx->shader->vs_out_edgeflag)
		return;

	reg = ctx->shader->output[ctx->edgeflag_output].gpr;

	/* clamp(x, 0, 1) */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP1_MOV;
	alu.src[0].sel = reg;
	alu.dst.sel = reg;
	alu.dst.write = 1;
	alu.dst.clamp = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx->bc, &alu);

	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP1_FLT_TO_INT;
	alu.src[0].sel = reg;
	alu.dst.sel = reg;
	alu.dst.write = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx->bc, &alu);
}

/* Build the "GS copy shader": a generated vertex shader that fetches
 * the geometry shader's output vertices back from the GSVS ring buffer
 * and re-emits them as regular position/parameter exports (plus
 * streamout stores).  A predicated block is emitted per stream, keyed
 * on the stream id the GS stored in the top two bits of R0.x.
 * NOTE(review): on calloc failure this returns 0, the same value as
 * success — confirm callers check gs->gs_copy_shader rather than the
 * return code. */
static int generate_gs_copy_shader(struct r600_context *rctx,
				   struct r600_pipe_shader *gs,
				   struct pipe_stream_output_info *so)
{
	struct r600_shader_ctx ctx = {};
	struct r600_shader *gs_shader = &gs->shader;
	struct r600_pipe_shader *cshader;
	int ocnt = gs_shader->noutput;
	struct r600_bytecode_alu alu;
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_output output;
	struct r600_bytecode_cf *cf_jump, *cf_pop,
		*last_exp_pos = NULL, *last_exp_param = NULL;
	int i, j, next_clip_pos = 61, next_param = 0;
	int ring;

	cshader = calloc(1, sizeof(struct r600_pipe_shader));
	if (!cshader)
		return 0;

	memcpy(cshader->shader.output, gs_shader->output, ocnt *
	       sizeof(struct r600_shader_io));

	cshader->shader.noutput = ocnt;

	ctx.shader = &cshader->shader;
	ctx.bc = &ctx.shader->bc;
	ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX;

	r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family,
			   rctx->screen->has_compressed_msaa_texturing);

	ctx.bc->isa = rctx->isa;

	cf_jump = NULL;
	memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));

	/* R0.x = R0.x & 0x3fffffff -- strip the stream id bits from the vertex offset */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_AND_INT;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 0x3fffffff;
	alu.dst.write = 1;
	r600_bytecode_add_alu(ctx.bc, &alu);

	/* R0.y = R0.x >> 30 -- the stream id, compared against each ring below */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_LSHR_INT;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 0x1e;
	alu.dst.chan = 1;
	alu.dst.write = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx.bc, &alu);

	/* fetch vertex data from GSVS ring: one vec4 per output, GPRs 1..ocnt */
	for (i = 0; i < ocnt; ++i) {
		struct r600_shader_io *out = &ctx.shader->output[i];

		out->gpr = i + 1;
		out->ring_offset = i * 16;

		memset(&vtx, 0, sizeof(vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.mega_fetch_count = 16;
		vtx.offset = out->ring_offset;
		vtx.dst_gpr = out->gpr;
		vtx.src_gpr = 0;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		if (rctx->b.chip_class >= EVERGREEN) {
			vtx.use_const_fields = 1;
		} else {
			vtx.data_format = FMT_32_32_32_32_FLOAT;
		}

		r600_bytecode_add_vtx(ctx.bc, &vtx);
	}
	ctx.temp_reg = i + 1;
	/* one predicated block per ring, 3..0; ring 0 is always emitted */
	for (ring = 3; ring >=
0; --ring) {
		bool enabled = false;
		for (i = 0; i < so->num_outputs; i++) {
			if (so->output[i].stream == ring) {
				enabled = true;
				break;
			}
		}
		if (ring != 0 && !enabled) {
			cshader->shader.ring_item_sizes[ring] = 0;
			continue;
		}

		if (cf_jump) {
			/* Patch up the previous stream's jump label: both the
			 * JUMP and the closing POP target the slot after the POP. */
			r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
			cf_pop = ctx.bc->cf_last;

			cf_jump->cf_addr = cf_pop->id + 2;
			cf_jump->pop_count = 1;
			cf_pop->cf_addr = cf_pop->id + 2;
			cf_pop->pop_count = 1;
		}

		/* PRED_SETE_INT __, R0.y, ring -- only run this block for vertices of this stream */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP2_PRED_SETE_INT;
		alu.src[0].chan = 1;
		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = ring;
		alu.execute_mask = 1;
		alu.update_pred = 1;
		alu.last = 1;
		r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);

		r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
		cf_jump = ctx.bc->cf_last;

		if (enabled)
			emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]);
		/* NOTE(review): this unconditionally overwrites whatever
		 * emit_streamout() just stored in ring_item_sizes[ring] —
		 * confirm that is intended. */
		cshader->shader.ring_item_sizes[ring] = ocnt * 16;
	}

	/* bc adds nops - copy it */
	if (ctx.bc->chip_class == R600) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP0_NOP;
		alu.last = 1;
		r600_bytecode_add_alu(ctx.bc, &alu);

		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
	}

	/* export vertex data */
	/* XXX factor out common code with r600_shader_from_tgsi ? */
	for (i = 0; i < ocnt; ++i) {
		struct r600_shader_io *out = &ctx.shader->output[i];
		bool instream0 = true;
		if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
			continue;

		/* only stream-0 outputs become exports; others were handled by streamout */
		for (j = 0; j < so->num_outputs; j++) {
			if (so->output[j].register_index == i) {
				if (so->output[j].stream == 0)
					break;
				if (so->output[j].stream > 0)
					instream0 = false;
			}
		}
		if (!instream0)
			continue;
		memset(&output, 0, sizeof(output));
		output.gpr = out->gpr;
		output.elem_size = 3;
		output.swizzle_x = 0;
		output.swizzle_y = 1;
		output.swizzle_z = 2;
		output.swizzle_w = 3;
		output.burst_count = 1;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
		output.op = CF_OP_EXPORT;
		switch (out->name) {
		case TGSI_SEMANTIC_POSITION:
			output.array_base = 60;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			break;

		case TGSI_SEMANTIC_PSIZE:
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output.swizzle_y = 7;
			output.swizzle_z = 7;
			output.swizzle_w = 7;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_point_size = 1;
			break;
		case TGSI_SEMANTIC_LAYER:
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output.swizzle_x = 7;
			output.swizzle_y = 7;
			output.swizzle_z = 0;
			output.swizzle_w = 7;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_layer = 1;
			break;
		case TGSI_SEMANTIC_VIEWPORT_INDEX:
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_viewport = 1;
			output.swizzle_x = 7;
			output.swizzle_y = 7;
			output.swizzle_z = 7;
			output.swizzle_w = 0;
			break;
		case TGSI_SEMANTIC_CLIPDIST:
			/* spi_sid is 0 for clipdistance outputs that were generated
			 * for clipvertex - we don't need to pass them to PS */
			ctx.shader->clip_dist_write = gs->shader.clip_dist_write;
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = next_clip_pos++;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			break;
		case TGSI_SEMANTIC_FOG:
			output.swizzle_y = 4; /* 0 */
			output.swizzle_z = 4; /* 0 */
			output.swizzle_w = 5; /* 1 */
			break;
		default:
			output.array_base = next_param++;
			break;
		}
		r600_bytecode_add_output(ctx.bc, &output);
		if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
			last_exp_param = ctx.bc->cf_last;
		else
			last_exp_pos = ctx.bc->cf_last;
	}

	/* hardware requires at least one position export: emit a dummy one */
	if (!last_exp_pos) {
		memset(&output, 0, sizeof(output));
		output.gpr = 0;
		output.elem_size = 3;
		output.swizzle_x = 7;
		output.swizzle_y = 7;
		output.swizzle_z = 7;
		output.swizzle_w = 7;
		output.burst_count = 1;
		/* NOTE(review): output.type is assigned twice; the second
		 * (SQ_EXPORT_POS) value wins. */
		output.type = 2;
		output.op = CF_OP_EXPORT;
		output.array_base = 60;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
		r600_bytecode_add_output(ctx.bc, &output);
		last_exp_pos = ctx.bc->cf_last;
	}

	/* likewise at least one parameter export */
	if (!last_exp_param) {
		memset(&output, 0, sizeof(output));
		output.gpr = 0;
		output.elem_size = 3;
		output.swizzle_x = 7;
		output.swizzle_y = 7;
		output.swizzle_z = 7;
		output.swizzle_w = 7;
		output.burst_count = 1;
		/* NOTE(review): output.type is assigned twice; the second
		 * (SQ_EXPORT_PARAM) value wins. */
		output.type = 2;
		output.op = CF_OP_EXPORT;
		output.array_base = next_param++;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
		r600_bytecode_add_output(ctx.bc, &output);
		last_exp_param = ctx.bc->cf_last;
	}

	/* the final export of each kind must be the _DONE variant */
	last_exp_pos->op = CF_OP_EXPORT_DONE;
	last_exp_param->op = CF_OP_EXPORT_DONE;

	r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
	cf_pop = ctx.bc->cf_last;

	cf_jump->cf_addr = cf_pop->id + 2;
	cf_jump->pop_count = 1;
	cf_pop->cf_addr = cf_pop->id + 2;
	cf_pop->pop_count = 1;

	if (ctx.bc->chip_class == CAYMAN)
		cm_bytecode_add_cf_end(ctx.bc);
	else {
		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
		ctx.bc->cf_last->end_of_program = 1;
	}

	gs->gs_copy_shader = cshader;
	cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;

	ctx.bc->nstack = 1;

	return r600_bytecode_build(ctx.bc);
}

/* For indirect ring writes, advance the per-stream export address
 * register by one vertex.  gs_out_ring_offset is presumably in bytes
 * and the address register in 16-byte units (hence the >> 4) — TODO
 * confirm against the ring setup code.  No-op when ind is false. */
static int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind)
{
	if (ind) {
		struct r600_bytecode_alu alu;
		int r;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD_INT;
		alu.src[0].sel = ctx->gs_export_gpr_tregs[idx];
		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = ctx->gs_out_ring_offset >> 4;
		alu.dst.sel = ctx->gs_export_gpr_tregs[idx];
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Write the current vertex's outputs to the GS output ring (MEM_RING*).
 * Used both for a VS/TES compiled as "ES" (ctx->gs_for_vs set: each
 * output is matched to the GS inputs by name/sid to find its ring
 * slot) and for the GS itself.  stream == -1 is treated as stream 0;
 * ind selects indirect (index_gpr-relative) vs. absolute addressing. */
static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind)
{
	struct r600_bytecode_output output;
	int i, k, ring_offset;
	int
effective_stream = stream == -1 ? 0 : stream;
	int idx = 0;

	for (i = 0; i < ctx->shader->noutput; i++) {
		if (ctx->gs_for_vs) {
			/* for ES we need to lookup corresponding ring offset expected by GS
			 * (map this output to GS input by name and sid) */
			/* FIXME precompute offsets */
			ring_offset = -1;
			for(k = 0; k < ctx->gs_for_vs->ninput; ++k) {
				struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
				struct r600_shader_io *out = &ctx->shader->output[i];
				if (in->name == out->name && in->sid == out->sid)
					ring_offset = in->ring_offset;
			}

			/* output not consumed by the GS: skip it */
			if (ring_offset == -1)
				continue;
		} else {
			ring_offset = idx * 16;
			idx++;
		}

		if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
			continue;
		/* next_ring_offset after parsing input decls contains total size of
		 * single vertex data, gs_next_vertex - current vertex index */
		if (!ind)
			ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;

		memset(&output, 0, sizeof(struct r600_bytecode_output));
		output.gpr = ctx->shader->output[i].gpr;
		output.elem_size = 3;
		output.comp_mask = 0xF;
		output.burst_count = 1;

		if (ind)
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
		else
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;

		switch (stream) {
		default:
		case 0:
			output.op = CF_OP_MEM_RING; break;
		case 1:
			output.op = CF_OP_MEM_RING1; break;
		case 2:
			output.op = CF_OP_MEM_RING2; break;
		case 3:
			output.op = CF_OP_MEM_RING3; break;
		}

		if (ind) {
			output.array_base = ring_offset >> 2; /* in dwords */
			output.array_size = 0xfff;
			output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
		} else
			output.array_base = ring_offset >> 2; /* in dwords */
		r600_bytecode_add_output(ctx->bc, &output);
	}

	++ctx->gs_next_vertex;
	return 0;
}


/* Fetch the LDS layout descriptors for tessellation I/O from constant
 * buffer R600_LDS_INFO_CONST_BUFFER into the reserved GPRs
 * ctx->tess_input_info (buffer offset 0) and ctx->tess_output_info
 * (buffer offset 16).  Either fetch is skipped when the corresponding
 * register is 0, so the same helper serves VS/TCS and TCS/TES. */
static int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx)
{
	int r;
	struct r600_bytecode_vtx vtx;
	int temp_val = ctx->temp_reg;
	/* need to store the TCS output somewhere */
	r = single_alu_op2(ctx, ALU_OP1_MOV,
			   temp_val, 0,
			   V_SQ_ALU_SRC_LITERAL, 0,
			   0, 0);
	if (r)
		return r;

	/* used by VS/TCS */
	if (ctx->tess_input_info) {
		/* fetch tcs input values into resv space */
		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.mega_fetch_count = 16;
		vtx.data_format = FMT_32_32_32_32;
		vtx.num_format_all = 2;
		vtx.format_comp_all = 1;
		vtx.use_const_fields = 0;
		vtx.endian = r600_endian_swap(32);
		vtx.srf_mode_all = 1;
		vtx.offset = 0;
		vtx.dst_gpr = ctx->tess_input_info;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		vtx.src_gpr = temp_val;
		vtx.src_sel_x = 0;

		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
		if (r)
			return r;
	}

	/* used by TCS/TES */
	if (ctx->tess_output_info) {
		/* fetch tcs output values into resv space */
		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.mega_fetch_count = 16;
		vtx.data_format = FMT_32_32_32_32;
		vtx.num_format_all = 2;
		vtx.format_comp_all = 1;
		vtx.use_const_fields = 0;
		vtx.endian = r600_endian_swap(32);
		vtx.srf_mode_all = 1;
		vtx.offset = 16;
		vtx.dst_gpr = ctx->tess_output_info;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		vtx.src_gpr = temp_val;
		vtx.src_sel_x = 0;

		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
if (r)
			return r;
	}
	return 0;
}

/* VS-as-LS path: store every VS output to LDS so the TCS can read it.
 * The base LDS address is vertex_index * vertex_dw_stride (stride taken
 * from tess_input_info.y); each vec4 output is then written as two
 * dword pairs with LDS_WRITE_REL. */
static int emit_lds_vs_writes(struct r600_shader_ctx *ctx)
{
	int i, j, r;
	int temp_reg;

	/* fetch tcs input values into input_vals */
	ctx->tess_input_info = r600_get_temp(ctx);
	ctx->tess_output_info = 0;
	r = r600_fetch_tess_io_info(ctx);
	if (r)
		return r;

	temp_reg = r600_get_temp(ctx);
	/* dst reg contains LDS address stride * idx */
	/* MUL vertexID, vertex_dw_stride */
	r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24,
			   temp_reg, 0,
			   ctx->tess_input_info, 1,
			   0, 1); /* rel id in r0.y? */
	if (r)
		return r;

	for (i = 0; i < ctx->shader->noutput; i++) {
		struct r600_bytecode_alu alu;
		int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid);

		/* temp.y = base + byte offset of this output (param slots are 16 bytes) */
		if (param) {
			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
					   temp_reg, 1,
					   temp_reg, 0,
					   V_SQ_ALU_SRC_LITERAL, param * 16);
			if (r)
				return r;
		}

		/* temp.z = address of the second dword pair (+8 bytes) */
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, 2,
				   temp_reg, param ? 1 : 0,
				   V_SQ_ALU_SRC_LITERAL, 8);
		if (r)
			return r;


		/* write xy then zw, two dwords per LDS_WRITE_REL */
		for (j = 0; j < 2; j++) {
			int chan = (j == 1) ? 2 : (param ? 1 : 0);
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = LDS_OP3_LDS_WRITE_REL;
			alu.src[0].sel = temp_reg;
			alu.src[0].chan = chan;
			alu.src[1].sel = ctx->shader->output[i].gpr;
			alu.src[1].chan = j * 2;
			alu.src[2].sel = ctx->shader->output[i].gpr;
			alu.src[2].chan = (j * 2) + 1;
			alu.last = 1;
			alu.dst.chan = 0;
			alu.lds_idx = 1;
			alu.is_lds_idx_op = true;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}

/* Translate a TGSI store to a TCS output into LDS writes: compute the
 * LDS byte address for Dst[0] (get_lds_offset0's third argument flags
 * the non-dimensioned, i.e. per-patch-style, case — selected here by
 * dst->Register.Dimension), then write each enabled channel.  Adjacent
 * channel pairs are combined into a single LDS_WRITE_REL. */
static int r600_store_tcs_output(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const struct tgsi_full_dst_register *dst = &inst->Dst[0];
	int i, r, lasti;
	int temp_reg = r600_get_temp(ctx);
	struct r600_bytecode_alu alu;
	unsigned write_mask = dst->Register.WriteMask;

	if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
		return 0;

	r = get_lds_offset0(ctx, 1, temp_reg, dst->Register.Dimension ?
false : true);
	if (r)
		return r;

	/* the base address is now in temp.x */
	r = r600_get_byte_address(ctx, temp_reg,
				  &inst->Dst[0], NULL, ctx->tess_output_info, 1);
	if (r)
		return r;

	/* LDS write */
	/* precompute per-channel addresses: temp.i = temp.x + 4*i */
	lasti = tgsi_last_instruction(write_mask);
	for (i = 1; i <= lasti; i++) {

		if (!(write_mask & (1 << i)))
			continue;
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, i,
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 4 * i);
		if (r)
			return r;
	}

	for (i = 0; i <= lasti; i++) {
		if (!(write_mask & (1 << i)))
			continue;

		/* xy or zw fully enabled: store both dwords with one LDS_WRITE_REL */
		if ((i == 0 && ((write_mask & 3) == 3)) ||
		    (i == 2 && ((write_mask & 0xc) == 0xc))) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = LDS_OP3_LDS_WRITE_REL;
			alu.src[0].sel = temp_reg;
			alu.src[0].chan = i;

			alu.src[1].sel = dst->Register.Index;
			alu.src[1].sel += ctx->file_offset[dst->Register.File];
			alu.src[1].chan = i;

			alu.src[2].sel = dst->Register.Index;
			alu.src[2].sel += ctx->file_offset[dst->Register.File];
			alu.src[2].chan = i + 1;
			alu.lds_idx = 1;
			alu.dst.chan = 0;
			alu.last = 1;
			alu.is_lds_idx_op = true;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
			i += 1;
			continue;
		}
		/* lone channel: single-dword LDS write */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = LDS_OP2_LDS_WRITE;
		alu.src[0].sel = temp_reg;
		alu.src[0].chan = i;

		alu.src[1].sel = dst->Register.Index;
		alu.src[1].sel += ctx->file_offset[dst->Register.File];
		alu.src[1].chan = i;

		alu.src[2].sel = V_SQ_ALU_SRC_0;
		alu.dst.chan = 0;
		alu.last = 1;
		alu.is_lds_idx_op = true;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Read back a tessellation-factor output (TESSINNER/TESSOUTER) from
 * LDS into that output's GPR, using the output's unique LDS param slot
 * (16 bytes per slot). */
static int r600_tess_factor_read(struct r600_shader_ctx *ctx,
				 int output_idx)
{
	int param;
	unsigned temp_reg = r600_get_temp(ctx);
	unsigned name = ctx->shader->output[output_idx].name;
	int dreg = ctx->shader->output[output_idx].gpr;
	int r;

	param = r600_get_lds_unique_index(name, 0);
	r = get_lds_offset0(ctx, 1, temp_reg, true);
	if (r)
		return r;

	r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
			   temp_reg, 0,
			   temp_reg, 0,
			   V_SQ_ALU_SRC_LITERAL, param * 16);
	if (r)
		return r;

	do_lds_fetch_values(ctx, temp_reg, dreg);
	return 0;
}

/* Emit the tessellation-factor stores (GDS TF_WRITE) at the end of a
 * TCS.  The whole block is wrapped in a predicated JUMP/POP pair so
 * only one invocation per patch executes it.  Returns -1 when the
 * required TESSOUTER/TESSINNER outputs are missing for the primitive
 * mode. */
static int r600_emit_tess_factor(struct r600_shader_ctx *ctx)
{
	int i;
	int stride, outer_comps, inner_comps;
	int tessinner_idx = -1, tessouter_idx = -1;
	int r;
	int temp_reg = r600_get_temp(ctx);
	int treg[3] = {-1, -1, -1};
	struct r600_bytecode_alu alu;
	struct r600_bytecode_cf *cf_jump, *cf_pop;

	/* only execute factor emission for invocation 0 */
	/* PRED_SETE_INT __, R0.x, 0 */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_PRED_SETE_INT;
	/* NOTE(review): the comment above says R0.x but chan == 2 selects
	 * R0.z — confirm which channel carries the invocation id.  The
	 * literal operand is left at 0 by the memset, i.e. compare == 0. */
	alu.src[0].chan = 2;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.execute_mask = 1;
	alu.update_pred = 1;
	alu.last = 1;
	r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
	cf_jump = ctx->bc->cf_last;

	/* one temp GPR per pair of (index, value) dword pairs to store */
	treg[0] = r600_get_temp(ctx);
	switch (ctx->shader->tcs_prim_mode) {
	case PIPE_PRIM_LINES:
		stride = 8; /* 2 dwords, 1 vec2 store */
		outer_comps = 2;
		inner_comps = 0;
		break;
	case PIPE_PRIM_TRIANGLES:
		stride = 16; /* 4 dwords, 1 vec4 store */
		outer_comps = 3;
		inner_comps = 1;
		treg[1] = r600_get_temp(ctx);
		break;
	case PIPE_PRIM_QUADS:
		stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */
		outer_comps = 4;
		inner_comps = 2;
		treg[1] = r600_get_temp(ctx);
		treg[2] = r600_get_temp(ctx);
		break;
	default:
		assert(0);
		return -1;
	}

	/* R0 is
InvocationID, RelPatchID, PatchID, tf_base */
	/* TF_WRITE takes index in R.x, value in R.y */
	for (i = 0; i < ctx->shader->noutput; i++) {
		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSINNER)
			tessinner_idx = i;
		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSOUTER)
			tessouter_idx = i;
	}

	if (tessouter_idx == -1)
		return -1;

	if (tessinner_idx == -1 && inner_comps)
		return -1;

	if (tessouter_idx != -1) {
		r = r600_tess_factor_read(ctx, tessouter_idx);
		if (r)
			return r;
	}

	if (tessinner_idx != -1) {
		r = r600_tess_factor_read(ctx, tessinner_idx);
		if (r)
			return r;
	}

	/* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */
	/* r.x = relpatchid(r0.y) * tf_stride */

	/* multiply incoming r0.y * stride - t.x = r0.y * stride */
	/* add incoming r0.w to it: t.x = t.x + r0.w */
	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
			   temp_reg, 0,
			   0, 1,
			   V_SQ_ALU_SRC_LITERAL, stride,
			   0, 3);
	if (r)
		return r;

	/* lay out (tf_index, value) pairs: outer components first, then inner */
	for (i = 0; i < outer_comps + inner_comps; i++) {
		int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx;
		int out_comp = i >= outer_comps ? i - outer_comps : i;

		/* treg[i/2].{x|z} = dword index of factor i */
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   treg[i / 2], (2 * (i % 2)),
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 4 * i);
		if (r)
			return r;
		/* treg[i/2].{y|w} = factor value */
		r = single_alu_op2(ctx, ALU_OP1_MOV,
				   treg[i / 2], 1 + (2 * (i%2)),
				   ctx->shader->output[out_idx].gpr, out_comp,
				   0, 0);
		if (r)
			return r;
	}
	/* one TF_WRITE per factor, reading the pair from treg[i/2].xy or .zw */
	for (i = 0; i < outer_comps + inner_comps; i++) {
		struct r600_bytecode_gds gds;

		memset(&gds, 0, sizeof(struct r600_bytecode_gds));
		gds.src_gpr = treg[i / 2];
		gds.src_sel_x = 2 * (i % 2);
		gds.src_sel_y = 1 + (2 * (i % 2));
		gds.src_sel_z = 4;
		gds.dst_sel_x = 7;
		gds.dst_sel_y = 7;
		gds.dst_sel_z = 7;
		gds.dst_sel_w = 7;
		gds.op = FETCH_OP_TF_WRITE;
		r = r600_bytecode_add_gds(ctx->bc, &gds);
		if (r)
			return r;
	}

	// Patch up jump label
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
	cf_pop = ctx->bc->cf_last;

	cf_jump->cf_addr = cf_pop->id + 2;
	cf_jump->pop_count = 1;
	cf_pop->cf_addr = cf_pop->id + 2;
	cf_pop->pop_count = 1;

	return 0;
}

static int r600_shader_from_tgsi(struct r600_context *rctx,
				 struct r600_pipe_shader *pipeshader,
				 union r600_shader_key key)
{
	struct r600_screen *rscreen = rctx->screen;
	struct r600_shader *shader = &pipeshader->shader;
	struct tgsi_token *tokens = pipeshader->selector->tokens;
	struct pipe_stream_output_info so = pipeshader->selector->so;
	struct tgsi_full_immediate *immediate;
	struct r600_shader_ctx ctx;
	struct r600_bytecode_output output[32];
	unsigned output_done, noutput;
	unsigned opcode;
	int i, j, k, r = 0;
	int next_param_base = 0, next_clip_base;
	int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
	/* Declarations used by llvm code */
	bool use_llvm = false;
	bool indirect_gprs;
	bool ring_outputs = false;
	bool lds_outputs = false;
	bool lds_inputs
= false; 2888 bool pos_emitted = false; 2889 2890#ifdef R600_USE_LLVM 2891 use_llvm = rscreen->b.debug_flags & DBG_LLVM; 2892#endif 2893 ctx.bc = &shader->bc; 2894 ctx.shader = shader; 2895 ctx.native_integers = true; 2896 2897 2898 r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, 2899 rscreen->has_compressed_msaa_texturing); 2900 ctx.tokens = tokens; 2901 tgsi_scan_shader(tokens, &ctx.info); 2902 shader->indirect_files = ctx.info.indirect_files; 2903 2904 shader->uses_doubles = ctx.info.uses_doubles; 2905 2906 indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER)); 2907 tgsi_parse_init(&ctx.parse, tokens); 2908 ctx.type = ctx.info.processor; 2909 shader->processor_type = ctx.type; 2910 ctx.bc->type = shader->processor_type; 2911 2912 switch (ctx.type) { 2913 case TGSI_PROCESSOR_VERTEX: 2914 shader->vs_as_gs_a = key.vs.as_gs_a; 2915 shader->vs_as_es = key.vs.as_es; 2916 shader->vs_as_ls = key.vs.as_ls; 2917 if (shader->vs_as_es) 2918 ring_outputs = true; 2919 if (shader->vs_as_ls) 2920 lds_outputs = true; 2921 break; 2922 case TGSI_PROCESSOR_GEOMETRY: 2923 ring_outputs = true; 2924 break; 2925 case TGSI_PROCESSOR_TESS_CTRL: 2926 shader->tcs_prim_mode = key.tcs.prim_mode; 2927 lds_outputs = true; 2928 lds_inputs = true; 2929 break; 2930 case TGSI_PROCESSOR_TESS_EVAL: 2931 shader->tes_as_es = key.tes.as_es; 2932 lds_inputs = true; 2933 if (shader->tes_as_es) 2934 ring_outputs = true; 2935 break; 2936 case TGSI_PROCESSOR_FRAGMENT: 2937 shader->two_side = key.ps.color_two_side; 2938 break; 2939 default: 2940 break; 2941 } 2942 2943 if (shader->vs_as_es || shader->tes_as_es) { 2944 ctx.gs_for_vs = &rctx->gs_shader->current->shader; 2945 } else { 2946 ctx.gs_for_vs = NULL; 2947 } 2948 2949 ctx.next_ring_offset = 0; 2950 ctx.gs_out_ring_offset = 0; 2951 ctx.gs_next_vertex = 0; 2952 ctx.gs_stream_output_info = &so; 2953 2954 ctx.face_gpr = -1; 2955 ctx.fixed_pt_position_gpr = -1; 2956 ctx.fragcoord_input = -1; 
2957 ctx.colors_used = 0; 2958 ctx.clip_vertex_write = 0; 2959 2960 shader->nr_ps_color_exports = 0; 2961 shader->nr_ps_max_color_exports = 0; 2962 2963 2964 /* register allocations */ 2965 /* Values [0,127] correspond to GPR[0..127]. 2966 * Values [128,159] correspond to constant buffer bank 0 2967 * Values [160,191] correspond to constant buffer bank 1 2968 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 2969 * Values [256,287] correspond to constant buffer bank 2 (EG) 2970 * Values [288,319] correspond to constant buffer bank 3 (EG) 2971 * Other special values are shown in the list below. 2972 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 2973 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 2974 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 2975 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 2976 * 248 SQ_ALU_SRC_0: special constant 0.0. 2977 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 2978 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 2979 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 2980 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 2981 * 253 SQ_ALU_SRC_LITERAL: literal constant. 2982 * 254 SQ_ALU_SRC_PV: previous vector result. 2983 * 255 SQ_ALU_SRC_PS: previous scalar result. 2984 */ 2985 for (i = 0; i < TGSI_FILE_COUNT; i++) { 2986 ctx.file_offset[i] = 0; 2987 } 2988 2989#ifdef R600_USE_LLVM 2990 if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) { 2991 fprintf(stderr, "Warning: R600 LLVM backend does not support " 2992 "indirect adressing. 
Falling back to TGSI " 2993 "backend.\n"); 2994 use_llvm = 0; 2995 } 2996#endif 2997 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 2998 ctx.file_offset[TGSI_FILE_INPUT] = 1; 2999 if (!use_llvm) { 3000 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); 3001 } 3002 } 3003 if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { 3004 if (ctx.bc->chip_class >= EVERGREEN) 3005 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 3006 else 3007 ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); 3008 } 3009 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3010 /* FIXME 1 would be enough in some cases (3 or less input vertices) */ 3011 ctx.file_offset[TGSI_FILE_INPUT] = 2; 3012 } 3013 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) 3014 ctx.file_offset[TGSI_FILE_INPUT] = 1; 3015 if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) { 3016 bool add_tesscoord = false, add_tess_inout = false; 3017 ctx.file_offset[TGSI_FILE_INPUT] = 1; 3018 for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { 3019 /* if we have tesscoord save one reg */ 3020 if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSCOORD) 3021 add_tesscoord = true; 3022 if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSINNER || 3023 ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSOUTER) 3024 add_tess_inout = true; 3025 } 3026 if (add_tesscoord || add_tess_inout) 3027 ctx.file_offset[TGSI_FILE_INPUT]++; 3028 if (add_tess_inout) 3029 ctx.file_offset[TGSI_FILE_INPUT]+=2; 3030 } 3031 ctx.use_llvm = use_llvm; 3032 3033 if (use_llvm) { 3034 ctx.file_offset[TGSI_FILE_OUTPUT] = 3035 ctx.file_offset[TGSI_FILE_INPUT]; 3036 } else { 3037 ctx.file_offset[TGSI_FILE_OUTPUT] = 3038 ctx.file_offset[TGSI_FILE_INPUT] + 3039 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3040 } 3041 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 3042 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 3043 3044 /* Outside the GPR range. 
This will be translated to one of the 3045 * kcache banks later. */ 3046 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 3047 3048 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 3049 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 3050 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 3051 ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1; 3052 ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2; 3053 3054 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) { 3055 ctx.tess_input_info = ctx.bc->ar_reg + 3; 3056 ctx.tess_output_info = ctx.bc->ar_reg + 4; 3057 ctx.temp_reg = ctx.bc->ar_reg + 5; 3058 } else if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) { 3059 ctx.tess_input_info = 0; 3060 ctx.tess_output_info = ctx.bc->ar_reg + 3; 3061 ctx.temp_reg = ctx.bc->ar_reg + 4; 3062 } else if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3063 ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3; 3064 ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4; 3065 ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5; 3066 ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6; 3067 ctx.temp_reg = ctx.bc->ar_reg + 7; 3068 } else { 3069 ctx.temp_reg = ctx.bc->ar_reg + 3; 3070 } 3071 3072 shader->max_arrays = 0; 3073 shader->num_arrays = 0; 3074 if (indirect_gprs) { 3075 3076 if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { 3077 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], 3078 ctx.file_offset[TGSI_FILE_OUTPUT] - 3079 ctx.file_offset[TGSI_FILE_INPUT], 3080 0x0F); 3081 } 3082 if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 3083 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], 3084 ctx.file_offset[TGSI_FILE_TEMPORARY] - 3085 ctx.file_offset[TGSI_FILE_OUTPUT], 3086 0x0F); 3087 } 3088 } 3089 3090 ctx.nliterals = 0; 3091 ctx.literals = NULL; 3092 3093 shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]; 3094 shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 3095 shader->ps_conservative_z = 
(uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]; 3096 3097 if (shader->vs_as_gs_a) 3098 vs_add_primid_output(&ctx, key.vs.prim_id_out); 3099 3100 if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) 3101 r600_fetch_tess_io_info(&ctx); 3102 3103 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3104 tgsi_parse_token(&ctx.parse); 3105 switch (ctx.parse.FullToken.Token.Type) { 3106 case TGSI_TOKEN_TYPE_IMMEDIATE: 3107 immediate = &ctx.parse.FullToken.FullImmediate; 3108 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 3109 if(ctx.literals == NULL) { 3110 r = -ENOMEM; 3111 goto out_err; 3112 } 3113 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 3114 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 3115 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 3116 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 3117 ctx.nliterals++; 3118 break; 3119 case TGSI_TOKEN_TYPE_DECLARATION: 3120 r = tgsi_declaration(&ctx); 3121 if (r) 3122 goto out_err; 3123 break; 3124 case TGSI_TOKEN_TYPE_INSTRUCTION: 3125 case TGSI_TOKEN_TYPE_PROPERTY: 3126 break; 3127 default: 3128 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 3129 r = -EINVAL; 3130 goto out_err; 3131 } 3132 } 3133 3134 shader->ring_item_sizes[0] = ctx.next_ring_offset; 3135 shader->ring_item_sizes[1] = 0; 3136 shader->ring_item_sizes[2] = 0; 3137 shader->ring_item_sizes[3] = 0; 3138 3139 /* Process two side if needed */ 3140 if (shader->two_side && ctx.colors_used) { 3141 int i, count = ctx.shader->ninput; 3142 unsigned next_lds_loc = ctx.shader->nlds; 3143 3144 /* additional inputs will be allocated right after the existing inputs, 3145 * we won't need them after the color selection, so we don't need to 3146 * reserve these gprs for the rest of the shader code and to adjust 3147 * output offsets etc. 
*/ 3148 int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 3149 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3150 3151 /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */ 3152 if (ctx.face_gpr == -1) { 3153 i = ctx.shader->ninput++; 3154 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 3155 ctx.shader->input[i].spi_sid = 0; 3156 ctx.shader->input[i].gpr = gpr++; 3157 ctx.face_gpr = ctx.shader->input[i].gpr; 3158 } 3159 3160 for (i = 0; i < count; i++) { 3161 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 3162 int ni = ctx.shader->ninput++; 3163 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 3164 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 3165 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 3166 ctx.shader->input[ni].gpr = gpr++; 3167 // TGSI to LLVM needs to know the lds position of inputs. 3168 // Non LLVM path computes it later (in process_twoside_color) 3169 ctx.shader->input[ni].lds_pos = next_lds_loc++; 3170 ctx.shader->input[i].back_color_input = ni; 3171 if (ctx.bc->chip_class >= EVERGREEN) { 3172 if ((r = evergreen_interp_input(&ctx, ni))) 3173 return r; 3174 } 3175 } 3176 } 3177 } 3178 3179/* LLVM backend setup */ 3180#ifdef R600_USE_LLVM 3181 if (use_llvm) { 3182 struct radeon_llvm_context radeon_llvm_ctx; 3183 LLVMModuleRef mod; 3184 bool dump = r600_can_dump_shader(&rscreen->b, tokens); 3185 boolean use_kill = false; 3186 3187 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); 3188 radeon_llvm_ctx.type = ctx.type; 3189 radeon_llvm_ctx.two_side = shader->two_side; 3190 radeon_llvm_ctx.face_gpr = ctx.face_gpr; 3191 radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1; 3192 radeon_llvm_ctx.r600_inputs = ctx.shader->input; 3193 radeon_llvm_ctx.r600_outputs = ctx.shader->output; 3194 radeon_llvm_ctx.color_buffer_count = max_color_exports; 3195 radeon_llvm_ctx.chip_class = ctx.bc->chip_class; 3196 radeon_llvm_ctx.fs_color_all = 
shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN); 3197 radeon_llvm_ctx.stream_outputs = &so; 3198 radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one; 3199 radeon_llvm_ctx.has_compressed_msaa_texturing = 3200 ctx.bc->has_compressed_msaa_texturing; 3201 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); 3202 ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp; 3203 ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers; 3204 3205 if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) { 3206 radeon_llvm_dispose(&radeon_llvm_ctx); 3207 use_llvm = 0; 3208 fprintf(stderr, "R600 LLVM backend failed to compile " 3209 "shader. Falling back to TGSI\n"); 3210 } else { 3211 ctx.file_offset[TGSI_FILE_OUTPUT] = 3212 ctx.file_offset[TGSI_FILE_INPUT]; 3213 } 3214 if (use_kill) 3215 ctx.shader->uses_kill = use_kill; 3216 radeon_llvm_dispose(&radeon_llvm_ctx); 3217 } 3218#endif 3219/* End of LLVM backend setup */ 3220 3221 if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) 3222 shader->nr_ps_max_color_exports = 8; 3223 3224 if (!use_llvm) { 3225 if (ctx.fragcoord_input >= 0) { 3226 if (ctx.bc->chip_class == CAYMAN) { 3227 for (j = 0 ; j < 4; j++) { 3228 struct r600_bytecode_alu alu; 3229 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3230 alu.op = ALU_OP1_RECIP_IEEE; 3231 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3232 alu.src[0].chan = 3; 3233 3234 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3235 alu.dst.chan = j; 3236 alu.dst.write = (j == 3); 3237 alu.last = 1; 3238 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3239 return r; 3240 } 3241 } else { 3242 struct r600_bytecode_alu alu; 3243 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3244 alu.op = ALU_OP1_RECIP_IEEE; 3245 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3246 alu.src[0].chan = 3; 3247 3248 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3249 alu.dst.chan = 3; 3250 alu.dst.write = 1; 3251 
alu.last = 1; 3252 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3253 return r; 3254 } 3255 } 3256 3257 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3258 struct r600_bytecode_alu alu; 3259 int r; 3260 3261 /* GS thread with no output workaround - emit a cut at start of GS */ 3262 if (ctx.bc->chip_class == R600) 3263 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); 3264 3265 for (j = 0; j < 4; j++) { 3266 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3267 alu.op = ALU_OP1_MOV; 3268 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3269 alu.src[0].value = 0; 3270 alu.dst.sel = ctx.gs_export_gpr_tregs[j]; 3271 alu.dst.write = 1; 3272 alu.last = 1; 3273 r = r600_bytecode_add_alu(ctx.bc, &alu); 3274 if (r) 3275 return r; 3276 } 3277 } 3278 3279 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) 3280 r600_fetch_tess_io_info(&ctx); 3281 3282 if (shader->two_side && ctx.colors_used) { 3283 if ((r = process_twoside_color_inputs(&ctx))) 3284 return r; 3285 } 3286 3287 tgsi_parse_init(&ctx.parse, tokens); 3288 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3289 tgsi_parse_token(&ctx.parse); 3290 switch (ctx.parse.FullToken.Token.Type) { 3291 case TGSI_TOKEN_TYPE_INSTRUCTION: 3292 r = tgsi_is_supported(&ctx); 3293 if (r) 3294 goto out_err; 3295 ctx.max_driver_temp_used = 0; 3296 /* reserve first tmp for everyone */ 3297 r600_get_temp(&ctx); 3298 3299 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 3300 if ((r = tgsi_split_constant(&ctx))) 3301 goto out_err; 3302 if ((r = tgsi_split_literal_constant(&ctx))) 3303 goto out_err; 3304 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3305 if ((r = tgsi_split_gs_inputs(&ctx))) 3306 goto out_err; 3307 } else if (lds_inputs) { 3308 if ((r = tgsi_split_lds_inputs(&ctx))) 3309 goto out_err; 3310 } 3311 if (ctx.bc->chip_class == CAYMAN) 3312 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 3313 else if (ctx.bc->chip_class >= EVERGREEN) 3314 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 3315 else 3316 ctx.inst_info = 
&r600_shader_tgsi_instruction[opcode]; 3317 r = ctx.inst_info->process(&ctx); 3318 if (r) 3319 goto out_err; 3320 3321 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) { 3322 r = r600_store_tcs_output(&ctx); 3323 if (r) 3324 goto out_err; 3325 } 3326 break; 3327 default: 3328 break; 3329 } 3330 } 3331 } 3332 3333 /* Reset the temporary register counter. */ 3334 ctx.max_driver_temp_used = 0; 3335 3336 noutput = shader->noutput; 3337 3338 if (!ring_outputs && ctx.clip_vertex_write) { 3339 unsigned clipdist_temp[2]; 3340 3341 clipdist_temp[0] = r600_get_temp(&ctx); 3342 clipdist_temp[1] = r600_get_temp(&ctx); 3343 3344 /* need to convert a clipvertex write into clipdistance writes and not export 3345 the clip vertex anymore */ 3346 3347 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 3348 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3349 shader->output[noutput].gpr = clipdist_temp[0]; 3350 noutput++; 3351 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3352 shader->output[noutput].gpr = clipdist_temp[1]; 3353 noutput++; 3354 3355 /* reset spi_sid for clipvertex output to avoid confusing spi */ 3356 shader->output[ctx.cv_output].spi_sid = 0; 3357 3358 shader->clip_dist_write = 0xFF; 3359 3360 for (i = 0; i < 8; i++) { 3361 int oreg = i >> 2; 3362 int ochan = i & 3; 3363 3364 for (j = 0; j < 4; j++) { 3365 struct r600_bytecode_alu alu; 3366 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3367 alu.op = ALU_OP2_DOT4; 3368 alu.src[0].sel = shader->output[ctx.cv_output].gpr; 3369 alu.src[0].chan = j; 3370 3371 alu.src[1].sel = 512 + i; 3372 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 3373 alu.src[1].chan = j; 3374 3375 alu.dst.sel = clipdist_temp[oreg]; 3376 alu.dst.chan = j; 3377 alu.dst.write = (j == ochan); 3378 if (j == 3) 3379 alu.last = 1; 3380 if (!use_llvm) 3381 r = r600_bytecode_add_alu(ctx.bc, &alu); 3382 if (r) 3383 return r; 3384 } 3385 } 3386 } 3387 3388 /* Add stream outputs. 
*/ 3389 if (!use_llvm && so.num_outputs) { 3390 bool emit = false; 3391 if (!lds_outputs && !ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX) 3392 emit = true; 3393 if (!ring_outputs && ctx.type == TGSI_PROCESSOR_TESS_EVAL) 3394 emit = true; 3395 if (emit) 3396 emit_streamout(&ctx, &so, -1, NULL); 3397 } 3398 pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 3399 convert_edgeflag_to_int(&ctx); 3400 3401 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) 3402 r600_emit_tess_factor(&ctx); 3403 3404 if (lds_outputs) { 3405 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 3406 if (ctx.shader->noutput) 3407 emit_lds_vs_writes(&ctx); 3408 } 3409 } else if (ring_outputs) { 3410 if (shader->vs_as_es || shader->tes_as_es) { 3411 ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx); 3412 ctx.gs_export_gpr_tregs[1] = -1; 3413 ctx.gs_export_gpr_tregs[2] = -1; 3414 ctx.gs_export_gpr_tregs[3] = -1; 3415 3416 emit_gs_ring_writes(&ctx, &so, -1, FALSE); 3417 } 3418 } else { 3419 /* Export output */ 3420 next_clip_base = shader->vs_out_misc_write ? 
62 : 61; 3421 3422 for (i = 0, j = 0; i < noutput; i++, j++) { 3423 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3424 output[j].gpr = shader->output[i].gpr; 3425 output[j].elem_size = 3; 3426 output[j].swizzle_x = 0; 3427 output[j].swizzle_y = 1; 3428 output[j].swizzle_z = 2; 3429 output[j].swizzle_w = 3; 3430 output[j].burst_count = 1; 3431 output[j].type = -1; 3432 output[j].op = CF_OP_EXPORT; 3433 switch (ctx.type) { 3434 case TGSI_PROCESSOR_VERTEX: 3435 case TGSI_PROCESSOR_TESS_EVAL: 3436 switch (shader->output[i].name) { 3437 case TGSI_SEMANTIC_POSITION: 3438 output[j].array_base = 60; 3439 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3440 pos_emitted = true; 3441 break; 3442 3443 case TGSI_SEMANTIC_PSIZE: 3444 output[j].array_base = 61; 3445 output[j].swizzle_y = 7; 3446 output[j].swizzle_z = 7; 3447 output[j].swizzle_w = 7; 3448 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3449 pos_emitted = true; 3450 break; 3451 case TGSI_SEMANTIC_EDGEFLAG: 3452 output[j].array_base = 61; 3453 output[j].swizzle_x = 7; 3454 output[j].swizzle_y = 0; 3455 output[j].swizzle_z = 7; 3456 output[j].swizzle_w = 7; 3457 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3458 pos_emitted = true; 3459 break; 3460 case TGSI_SEMANTIC_LAYER: 3461 /* spi_sid is 0 for outputs that are 3462 * not consumed by PS */ 3463 if (shader->output[i].spi_sid) { 3464 output[j].array_base = next_param_base++; 3465 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3466 j++; 3467 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3468 } 3469 output[j].array_base = 61; 3470 output[j].swizzle_x = 7; 3471 output[j].swizzle_y = 7; 3472 output[j].swizzle_z = 0; 3473 output[j].swizzle_w = 7; 3474 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3475 pos_emitted = true; 3476 break; 3477 case TGSI_SEMANTIC_VIEWPORT_INDEX: 3478 /* spi_sid is 0 for outputs that are 3479 * not consumed by PS */ 3480 if 
(shader->output[i].spi_sid) { 3481 output[j].array_base = next_param_base++; 3482 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3483 j++; 3484 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3485 } 3486 output[j].array_base = 61; 3487 output[j].swizzle_x = 7; 3488 output[j].swizzle_y = 7; 3489 output[j].swizzle_z = 7; 3490 output[j].swizzle_w = 0; 3491 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3492 pos_emitted = true; 3493 break; 3494 case TGSI_SEMANTIC_CLIPVERTEX: 3495 j--; 3496 break; 3497 case TGSI_SEMANTIC_CLIPDIST: 3498 output[j].array_base = next_clip_base++; 3499 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3500 pos_emitted = true; 3501 /* spi_sid is 0 for clipdistance outputs that were generated 3502 * for clipvertex - we don't need to pass them to PS */ 3503 if (shader->output[i].spi_sid) { 3504 j++; 3505 /* duplicate it as PARAM to pass to the pixel shader */ 3506 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3507 output[j].array_base = next_param_base++; 3508 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3509 } 3510 break; 3511 case TGSI_SEMANTIC_FOG: 3512 output[j].swizzle_y = 4; /* 0 */ 3513 output[j].swizzle_z = 4; /* 0 */ 3514 output[j].swizzle_w = 5; /* 1 */ 3515 break; 3516 case TGSI_SEMANTIC_PRIMID: 3517 output[j].swizzle_x = 2; 3518 output[j].swizzle_y = 4; /* 0 */ 3519 output[j].swizzle_z = 4; /* 0 */ 3520 output[j].swizzle_w = 4; /* 0 */ 3521 break; 3522 } 3523 3524 break; 3525 case TGSI_PROCESSOR_FRAGMENT: 3526 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 3527 /* never export more colors than the number of CBs */ 3528 if (shader->output[i].sid >= max_color_exports) { 3529 /* skip export */ 3530 j--; 3531 continue; 3532 } 3533 output[j].swizzle_w = key.ps.alpha_to_one ? 
5 : 3; 3534 output[j].array_base = shader->output[i].sid; 3535 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3536 shader->nr_ps_color_exports++; 3537 if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { 3538 for (k = 1; k < max_color_exports; k++) { 3539 j++; 3540 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3541 output[j].gpr = shader->output[i].gpr; 3542 output[j].elem_size = 3; 3543 output[j].swizzle_x = 0; 3544 output[j].swizzle_y = 1; 3545 output[j].swizzle_z = 2; 3546 output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 3547 output[j].burst_count = 1; 3548 output[j].array_base = k; 3549 output[j].op = CF_OP_EXPORT; 3550 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3551 shader->nr_ps_color_exports++; 3552 } 3553 } 3554 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 3555 output[j].array_base = 61; 3556 output[j].swizzle_x = 2; 3557 output[j].swizzle_y = 7; 3558 output[j].swizzle_z = output[j].swizzle_w = 7; 3559 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3560 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 3561 output[j].array_base = 61; 3562 output[j].swizzle_x = 7; 3563 output[j].swizzle_y = 1; 3564 output[j].swizzle_z = output[j].swizzle_w = 7; 3565 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3566 } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 3567 output[j].array_base = 61; 3568 output[j].swizzle_x = 7; 3569 output[j].swizzle_y = 7; 3570 output[j].swizzle_z = 0; 3571 output[j].swizzle_w = 7; 3572 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3573 } else { 3574 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 3575 r = -EINVAL; 3576 goto out_err; 3577 } 3578 break; 3579 case TGSI_PROCESSOR_TESS_CTRL: 3580 break; 3581 default: 3582 R600_ERR("unsupported processor type %d\n", ctx.type); 3583 r = -EINVAL; 3584 goto out_err; 3585 } 3586 3587 if (output[j].type==-1) { 3588 
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3589 output[j].array_base = next_param_base++; 3590 } 3591 } 3592 3593 /* add fake position export */ 3594 if ((ctx.type == TGSI_PROCESSOR_VERTEX || ctx.type == TGSI_PROCESSOR_TESS_EVAL) && pos_emitted == false) { 3595 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3596 output[j].gpr = 0; 3597 output[j].elem_size = 3; 3598 output[j].swizzle_x = 7; 3599 output[j].swizzle_y = 7; 3600 output[j].swizzle_z = 7; 3601 output[j].swizzle_w = 7; 3602 output[j].burst_count = 1; 3603 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3604 output[j].array_base = 60; 3605 output[j].op = CF_OP_EXPORT; 3606 j++; 3607 } 3608 3609 /* add fake param output for vertex shader if no param is exported */ 3610 if ((ctx.type == TGSI_PROCESSOR_VERTEX || ctx.type == TGSI_PROCESSOR_TESS_EVAL) && next_param_base == 0) { 3611 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3612 output[j].gpr = 0; 3613 output[j].elem_size = 3; 3614 output[j].swizzle_x = 7; 3615 output[j].swizzle_y = 7; 3616 output[j].swizzle_z = 7; 3617 output[j].swizzle_w = 7; 3618 output[j].burst_count = 1; 3619 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3620 output[j].array_base = 0; 3621 output[j].op = CF_OP_EXPORT; 3622 j++; 3623 } 3624 3625 /* add fake pixel export */ 3626 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { 3627 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3628 output[j].gpr = 0; 3629 output[j].elem_size = 3; 3630 output[j].swizzle_x = 7; 3631 output[j].swizzle_y = 7; 3632 output[j].swizzle_z = 7; 3633 output[j].swizzle_w = 7; 3634 output[j].burst_count = 1; 3635 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3636 output[j].array_base = 0; 3637 output[j].op = CF_OP_EXPORT; 3638 j++; 3639 shader->nr_ps_color_exports++; 3640 } 3641 3642 noutput = j; 3643 3644 /* set export done on last export of each type */ 3645 for (i = noutput - 1, 
output_done = 0; i >= 0; i--) { 3646 if (!(output_done & (1 << output[i].type))) { 3647 output_done |= (1 << output[i].type); 3648 output[i].op = CF_OP_EXPORT_DONE; 3649 } 3650 } 3651 /* add output to bytecode */ 3652 if (!use_llvm) { 3653 for (i = 0; i < noutput; i++) { 3654 r = r600_bytecode_add_output(ctx.bc, &output[i]); 3655 if (r) 3656 goto out_err; 3657 } 3658 } 3659 } 3660 3661 /* add program end */ 3662 if (!use_llvm) { 3663 if (ctx.bc->chip_class == CAYMAN) 3664 cm_bytecode_add_cf_end(ctx.bc); 3665 else { 3666 const struct cf_op_info *last = NULL; 3667 3668 if (ctx.bc->cf_last) 3669 last = r600_isa_cf(ctx.bc->cf_last->op); 3670 3671 /* alu clause instructions don't have EOP bit, so add NOP */ 3672 if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS) 3673 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 3674 3675 ctx.bc->cf_last->end_of_program = 1; 3676 } 3677 } 3678 3679 /* check GPR limit - we have 124 = 128 - 4 3680 * (4 are reserved as alu clause temporary registers) */ 3681 if (ctx.bc->ngpr > 124) { 3682 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 3683 r = -ENOMEM; 3684 goto out_err; 3685 } 3686 3687 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3688 if ((r = generate_gs_copy_shader(rctx, pipeshader, &so))) 3689 return r; 3690 } 3691 3692 free(ctx.literals); 3693 tgsi_parse_free(&ctx.parse); 3694 return 0; 3695out_err: 3696 free(ctx.literals); 3697 tgsi_parse_free(&ctx.parse); 3698 return r; 3699} 3700 3701static int tgsi_unsupported(struct r600_shader_ctx *ctx) 3702{ 3703 const unsigned tgsi_opcode = 3704 ctx->parse.FullToken.FullInstruction.Instruction.Opcode; 3705 R600_ERR("%s tgsi opcode unsupported\n", 3706 tgsi_get_opcode_name(tgsi_opcode)); 3707 return -EINVAL; 3708} 3709 3710static int tgsi_end(struct r600_shader_ctx *ctx) 3711{ 3712 return 0; 3713} 3714 3715static void 
r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 3716 const struct r600_shader_src *shader_src, 3717 unsigned chan) 3718{ 3719 bc_src->sel = shader_src->sel; 3720 bc_src->chan = shader_src->swizzle[chan]; 3721 bc_src->neg = shader_src->neg; 3722 bc_src->abs = shader_src->abs; 3723 bc_src->rel = shader_src->rel; 3724 bc_src->value = shader_src->value[bc_src->chan]; 3725 bc_src->kc_bank = shader_src->kc_bank; 3726 bc_src->kc_rel = shader_src->kc_rel; 3727} 3728 3729static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 3730{ 3731 bc_src->abs = 1; 3732 bc_src->neg = 0; 3733} 3734 3735static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 3736{ 3737 bc_src->neg = !bc_src->neg; 3738} 3739 3740static void tgsi_dst(struct r600_shader_ctx *ctx, 3741 const struct tgsi_full_dst_register *tgsi_dst, 3742 unsigned swizzle, 3743 struct r600_bytecode_alu_dst *r600_dst) 3744{ 3745 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3746 3747 r600_dst->sel = tgsi_dst->Register.Index; 3748 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 3749 r600_dst->chan = swizzle; 3750 r600_dst->write = 1; 3751 if (inst->Instruction.Saturate) { 3752 r600_dst->clamp = 1; 3753 } 3754 if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) { 3755 if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) { 3756 return; 3757 } 3758 } 3759 if (tgsi_dst->Register.Indirect) 3760 r600_dst->rel = V_SQ_REL_RELATIVE; 3761 3762} 3763 3764static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap) 3765{ 3766 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3767 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3768 struct r600_bytecode_alu alu; 3769 int i, j, r, lasti = tgsi_last_instruction(write_mask); 3770 int use_tmp = 0; 3771 3772 if (singledest) { 3773 switch (write_mask) { 3774 case 0x1: 3775 write_mask = 0x3; 3776 break; 3777 case 0x2: 3778 use_tmp = 1; 3779 write_mask = 0x3; 
3780 break; 3781 case 0x4: 3782 write_mask = 0xc; 3783 break; 3784 case 0x8: 3785 write_mask = 0xc; 3786 use_tmp = 3; 3787 break; 3788 } 3789 } 3790 3791 lasti = tgsi_last_instruction(write_mask); 3792 for (i = 0; i <= lasti; i++) { 3793 3794 if (!(write_mask & (1 << i))) 3795 continue; 3796 3797 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3798 3799 if (singledest) { 3800 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3801 if (use_tmp) { 3802 alu.dst.sel = ctx->temp_reg; 3803 alu.dst.chan = i; 3804 alu.dst.write = 1; 3805 } 3806 if (i == 1 || i == 3) 3807 alu.dst.write = 0; 3808 } else 3809 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3810 3811 alu.op = ctx->inst_info->op; 3812 if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) { 3813 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3814 } else if (!swap) { 3815 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3816 r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 3817 } 3818 } else { 3819 r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i)); 3820 r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i)); 3821 } 3822 3823 /* handle some special cases */ 3824 if (i == 1 || i == 3) { 3825 switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) { 3826 case TGSI_OPCODE_SUB: 3827 r600_bytecode_src_toggle_neg(&alu.src[1]); 3828 break; 3829 case TGSI_OPCODE_DABS: 3830 r600_bytecode_src_set_abs(&alu.src[0]); 3831 break; 3832 default: 3833 break; 3834 } 3835 } 3836 if (i == lasti) { 3837 alu.last = 1; 3838 } 3839 r = r600_bytecode_add_alu(ctx->bc, &alu); 3840 if (r) 3841 return r; 3842 } 3843 3844 if (use_tmp) { 3845 write_mask = inst->Dst[0].Register.WriteMask; 3846 3847 /* move result from temp to dst */ 3848 for (i = 0; i <= lasti; i++) { 3849 if (!(write_mask & (1 << i))) 3850 continue; 3851 3852 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3853 alu.op = ALU_OP1_MOV; 3854 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3855 alu.src[0].sel = 
ctx->temp_reg; 3856 alu.src[0].chan = use_tmp - 1; 3857 alu.last = (i == lasti); 3858 3859 r = r600_bytecode_add_alu(ctx->bc, &alu); 3860 if (r) 3861 return r; 3862 } 3863 } 3864 return 0; 3865} 3866 3867static int tgsi_op2_64(struct r600_shader_ctx *ctx) 3868{ 3869 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3870 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3871 /* confirm writemasking */ 3872 if ((write_mask & 0x3) != 0x3 && 3873 (write_mask & 0xc) != 0xc) { 3874 fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask); 3875 return -1; 3876 } 3877 return tgsi_op2_64_params(ctx, false, false); 3878} 3879 3880static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx) 3881{ 3882 return tgsi_op2_64_params(ctx, true, false); 3883} 3884 3885static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx) 3886{ 3887 return tgsi_op2_64_params(ctx, true, true); 3888} 3889 3890static int tgsi_op3_64(struct r600_shader_ctx *ctx) 3891{ 3892 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3893 struct r600_bytecode_alu alu; 3894 int i, j, r; 3895 int lasti = 3; 3896 int tmp = r600_get_temp(ctx); 3897 3898 for (i = 0; i < lasti + 1; i++) { 3899 3900 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3901 alu.op = ctx->inst_info->op; 3902 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3903 r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 
0 : 1); 3904 } 3905 3906 if (inst->Dst[0].Register.WriteMask & (1 << i)) 3907 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3908 else 3909 alu.dst.sel = tmp; 3910 3911 alu.dst.chan = i; 3912 alu.is_op3 = 1; 3913 if (i == lasti) { 3914 alu.last = 1; 3915 } 3916 r = r600_bytecode_add_alu(ctx->bc, &alu); 3917 if (r) 3918 return r; 3919 } 3920 return 0; 3921} 3922 3923static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 3924{ 3925 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3926 struct r600_bytecode_alu alu; 3927 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3928 int i, j, r, lasti = tgsi_last_instruction(write_mask); 3929 /* use temp register if trans_only and more than one dst component */ 3930 int use_tmp = trans_only && (write_mask ^ (1 << lasti)); 3931 3932 for (i = 0; i <= lasti; i++) { 3933 if (!(write_mask & (1 << i))) 3934 continue; 3935 3936 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3937 if (use_tmp) { 3938 alu.dst.sel = ctx->temp_reg; 3939 alu.dst.chan = i; 3940 alu.dst.write = 1; 3941 } else 3942 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3943 3944 alu.op = ctx->inst_info->op; 3945 if (!swap) { 3946 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3947 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 3948 } 3949 } else { 3950 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3951 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3952 } 3953 /* handle some special cases */ 3954 switch (inst->Instruction.Opcode) { 3955 case TGSI_OPCODE_SUB: 3956 r600_bytecode_src_toggle_neg(&alu.src[1]); 3957 break; 3958 case TGSI_OPCODE_ABS: 3959 r600_bytecode_src_set_abs(&alu.src[0]); 3960 break; 3961 default: 3962 break; 3963 } 3964 if (i == lasti || trans_only) { 3965 alu.last = 1; 3966 } 3967 r = r600_bytecode_add_alu(ctx->bc, &alu); 3968 if (r) 3969 return r; 3970 } 3971 3972 if (use_tmp) { 3973 /* move result from temp to dst */ 3974 for (i = 0; i <= lasti; i++) { 3975 if (!(write_mask & (1 << 
i))) 3976 continue; 3977 3978 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3979 alu.op = ALU_OP1_MOV; 3980 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3981 alu.src[0].sel = ctx->temp_reg; 3982 alu.src[0].chan = i; 3983 alu.last = (i == lasti); 3984 3985 r = r600_bytecode_add_alu(ctx->bc, &alu); 3986 if (r) 3987 return r; 3988 } 3989 } 3990 return 0; 3991} 3992 3993static int tgsi_op2(struct r600_shader_ctx *ctx) 3994{ 3995 return tgsi_op2_s(ctx, 0, 0); 3996} 3997 3998static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 3999{ 4000 return tgsi_op2_s(ctx, 1, 0); 4001} 4002 4003static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 4004{ 4005 return tgsi_op2_s(ctx, 0, 1); 4006} 4007 4008static int tgsi_ineg(struct r600_shader_ctx *ctx) 4009{ 4010 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4011 struct r600_bytecode_alu alu; 4012 int i, r; 4013 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4014 4015 for (i = 0; i < lasti + 1; i++) { 4016 4017 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4018 continue; 4019 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4020 alu.op = ctx->inst_info->op; 4021 4022 alu.src[0].sel = V_SQ_ALU_SRC_0; 4023 4024 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4025 4026 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4027 4028 if (i == lasti) { 4029 alu.last = 1; 4030 } 4031 r = r600_bytecode_add_alu(ctx->bc, &alu); 4032 if (r) 4033 return r; 4034 } 4035 return 0; 4036 4037} 4038 4039static int tgsi_dneg(struct r600_shader_ctx *ctx) 4040{ 4041 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4042 struct r600_bytecode_alu alu; 4043 int i, r; 4044 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4045 4046 for (i = 0; i < lasti + 1; i++) { 4047 4048 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4049 continue; 4050 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4051 alu.op = ALU_OP1_MOV; 4052 4053 r600_bytecode_src(&alu.src[0], 
&ctx->src[0], i); 4054 4055 if (i == 1 || i == 3) 4056 r600_bytecode_src_toggle_neg(&alu.src[0]); 4057 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4058 4059 if (i == lasti) { 4060 alu.last = 1; 4061 } 4062 r = r600_bytecode_add_alu(ctx->bc, &alu); 4063 if (r) 4064 return r; 4065 } 4066 return 0; 4067 4068} 4069 4070static int tgsi_dfracexp(struct r600_shader_ctx *ctx) 4071{ 4072 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4073 struct r600_bytecode_alu alu; 4074 unsigned write_mask = inst->Dst[0].Register.WriteMask; 4075 int i, j, r; 4076 int firsti = write_mask == 0xc ? 2 : 0; 4077 4078 for (i = 0; i <= 3; i++) { 4079 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4080 alu.op = ctx->inst_info->op; 4081 4082 alu.dst.sel = ctx->temp_reg; 4083 alu.dst.chan = i; 4084 alu.dst.write = 1; 4085 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4086 r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 4087 } 4088 4089 if (i == 3) 4090 alu.last = 1; 4091 4092 r = r600_bytecode_add_alu(ctx->bc, &alu); 4093 if (r) 4094 return r; 4095 } 4096 4097 /* MOV first two channels to writemask dst0 */ 4098 for (i = 0; i <= 1; i++) { 4099 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4100 alu.op = ALU_OP1_MOV; 4101 alu.src[0].chan = i + 2; 4102 alu.src[0].sel = ctx->temp_reg; 4103 4104 tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst); 4105 alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1; 4106 alu.last = 1; 4107 r = r600_bytecode_add_alu(ctx->bc, &alu); 4108 if (r) 4109 return r; 4110 } 4111 4112 for (i = 0; i <= 3; i++) { 4113 if (inst->Dst[1].Register.WriteMask & (1 << i)) { 4114 /* MOV third channels to writemask dst1 */ 4115 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4116 alu.op = ALU_OP1_MOV; 4117 alu.src[0].chan = 1; 4118 alu.src[0].sel = ctx->temp_reg; 4119 4120 tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst); 4121 alu.last = 1; 4122 r = r600_bytecode_add_alu(ctx->bc, &alu); 4123 if (r) 4124 return r; 
4125 break; 4126 } 4127 } 4128 return 0; 4129} 4130 4131 4132static int egcm_int_to_double(struct r600_shader_ctx *ctx) 4133{ 4134 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4135 struct r600_bytecode_alu alu; 4136 int i, r; 4137 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4138 4139 assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D || 4140 inst->Instruction.Opcode == TGSI_OPCODE_U2D); 4141 4142 for (i = 0; i <= (lasti+1)/2; i++) { 4143 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4144 alu.op = ctx->inst_info->op; 4145 4146 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4147 alu.dst.sel = ctx->temp_reg; 4148 alu.dst.chan = i; 4149 alu.dst.write = 1; 4150 alu.last = 1; 4151 4152 r = r600_bytecode_add_alu(ctx->bc, &alu); 4153 if (r) 4154 return r; 4155 } 4156 4157 for (i = 0; i <= lasti; i++) { 4158 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4159 alu.op = ALU_OP1_FLT32_TO_FLT64; 4160 4161 alu.src[0].chan = i/2; 4162 if (i%2 == 0) 4163 alu.src[0].sel = ctx->temp_reg; 4164 else { 4165 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 4166 alu.src[0].value = 0x0; 4167 } 4168 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4169 alu.last = i == lasti; 4170 4171 r = r600_bytecode_add_alu(ctx->bc, &alu); 4172 if (r) 4173 return r; 4174 } 4175 4176 return 0; 4177} 4178 4179static int egcm_double_to_int(struct r600_shader_ctx *ctx) 4180{ 4181 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4182 struct r600_bytecode_alu alu; 4183 int i, r; 4184 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4185 4186 assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I || 4187 inst->Instruction.Opcode == TGSI_OPCODE_D2U); 4188 4189 for (i = 0; i <= lasti; i++) { 4190 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4191 alu.op = ALU_OP1_FLT64_TO_FLT32; 4192 4193 r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i)); 4194 alu.dst.chan = i; 4195 alu.dst.sel = ctx->temp_reg; 4196 
alu.dst.write = i%2 == 0; 4197 alu.last = i == lasti; 4198 4199 r = r600_bytecode_add_alu(ctx->bc, &alu); 4200 if (r) 4201 return r; 4202 } 4203 4204 for (i = 0; i <= (lasti+1)/2; i++) { 4205 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4206 alu.op = ctx->inst_info->op; 4207 4208 alu.src[0].chan = i*2; 4209 alu.src[0].sel = ctx->temp_reg; 4210 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4211 alu.last = 1; 4212 4213 r = r600_bytecode_add_alu(ctx->bc, &alu); 4214 if (r) 4215 return r; 4216 } 4217 4218 return 0; 4219} 4220 4221static int cayman_emit_double_instr(struct r600_shader_ctx *ctx) 4222{ 4223 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4224 int i, r; 4225 struct r600_bytecode_alu alu; 4226 int last_slot = 3; 4227 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4228 int t1 = ctx->temp_reg; 4229 4230 /* these have to write the result to X/Y by the looks of it */ 4231 for (i = 0 ; i < last_slot; i++) { 4232 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4233 alu.op = ctx->inst_info->op; 4234 4235 /* should only be one src regs */ 4236 assert (inst->Instruction.NumSrcRegs == 1); 4237 4238 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 4239 r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); 4240 4241 /* RSQ should take the absolute value of src */ 4242 if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ || 4243 ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) { 4244 r600_bytecode_src_set_abs(&alu.src[1]); 4245 } 4246 alu.dst.sel = t1; 4247 alu.dst.chan = i; 4248 alu.dst.write = (i == 0 || i == 1); 4249 4250 if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1) 4251 alu.last = 1; 4252 r = r600_bytecode_add_alu(ctx->bc, &alu); 4253 if (r) 4254 return r; 4255 } 4256 4257 for (i = 0 ; i <= lasti; i++) { 4258 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4259 continue; 4260 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4261 alu.op = 
ALU_OP1_MOV;
		alu.src[0].sel = t1;
		/* even dst channels take the X half of the result pair, odd
		 * channels the Y half */
		alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit a Cayman scalar float op: per the CAYMAN notes at the top of this
 * file, former t-slot-only ops are replicated across the vector slots,
 * each slot writing its own writemasked dst channel directly. */
static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int i, j, r;
	struct r600_bytecode_alu alu;
	/* the 4th slot is only needed when the W channel is written */
	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;

	for (i = 0 ; i < last_slot; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r600_bytecode_src(&alu.src[j], &ctx->src[j], 0);

			/* RSQ should take the absolute value of src */
			if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) {
				r600_bytecode_src_set_abs(&alu.src[j]);
			}
		}
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;

		if (i == last_slot - 1)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit a Cayman integer multiply (op taken from inst_info): for every
 * writemasked component k, issue the op in all four slots into a temp
 * (only slot k's result is kept), then copy the temp channels to the
 * destination. */
static int cayman_mul_int_instr(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int i, j, k, r;
	struct r600_bytecode_alu alu;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int t1 = ctx->temp_reg;

	for (k = 0; k <= lasti; k++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << k)))
			continue;

		for (i = 0 ; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ctx->inst_info->op;
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				r600_bytecode_src(&alu.src[j], &ctx->src[j], k);
			}
			alu.dst.sel = t1;
			alu.dst.chan = i;
alu.dst.write = (i == k); 4326 if (i == 3) 4327 alu.last = 1; 4328 r = r600_bytecode_add_alu(ctx->bc, &alu); 4329 if (r) 4330 return r; 4331 } 4332 } 4333 4334 for (i = 0 ; i <= lasti; i++) { 4335 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4336 continue; 4337 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4338 alu.op = ALU_OP1_MOV; 4339 alu.src[0].sel = t1; 4340 alu.src[0].chan = i; 4341 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4342 alu.dst.write = 1; 4343 if (i == lasti) 4344 alu.last = 1; 4345 r = r600_bytecode_add_alu(ctx->bc, &alu); 4346 if (r) 4347 return r; 4348 } 4349 4350 return 0; 4351} 4352 4353 4354static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) 4355{ 4356 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4357 int i, j, k, r; 4358 struct r600_bytecode_alu alu; 4359 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4360 int t1 = ctx->temp_reg; 4361 4362 for (k = 0; k < 2; k++) { 4363 if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2)))) 4364 continue; 4365 4366 for (i = 0; i < 4; i++) { 4367 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4368 alu.op = ctx->inst_info->op; 4369 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4370 r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 
0 : 1));; 4371 } 4372 alu.dst.sel = t1; 4373 alu.dst.chan = i; 4374 alu.dst.write = 1; 4375 if (i == 3) 4376 alu.last = 1; 4377 r = r600_bytecode_add_alu(ctx->bc, &alu); 4378 if (r) 4379 return r; 4380 } 4381 } 4382 4383 for (i = 0; i <= lasti; i++) { 4384 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4385 continue; 4386 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4387 alu.op = ALU_OP1_MOV; 4388 alu.src[0].sel = t1; 4389 alu.src[0].chan = i; 4390 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4391 alu.dst.write = 1; 4392 if (i == lasti) 4393 alu.last = 1; 4394 r = r600_bytecode_add_alu(ctx->bc, &alu); 4395 if (r) 4396 return r; 4397 } 4398 4399 return 0; 4400} 4401 4402/* 4403 * r600 - trunc to -PI..PI range 4404 * r700 - normalize by dividing by 2PI 4405 * see fdo bug 27901 4406 */ 4407static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 4408{ 4409 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 4410 static float double_pi = 3.1415926535 * 2; 4411 static float neg_pi = -3.1415926535; 4412 4413 int r; 4414 struct r600_bytecode_alu alu; 4415 4416 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4417 alu.op = ALU_OP3_MULADD; 4418 alu.is_op3 = 1; 4419 4420 alu.dst.chan = 0; 4421 alu.dst.sel = ctx->temp_reg; 4422 alu.dst.write = 1; 4423 4424 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4425 4426 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4427 alu.src[1].chan = 0; 4428 alu.src[1].value = *(uint32_t *)&half_inv_pi; 4429 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 4430 alu.src[2].chan = 0; 4431 alu.last = 1; 4432 r = r600_bytecode_add_alu(ctx->bc, &alu); 4433 if (r) 4434 return r; 4435 4436 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4437 alu.op = ALU_OP1_FRACT; 4438 4439 alu.dst.chan = 0; 4440 alu.dst.sel = ctx->temp_reg; 4441 alu.dst.write = 1; 4442 4443 alu.src[0].sel = ctx->temp_reg; 4444 alu.src[0].chan = 0; 4445 alu.last = 1; 4446 r = r600_bytecode_add_alu(ctx->bc, &alu); 4447 if (r) 4448 return r; 4449 4450 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu));
	/* map the normalized fraction back to an angle: on r600 via the
	 * 2pi / -pi literals, on later chips tmp.x * 1.0 - 0.5 */
	alu.op = ALU_OP3_MULADD;
	alu.is_op3 = 1;

	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;

	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].chan = 0;
	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[2].chan = 0;

	if (ctx->bc->chip_class == R600) {
		alu.src[1].value = *(uint32_t *)&double_pi;
		alu.src[2].value = *(uint32_t *)&neg_pi;
	} else {
		alu.src[1].sel = V_SQ_ALU_SRC_1;
		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
		alu.src[2].neg = 1;
	}

	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* Cayman SIN/COS: prepare the argument with tgsi_setup_trig(), then
 * replicate the trig op across the vector slots, each slot writing its
 * own writemasked dst channel from the shared tmp.x argument. */
static int cayman_trig(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	/* the 4th slot is only needed when the W channel is written */
	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;
	int i, r;

	r = tgsi_setup_trig(ctx);
	if (r)
		return r;


	for (i = 0; i < last_slot; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		alu.dst.chan = i;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 0;
		if (i == last_slot - 1)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Pre-Cayman SIN/COS: prepare the argument with tgsi_setup_trig(),
 * compute the scalar result into tmp.x, then replicate it into every
 * writemasked dst channel. */
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	r = tgsi_setup_trig(ctx);
	if (r)
		return r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op =
ctx->inst_info->op; 4526 alu.dst.chan = 0; 4527 alu.dst.sel = ctx->temp_reg; 4528 alu.dst.write = 1; 4529 4530 alu.src[0].sel = ctx->temp_reg; 4531 alu.src[0].chan = 0; 4532 alu.last = 1; 4533 r = r600_bytecode_add_alu(ctx->bc, &alu); 4534 if (r) 4535 return r; 4536 4537 /* replicate result */ 4538 for (i = 0; i < lasti + 1; i++) { 4539 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4540 continue; 4541 4542 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4543 alu.op = ALU_OP1_MOV; 4544 4545 alu.src[0].sel = ctx->temp_reg; 4546 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4547 if (i == lasti) 4548 alu.last = 1; 4549 r = r600_bytecode_add_alu(ctx->bc, &alu); 4550 if (r) 4551 return r; 4552 } 4553 return 0; 4554} 4555 4556static int tgsi_scs(struct r600_shader_ctx *ctx) 4557{ 4558 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4559 struct r600_bytecode_alu alu; 4560 int i, r; 4561 4562 /* We'll only need the trig stuff if we are going to write to the 4563 * X or Y components of the destination vector. 
4564 */ 4565 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 4566 r = tgsi_setup_trig(ctx); 4567 if (r) 4568 return r; 4569 } 4570 4571 /* dst.x = COS */ 4572 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4573 if (ctx->bc->chip_class == CAYMAN) { 4574 for (i = 0 ; i < 3; i++) { 4575 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4576 alu.op = ALU_OP1_COS; 4577 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4578 4579 if (i == 0) 4580 alu.dst.write = 1; 4581 else 4582 alu.dst.write = 0; 4583 alu.src[0].sel = ctx->temp_reg; 4584 alu.src[0].chan = 0; 4585 if (i == 2) 4586 alu.last = 1; 4587 r = r600_bytecode_add_alu(ctx->bc, &alu); 4588 if (r) 4589 return r; 4590 } 4591 } else { 4592 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4593 alu.op = ALU_OP1_COS; 4594 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4595 4596 alu.src[0].sel = ctx->temp_reg; 4597 alu.src[0].chan = 0; 4598 alu.last = 1; 4599 r = r600_bytecode_add_alu(ctx->bc, &alu); 4600 if (r) 4601 return r; 4602 } 4603 } 4604 4605 /* dst.y = SIN */ 4606 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4607 if (ctx->bc->chip_class == CAYMAN) { 4608 for (i = 0 ; i < 3; i++) { 4609 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4610 alu.op = ALU_OP1_SIN; 4611 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4612 if (i == 1) 4613 alu.dst.write = 1; 4614 else 4615 alu.dst.write = 0; 4616 alu.src[0].sel = ctx->temp_reg; 4617 alu.src[0].chan = 0; 4618 if (i == 2) 4619 alu.last = 1; 4620 r = r600_bytecode_add_alu(ctx->bc, &alu); 4621 if (r) 4622 return r; 4623 } 4624 } else { 4625 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4626 alu.op = ALU_OP1_SIN; 4627 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4628 4629 alu.src[0].sel = ctx->temp_reg; 4630 alu.src[0].chan = 0; 4631 alu.last = 1; 4632 r = r600_bytecode_add_alu(ctx->bc, &alu); 4633 if (r) 4634 return r; 4635 } 4636 } 4637 4638 /* dst.z = 0.0; */ 4639 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4640 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4641 4642 alu.op = ALU_OP1_MOV; 4643 4644 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4645 4646 alu.src[0].sel = V_SQ_ALU_SRC_0; 4647 alu.src[0].chan = 0; 4648 4649 alu.last = 1; 4650 4651 r = r600_bytecode_add_alu(ctx->bc, &alu); 4652 if (r) 4653 return r; 4654 } 4655 4656 /* dst.w = 1.0; */ 4657 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4658 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4659 4660 alu.op = ALU_OP1_MOV; 4661 4662 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4663 4664 alu.src[0].sel = V_SQ_ALU_SRC_1; 4665 alu.src[0].chan = 0; 4666 4667 alu.last = 1; 4668 4669 r = r600_bytecode_add_alu(ctx->bc, &alu); 4670 if (r) 4671 return r; 4672 } 4673 4674 return 0; 4675} 4676 4677static int tgsi_kill(struct r600_shader_ctx *ctx) 4678{ 4679 const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4680 struct r600_bytecode_alu alu; 4681 int i, r; 4682 4683 for (i = 0; i < 4; i++) { 4684 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4685 alu.op = ctx->inst_info->op; 4686 4687 alu.dst.chan = i; 4688 4689 alu.src[0].sel = V_SQ_ALU_SRC_0; 4690 4691 if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) { 4692 alu.src[1].sel = V_SQ_ALU_SRC_1; 4693 alu.src[1].neg = 1; 4694 } else { 4695 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4696 } 4697 if (i == 3) { 4698 alu.last = 1; 4699 } 4700 r = r600_bytecode_add_alu(ctx->bc, &alu); 4701 if (r) 4702 return r; 4703 } 4704 4705 /* kill must be last in ALU */ 4706 ctx->bc->force_add_cf = 1; 4707 ctx->shader->uses_kill = TRUE; 4708 return 0; 4709} 4710 4711static int tgsi_lit(struct r600_shader_ctx *ctx) 4712{ 4713 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4714 struct r600_bytecode_alu alu; 4715 int r; 4716 4717 /* tmp.x = max(src.y, 0.0) */ 4718 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4719 alu.op = ALU_OP2_MAX; 4720 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 4721 alu.src[1].sel = 
V_SQ_ALU_SRC_0; /*0.0*/ 4722 alu.src[1].chan = 1; 4723 4724 alu.dst.sel = ctx->temp_reg; 4725 alu.dst.chan = 0; 4726 alu.dst.write = 1; 4727 4728 alu.last = 1; 4729 r = r600_bytecode_add_alu(ctx->bc, &alu); 4730 if (r) 4731 return r; 4732 4733 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 4734 { 4735 int chan; 4736 int sel; 4737 int i; 4738 4739 if (ctx->bc->chip_class == CAYMAN) { 4740 for (i = 0; i < 3; i++) { 4741 /* tmp.z = log(tmp.x) */ 4742 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4743 alu.op = ALU_OP1_LOG_CLAMPED; 4744 alu.src[0].sel = ctx->temp_reg; 4745 alu.src[0].chan = 0; 4746 alu.dst.sel = ctx->temp_reg; 4747 alu.dst.chan = i; 4748 if (i == 2) { 4749 alu.dst.write = 1; 4750 alu.last = 1; 4751 } else 4752 alu.dst.write = 0; 4753 4754 r = r600_bytecode_add_alu(ctx->bc, &alu); 4755 if (r) 4756 return r; 4757 } 4758 } else { 4759 /* tmp.z = log(tmp.x) */ 4760 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4761 alu.op = ALU_OP1_LOG_CLAMPED; 4762 alu.src[0].sel = ctx->temp_reg; 4763 alu.src[0].chan = 0; 4764 alu.dst.sel = ctx->temp_reg; 4765 alu.dst.chan = 2; 4766 alu.dst.write = 1; 4767 alu.last = 1; 4768 r = r600_bytecode_add_alu(ctx->bc, &alu); 4769 if (r) 4770 return r; 4771 } 4772 4773 chan = alu.dst.chan; 4774 sel = alu.dst.sel; 4775 4776 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 4777 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4778 alu.op = ALU_OP3_MUL_LIT; 4779 alu.src[0].sel = sel; 4780 alu.src[0].chan = chan; 4781 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 4782 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 4783 alu.dst.sel = ctx->temp_reg; 4784 alu.dst.chan = 0; 4785 alu.dst.write = 1; 4786 alu.is_op3 = 1; 4787 alu.last = 1; 4788 r = r600_bytecode_add_alu(ctx->bc, &alu); 4789 if (r) 4790 return r; 4791 4792 if (ctx->bc->chip_class == CAYMAN) { 4793 for (i = 0; i < 3; i++) { 4794 /* dst.z = exp(tmp.x) */ 4795 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4796 alu.op = ALU_OP1_EXP_IEEE; 4797 alu.src[0].sel = 
ctx->temp_reg; 4798 alu.src[0].chan = 0; 4799 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4800 if (i == 2) { 4801 alu.dst.write = 1; 4802 alu.last = 1; 4803 } else 4804 alu.dst.write = 0; 4805 r = r600_bytecode_add_alu(ctx->bc, &alu); 4806 if (r) 4807 return r; 4808 } 4809 } else { 4810 /* dst.z = exp(tmp.x) */ 4811 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4812 alu.op = ALU_OP1_EXP_IEEE; 4813 alu.src[0].sel = ctx->temp_reg; 4814 alu.src[0].chan = 0; 4815 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4816 alu.last = 1; 4817 r = r600_bytecode_add_alu(ctx->bc, &alu); 4818 if (r) 4819 return r; 4820 } 4821 } 4822 4823 /* dst.x, <- 1.0 */ 4824 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4825 alu.op = ALU_OP1_MOV; 4826 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 4827 alu.src[0].chan = 0; 4828 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4829 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 4830 r = r600_bytecode_add_alu(ctx->bc, &alu); 4831 if (r) 4832 return r; 4833 4834 /* dst.y = max(src.x, 0.0) */ 4835 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4836 alu.op = ALU_OP2_MAX; 4837 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4838 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 4839 alu.src[1].chan = 0; 4840 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4841 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 4842 r = r600_bytecode_add_alu(ctx->bc, &alu); 4843 if (r) 4844 return r; 4845 4846 /* dst.w, <- 1.0 */ 4847 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4848 alu.op = ALU_OP1_MOV; 4849 alu.src[0].sel = V_SQ_ALU_SRC_1; 4850 alu.src[0].chan = 0; 4851 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4852 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 4853 alu.last = 1; 4854 r = r600_bytecode_add_alu(ctx->bc, &alu); 4855 if (r) 4856 return r; 4857 4858 return 0; 4859} 4860 4861static int tgsi_rsq(struct r600_shader_ctx *ctx) 4862{ 4863 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4864 struct 
r600_bytecode_alu alu; 4865 int i, r; 4866 4867 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4868 4869 /* XXX: 4870 * For state trackers other than OpenGL, we'll want to use 4871 * _RECIPSQRT_IEEE instead. 4872 */ 4873 alu.op = ALU_OP1_RECIPSQRT_CLAMPED; 4874 4875 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4876 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4877 r600_bytecode_src_set_abs(&alu.src[i]); 4878 } 4879 alu.dst.sel = ctx->temp_reg; 4880 alu.dst.write = 1; 4881 alu.last = 1; 4882 r = r600_bytecode_add_alu(ctx->bc, &alu); 4883 if (r) 4884 return r; 4885 /* replicate result */ 4886 return tgsi_helper_tempx_replicate(ctx); 4887} 4888 4889static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 4890{ 4891 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4892 struct r600_bytecode_alu alu; 4893 int i, r; 4894 4895 for (i = 0; i < 4; i++) { 4896 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4897 alu.src[0].sel = ctx->temp_reg; 4898 alu.op = ALU_OP1_MOV; 4899 alu.dst.chan = i; 4900 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4901 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4902 if (i == 3) 4903 alu.last = 1; 4904 r = r600_bytecode_add_alu(ctx->bc, &alu); 4905 if (r) 4906 return r; 4907 } 4908 return 0; 4909} 4910 4911static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 4912{ 4913 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4914 struct r600_bytecode_alu alu; 4915 int i, r; 4916 4917 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4918 alu.op = ctx->inst_info->op; 4919 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4920 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4921 } 4922 alu.dst.sel = ctx->temp_reg; 4923 alu.dst.write = 1; 4924 alu.last = 1; 4925 r = r600_bytecode_add_alu(ctx->bc, &alu); 4926 if (r) 4927 return r; 4928 /* replicate result */ 4929 return tgsi_helper_tempx_replicate(ctx); 4930} 4931 4932static int cayman_pow(struct 
r600_shader_ctx *ctx) 4933{ 4934 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4935 int i, r; 4936 struct r600_bytecode_alu alu; 4937 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4938 4939 for (i = 0; i < 3; i++) { 4940 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4941 alu.op = ALU_OP1_LOG_IEEE; 4942 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4943 alu.dst.sel = ctx->temp_reg; 4944 alu.dst.chan = i; 4945 alu.dst.write = 1; 4946 if (i == 2) 4947 alu.last = 1; 4948 r = r600_bytecode_add_alu(ctx->bc, &alu); 4949 if (r) 4950 return r; 4951 } 4952 4953 /* b * LOG2(a) */ 4954 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4955 alu.op = ALU_OP2_MUL; 4956 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 4957 alu.src[1].sel = ctx->temp_reg; 4958 alu.dst.sel = ctx->temp_reg; 4959 alu.dst.write = 1; 4960 alu.last = 1; 4961 r = r600_bytecode_add_alu(ctx->bc, &alu); 4962 if (r) 4963 return r; 4964 4965 for (i = 0; i < last_slot; i++) { 4966 /* POW(a,b) = EXP2(b * LOG2(a))*/ 4967 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4968 alu.op = ALU_OP1_EXP_IEEE; 4969 alu.src[0].sel = ctx->temp_reg; 4970 4971 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4972 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4973 if (i == last_slot - 1) 4974 alu.last = 1; 4975 r = r600_bytecode_add_alu(ctx->bc, &alu); 4976 if (r) 4977 return r; 4978 } 4979 return 0; 4980} 4981 4982static int tgsi_pow(struct r600_shader_ctx *ctx) 4983{ 4984 struct r600_bytecode_alu alu; 4985 int r; 4986 4987 /* LOG2(a) */ 4988 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4989 alu.op = ALU_OP1_LOG_IEEE; 4990 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4991 alu.dst.sel = ctx->temp_reg; 4992 alu.dst.write = 1; 4993 alu.last = 1; 4994 r = r600_bytecode_add_alu(ctx->bc, &alu); 4995 if (r) 4996 return r; 4997 /* b * LOG2(a) */ 4998 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4999 alu.op = ALU_OP2_MUL; 5000 
r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 5001 alu.src[1].sel = ctx->temp_reg; 5002 alu.dst.sel = ctx->temp_reg; 5003 alu.dst.write = 1; 5004 alu.last = 1; 5005 r = r600_bytecode_add_alu(ctx->bc, &alu); 5006 if (r) 5007 return r; 5008 /* POW(a,b) = EXP2(b * LOG2(a))*/ 5009 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5010 alu.op = ALU_OP1_EXP_IEEE; 5011 alu.src[0].sel = ctx->temp_reg; 5012 alu.dst.sel = ctx->temp_reg; 5013 alu.dst.write = 1; 5014 alu.last = 1; 5015 r = r600_bytecode_add_alu(ctx->bc, &alu); 5016 if (r) 5017 return r; 5018 return tgsi_helper_tempx_replicate(ctx); 5019} 5020 5021static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 5022{ 5023 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5024 struct r600_bytecode_alu alu; 5025 int i, r, j; 5026 unsigned write_mask = inst->Dst[0].Register.WriteMask; 5027 int tmp0 = ctx->temp_reg; 5028 int tmp1 = r600_get_temp(ctx); 5029 int tmp2 = r600_get_temp(ctx); 5030 int tmp3 = r600_get_temp(ctx); 5031 /* Unsigned path: 5032 * 5033 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 5034 * 5035 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 5036 * 2. tmp0.z = lo (tmp0.x * src2) 5037 * 3. tmp0.w = -tmp0.z 5038 * 4. tmp0.y = hi (tmp0.x * src2) 5039 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 5040 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 5041 * 7. tmp1.x = tmp0.x - tmp0.w 5042 * 8. tmp1.y = tmp0.x + tmp0.w 5043 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 5044 * 10. tmp0.z = hi(tmp0.x * src1) = q 5045 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 5046 * 5047 * 12. tmp0.w = src1 - tmp0.y = r 5048 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 5049 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 5050 * 5051 * if DIV 5052 * 5053 * 15. tmp1.z = tmp0.z + 1 = q + 1 5054 * 16. tmp1.w = tmp0.z - 1 = q - 1 5055 * 5056 * else MOD 5057 * 5058 * 15. 
tmp1.z = tmp0.w - src2 = r - src2 5059 * 16. tmp1.w = tmp0.w + src2 = r + src2 5060 * 5061 * endif 5062 * 5063 * 17. tmp1.x = tmp1.x & tmp1.y 5064 * 5065 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 5066 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 5067 * 5068 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 5069 * 20. dst = src2==0 ? MAX_UINT : tmp0.z 5070 * 5071 * Signed path: 5072 * 5073 * Same as unsigned, using abs values of the operands, 5074 * and fixing the sign of the result in the end. 5075 */ 5076 5077 for (i = 0; i < 4; i++) { 5078 if (!(write_mask & (1<<i))) 5079 continue; 5080 5081 if (signed_op) { 5082 5083 /* tmp2.x = -src0 */ 5084 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5085 alu.op = ALU_OP2_SUB_INT; 5086 5087 alu.dst.sel = tmp2; 5088 alu.dst.chan = 0; 5089 alu.dst.write = 1; 5090 5091 alu.src[0].sel = V_SQ_ALU_SRC_0; 5092 5093 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5094 5095 alu.last = 1; 5096 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5097 return r; 5098 5099 /* tmp2.y = -src1 */ 5100 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5101 alu.op = ALU_OP2_SUB_INT; 5102 5103 alu.dst.sel = tmp2; 5104 alu.dst.chan = 1; 5105 alu.dst.write = 1; 5106 5107 alu.src[0].sel = V_SQ_ALU_SRC_0; 5108 5109 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5110 5111 alu.last = 1; 5112 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5113 return r; 5114 5115 /* tmp2.z sign bit is set if src0 and src2 signs are different */ 5116 /* it will be a sign of the quotient */ 5117 if (!mod) { 5118 5119 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5120 alu.op = ALU_OP2_XOR_INT; 5121 5122 alu.dst.sel = tmp2; 5123 alu.dst.chan = 2; 5124 alu.dst.write = 1; 5125 5126 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5127 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5128 5129 alu.last = 1; 5130 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5131 return r; 5132 } 5133 5134 /* tmp2.x = |src0| */ 5135 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 5136 alu.op = ALU_OP3_CNDGE_INT; 5137 alu.is_op3 = 1; 5138 5139 alu.dst.sel = tmp2; 5140 alu.dst.chan = 0; 5141 alu.dst.write = 1; 5142 5143 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5144 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5145 alu.src[2].sel = tmp2; 5146 alu.src[2].chan = 0; 5147 5148 alu.last = 1; 5149 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5150 return r; 5151 5152 /* tmp2.y = |src1| */ 5153 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5154 alu.op = ALU_OP3_CNDGE_INT; 5155 alu.is_op3 = 1; 5156 5157 alu.dst.sel = tmp2; 5158 alu.dst.chan = 1; 5159 alu.dst.write = 1; 5160 5161 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5162 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5163 alu.src[2].sel = tmp2; 5164 alu.src[2].chan = 1; 5165 5166 alu.last = 1; 5167 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5168 return r; 5169 5170 } 5171 5172 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 5173 if (ctx->bc->chip_class == CAYMAN) { 5174 /* tmp3.x = u2f(src2) */ 5175 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5176 alu.op = ALU_OP1_UINT_TO_FLT; 5177 5178 alu.dst.sel = tmp3; 5179 alu.dst.chan = 0; 5180 alu.dst.write = 1; 5181 5182 if (signed_op) { 5183 alu.src[0].sel = tmp2; 5184 alu.src[0].chan = 1; 5185 } else { 5186 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5187 } 5188 5189 alu.last = 1; 5190 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5191 return r; 5192 5193 /* tmp0.x = recip(tmp3.x) */ 5194 for (j = 0 ; j < 3; j++) { 5195 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5196 alu.op = ALU_OP1_RECIP_IEEE; 5197 5198 alu.dst.sel = tmp0; 5199 alu.dst.chan = j; 5200 alu.dst.write = (j == 0); 5201 5202 alu.src[0].sel = tmp3; 5203 alu.src[0].chan = 0; 5204 5205 if (j == 2) 5206 alu.last = 1; 5207 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5208 return r; 5209 } 5210 5211 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5212 alu.op = ALU_OP2_MUL; 5213 5214 
alu.src[0].sel = tmp0; 5215 alu.src[0].chan = 0; 5216 5217 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 5218 alu.src[1].value = 0x4f800000; 5219 5220 alu.dst.sel = tmp3; 5221 alu.dst.write = 1; 5222 alu.last = 1; 5223 r = r600_bytecode_add_alu(ctx->bc, &alu); 5224 if (r) 5225 return r; 5226 5227 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5228 alu.op = ALU_OP1_FLT_TO_UINT; 5229 5230 alu.dst.sel = tmp0; 5231 alu.dst.chan = 0; 5232 alu.dst.write = 1; 5233 5234 alu.src[0].sel = tmp3; 5235 alu.src[0].chan = 0; 5236 5237 alu.last = 1; 5238 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5239 return r; 5240 5241 } else { 5242 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5243 alu.op = ALU_OP1_RECIP_UINT; 5244 5245 alu.dst.sel = tmp0; 5246 alu.dst.chan = 0; 5247 alu.dst.write = 1; 5248 5249 if (signed_op) { 5250 alu.src[0].sel = tmp2; 5251 alu.src[0].chan = 1; 5252 } else { 5253 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5254 } 5255 5256 alu.last = 1; 5257 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5258 return r; 5259 } 5260 5261 /* 2. 
tmp0.z = lo (tmp0.x * src2) */ 5262 if (ctx->bc->chip_class == CAYMAN) { 5263 for (j = 0 ; j < 4; j++) { 5264 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5265 alu.op = ALU_OP2_MULLO_UINT; 5266 5267 alu.dst.sel = tmp0; 5268 alu.dst.chan = j; 5269 alu.dst.write = (j == 2); 5270 5271 alu.src[0].sel = tmp0; 5272 alu.src[0].chan = 0; 5273 if (signed_op) { 5274 alu.src[1].sel = tmp2; 5275 alu.src[1].chan = 1; 5276 } else { 5277 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5278 } 5279 5280 alu.last = (j == 3); 5281 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5282 return r; 5283 } 5284 } else { 5285 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5286 alu.op = ALU_OP2_MULLO_UINT; 5287 5288 alu.dst.sel = tmp0; 5289 alu.dst.chan = 2; 5290 alu.dst.write = 1; 5291 5292 alu.src[0].sel = tmp0; 5293 alu.src[0].chan = 0; 5294 if (signed_op) { 5295 alu.src[1].sel = tmp2; 5296 alu.src[1].chan = 1; 5297 } else { 5298 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5299 } 5300 5301 alu.last = 1; 5302 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5303 return r; 5304 } 5305 5306 /* 3. tmp0.w = -tmp0.z */ 5307 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5308 alu.op = ALU_OP2_SUB_INT; 5309 5310 alu.dst.sel = tmp0; 5311 alu.dst.chan = 3; 5312 alu.dst.write = 1; 5313 5314 alu.src[0].sel = V_SQ_ALU_SRC_0; 5315 alu.src[1].sel = tmp0; 5316 alu.src[1].chan = 2; 5317 5318 alu.last = 1; 5319 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5320 return r; 5321 5322 /* 4. 
tmp0.y = hi (tmp0.x * src2) */ 5323 if (ctx->bc->chip_class == CAYMAN) { 5324 for (j = 0 ; j < 4; j++) { 5325 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5326 alu.op = ALU_OP2_MULHI_UINT; 5327 5328 alu.dst.sel = tmp0; 5329 alu.dst.chan = j; 5330 alu.dst.write = (j == 1); 5331 5332 alu.src[0].sel = tmp0; 5333 alu.src[0].chan = 0; 5334 5335 if (signed_op) { 5336 alu.src[1].sel = tmp2; 5337 alu.src[1].chan = 1; 5338 } else { 5339 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5340 } 5341 alu.last = (j == 3); 5342 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5343 return r; 5344 } 5345 } else { 5346 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5347 alu.op = ALU_OP2_MULHI_UINT; 5348 5349 alu.dst.sel = tmp0; 5350 alu.dst.chan = 1; 5351 alu.dst.write = 1; 5352 5353 alu.src[0].sel = tmp0; 5354 alu.src[0].chan = 0; 5355 5356 if (signed_op) { 5357 alu.src[1].sel = tmp2; 5358 alu.src[1].chan = 1; 5359 } else { 5360 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5361 } 5362 5363 alu.last = 1; 5364 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5365 return r; 5366 } 5367 5368 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 5369 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5370 alu.op = ALU_OP3_CNDE_INT; 5371 alu.is_op3 = 1; 5372 5373 alu.dst.sel = tmp0; 5374 alu.dst.chan = 2; 5375 alu.dst.write = 1; 5376 5377 alu.src[0].sel = tmp0; 5378 alu.src[0].chan = 1; 5379 alu.src[1].sel = tmp0; 5380 alu.src[1].chan = 3; 5381 alu.src[2].sel = tmp0; 5382 alu.src[2].chan = 2; 5383 5384 alu.last = 1; 5385 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5386 return r; 5387 5388 /* 6. 
tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 5389 if (ctx->bc->chip_class == CAYMAN) { 5390 for (j = 0 ; j < 4; j++) { 5391 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5392 alu.op = ALU_OP2_MULHI_UINT; 5393 5394 alu.dst.sel = tmp0; 5395 alu.dst.chan = j; 5396 alu.dst.write = (j == 3); 5397 5398 alu.src[0].sel = tmp0; 5399 alu.src[0].chan = 2; 5400 5401 alu.src[1].sel = tmp0; 5402 alu.src[1].chan = 0; 5403 5404 alu.last = (j == 3); 5405 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5406 return r; 5407 } 5408 } else { 5409 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5410 alu.op = ALU_OP2_MULHI_UINT; 5411 5412 alu.dst.sel = tmp0; 5413 alu.dst.chan = 3; 5414 alu.dst.write = 1; 5415 5416 alu.src[0].sel = tmp0; 5417 alu.src[0].chan = 2; 5418 5419 alu.src[1].sel = tmp0; 5420 alu.src[1].chan = 0; 5421 5422 alu.last = 1; 5423 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5424 return r; 5425 } 5426 5427 /* 7. tmp1.x = tmp0.x - tmp0.w */ 5428 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5429 alu.op = ALU_OP2_SUB_INT; 5430 5431 alu.dst.sel = tmp1; 5432 alu.dst.chan = 0; 5433 alu.dst.write = 1; 5434 5435 alu.src[0].sel = tmp0; 5436 alu.src[0].chan = 0; 5437 alu.src[1].sel = tmp0; 5438 alu.src[1].chan = 3; 5439 5440 alu.last = 1; 5441 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5442 return r; 5443 5444 /* 8. tmp1.y = tmp0.x + tmp0.w */ 5445 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5446 alu.op = ALU_OP2_ADD_INT; 5447 5448 alu.dst.sel = tmp1; 5449 alu.dst.chan = 1; 5450 alu.dst.write = 1; 5451 5452 alu.src[0].sel = tmp0; 5453 alu.src[0].chan = 0; 5454 alu.src[1].sel = tmp0; 5455 alu.src[1].chan = 3; 5456 5457 alu.last = 1; 5458 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5459 return r; 5460 5461 /* 9. tmp0.x = (tmp0.y == 0 ? 
tmp1.y : tmp1.x) */ 5462 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5463 alu.op = ALU_OP3_CNDE_INT; 5464 alu.is_op3 = 1; 5465 5466 alu.dst.sel = tmp0; 5467 alu.dst.chan = 0; 5468 alu.dst.write = 1; 5469 5470 alu.src[0].sel = tmp0; 5471 alu.src[0].chan = 1; 5472 alu.src[1].sel = tmp1; 5473 alu.src[1].chan = 1; 5474 alu.src[2].sel = tmp1; 5475 alu.src[2].chan = 0; 5476 5477 alu.last = 1; 5478 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5479 return r; 5480 5481 /* 10. tmp0.z = hi(tmp0.x * src1) = q */ 5482 if (ctx->bc->chip_class == CAYMAN) { 5483 for (j = 0 ; j < 4; j++) { 5484 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5485 alu.op = ALU_OP2_MULHI_UINT; 5486 5487 alu.dst.sel = tmp0; 5488 alu.dst.chan = j; 5489 alu.dst.write = (j == 2); 5490 5491 alu.src[0].sel = tmp0; 5492 alu.src[0].chan = 0; 5493 5494 if (signed_op) { 5495 alu.src[1].sel = tmp2; 5496 alu.src[1].chan = 0; 5497 } else { 5498 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5499 } 5500 5501 alu.last = (j == 3); 5502 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5503 return r; 5504 } 5505 } else { 5506 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5507 alu.op = ALU_OP2_MULHI_UINT; 5508 5509 alu.dst.sel = tmp0; 5510 alu.dst.chan = 2; 5511 alu.dst.write = 1; 5512 5513 alu.src[0].sel = tmp0; 5514 alu.src[0].chan = 0; 5515 5516 if (signed_op) { 5517 alu.src[1].sel = tmp2; 5518 alu.src[1].chan = 0; 5519 } else { 5520 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5521 } 5522 5523 alu.last = 1; 5524 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5525 return r; 5526 } 5527 5528 /* 11. 
tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 5529 if (ctx->bc->chip_class == CAYMAN) { 5530 for (j = 0 ; j < 4; j++) { 5531 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5532 alu.op = ALU_OP2_MULLO_UINT; 5533 5534 alu.dst.sel = tmp0; 5535 alu.dst.chan = j; 5536 alu.dst.write = (j == 1); 5537 5538 if (signed_op) { 5539 alu.src[0].sel = tmp2; 5540 alu.src[0].chan = 1; 5541 } else { 5542 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5543 } 5544 5545 alu.src[1].sel = tmp0; 5546 alu.src[1].chan = 2; 5547 5548 alu.last = (j == 3); 5549 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5550 return r; 5551 } 5552 } else { 5553 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5554 alu.op = ALU_OP2_MULLO_UINT; 5555 5556 alu.dst.sel = tmp0; 5557 alu.dst.chan = 1; 5558 alu.dst.write = 1; 5559 5560 if (signed_op) { 5561 alu.src[0].sel = tmp2; 5562 alu.src[0].chan = 1; 5563 } else { 5564 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 5565 } 5566 5567 alu.src[1].sel = tmp0; 5568 alu.src[1].chan = 2; 5569 5570 alu.last = 1; 5571 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5572 return r; 5573 } 5574 5575 /* 12. tmp0.w = src1 - tmp0.y = r */ 5576 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5577 alu.op = ALU_OP2_SUB_INT; 5578 5579 alu.dst.sel = tmp0; 5580 alu.dst.chan = 3; 5581 alu.dst.write = 1; 5582 5583 if (signed_op) { 5584 alu.src[0].sel = tmp2; 5585 alu.src[0].chan = 0; 5586 } else { 5587 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5588 } 5589 5590 alu.src[1].sel = tmp0; 5591 alu.src[1].chan = 1; 5592 5593 alu.last = 1; 5594 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5595 return r; 5596 5597 /* 13. 
tmp1.x = tmp0.w >= src2 = r >= src2 */ 5598 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5599 alu.op = ALU_OP2_SETGE_UINT; 5600 5601 alu.dst.sel = tmp1; 5602 alu.dst.chan = 0; 5603 alu.dst.write = 1; 5604 5605 alu.src[0].sel = tmp0; 5606 alu.src[0].chan = 3; 5607 if (signed_op) { 5608 alu.src[1].sel = tmp2; 5609 alu.src[1].chan = 1; 5610 } else { 5611 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5612 } 5613 5614 alu.last = 1; 5615 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5616 return r; 5617 5618 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 5619 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5620 alu.op = ALU_OP2_SETGE_UINT; 5621 5622 alu.dst.sel = tmp1; 5623 alu.dst.chan = 1; 5624 alu.dst.write = 1; 5625 5626 if (signed_op) { 5627 alu.src[0].sel = tmp2; 5628 alu.src[0].chan = 0; 5629 } else { 5630 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5631 } 5632 5633 alu.src[1].sel = tmp0; 5634 alu.src[1].chan = 1; 5635 5636 alu.last = 1; 5637 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5638 return r; 5639 5640 if (mod) { /* UMOD */ 5641 5642 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 5643 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5644 alu.op = ALU_OP2_SUB_INT; 5645 5646 alu.dst.sel = tmp1; 5647 alu.dst.chan = 2; 5648 alu.dst.write = 1; 5649 5650 alu.src[0].sel = tmp0; 5651 alu.src[0].chan = 3; 5652 5653 if (signed_op) { 5654 alu.src[1].sel = tmp2; 5655 alu.src[1].chan = 1; 5656 } else { 5657 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5658 } 5659 5660 alu.last = 1; 5661 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5662 return r; 5663 5664 /* 16. 
tmp1.w = tmp0.w + src2 = r + src2 */ 5665 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5666 alu.op = ALU_OP2_ADD_INT; 5667 5668 alu.dst.sel = tmp1; 5669 alu.dst.chan = 3; 5670 alu.dst.write = 1; 5671 5672 alu.src[0].sel = tmp0; 5673 alu.src[0].chan = 3; 5674 if (signed_op) { 5675 alu.src[1].sel = tmp2; 5676 alu.src[1].chan = 1; 5677 } else { 5678 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5679 } 5680 5681 alu.last = 1; 5682 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5683 return r; 5684 5685 } else { /* UDIV */ 5686 5687 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 5688 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5689 alu.op = ALU_OP2_ADD_INT; 5690 5691 alu.dst.sel = tmp1; 5692 alu.dst.chan = 2; 5693 alu.dst.write = 1; 5694 5695 alu.src[0].sel = tmp0; 5696 alu.src[0].chan = 2; 5697 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 5698 5699 alu.last = 1; 5700 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5701 return r; 5702 5703 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 5704 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5705 alu.op = ALU_OP2_ADD_INT; 5706 5707 alu.dst.sel = tmp1; 5708 alu.dst.chan = 3; 5709 alu.dst.write = 1; 5710 5711 alu.src[0].sel = tmp0; 5712 alu.src[0].chan = 2; 5713 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 5714 5715 alu.last = 1; 5716 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5717 return r; 5718 5719 } 5720 5721 /* 17. tmp1.x = tmp1.x & tmp1.y */ 5722 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5723 alu.op = ALU_OP2_AND_INT; 5724 5725 alu.dst.sel = tmp1; 5726 alu.dst.chan = 0; 5727 alu.dst.write = 1; 5728 5729 alu.src[0].sel = tmp1; 5730 alu.src[0].chan = 0; 5731 alu.src[1].sel = tmp1; 5732 alu.src[1].chan = 1; 5733 5734 alu.last = 1; 5735 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5736 return r; 5737 5738 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 5739 /* 18. tmp0.z = tmp1.x==0 ? 
tmp0.w : tmp1.z MOD */ 5740 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5741 alu.op = ALU_OP3_CNDE_INT; 5742 alu.is_op3 = 1; 5743 5744 alu.dst.sel = tmp0; 5745 alu.dst.chan = 2; 5746 alu.dst.write = 1; 5747 5748 alu.src[0].sel = tmp1; 5749 alu.src[0].chan = 0; 5750 alu.src[1].sel = tmp0; 5751 alu.src[1].chan = mod ? 3 : 2; 5752 alu.src[2].sel = tmp1; 5753 alu.src[2].chan = 2; 5754 5755 alu.last = 1; 5756 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5757 return r; 5758 5759 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 5760 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5761 alu.op = ALU_OP3_CNDE_INT; 5762 alu.is_op3 = 1; 5763 5764 if (signed_op) { 5765 alu.dst.sel = tmp0; 5766 alu.dst.chan = 2; 5767 alu.dst.write = 1; 5768 } else { 5769 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5770 } 5771 5772 alu.src[0].sel = tmp1; 5773 alu.src[0].chan = 1; 5774 alu.src[1].sel = tmp1; 5775 alu.src[1].chan = 3; 5776 alu.src[2].sel = tmp0; 5777 alu.src[2].chan = 2; 5778 5779 alu.last = 1; 5780 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5781 return r; 5782 5783 if (signed_op) { 5784 5785 /* fix the sign of the result */ 5786 5787 if (mod) { 5788 5789 /* tmp0.x = -tmp0.z */ 5790 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5791 alu.op = ALU_OP2_SUB_INT; 5792 5793 alu.dst.sel = tmp0; 5794 alu.dst.chan = 0; 5795 alu.dst.write = 1; 5796 5797 alu.src[0].sel = V_SQ_ALU_SRC_0; 5798 alu.src[1].sel = tmp0; 5799 alu.src[1].chan = 2; 5800 5801 alu.last = 1; 5802 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5803 return r; 5804 5805 /* sign of the remainder is the same as the sign of src0 */ 5806 /* tmp0.x = src0>=0 ? 
tmp0.z : tmp0.x */ 5807 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5808 alu.op = ALU_OP3_CNDGE_INT; 5809 alu.is_op3 = 1; 5810 5811 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5812 5813 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5814 alu.src[1].sel = tmp0; 5815 alu.src[1].chan = 2; 5816 alu.src[2].sel = tmp0; 5817 alu.src[2].chan = 0; 5818 5819 alu.last = 1; 5820 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5821 return r; 5822 5823 } else { 5824 5825 /* tmp0.x = -tmp0.z */ 5826 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5827 alu.op = ALU_OP2_SUB_INT; 5828 5829 alu.dst.sel = tmp0; 5830 alu.dst.chan = 0; 5831 alu.dst.write = 1; 5832 5833 alu.src[0].sel = V_SQ_ALU_SRC_0; 5834 alu.src[1].sel = tmp0; 5835 alu.src[1].chan = 2; 5836 5837 alu.last = 1; 5838 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5839 return r; 5840 5841 /* fix the quotient sign (same as the sign of src0*src1) */ 5842 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */ 5843 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5844 alu.op = ALU_OP3_CNDGE_INT; 5845 alu.is_op3 = 1; 5846 5847 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5848 5849 alu.src[0].sel = tmp2; 5850 alu.src[0].chan = 2; 5851 alu.src[1].sel = tmp0; 5852 alu.src[1].chan = 2; 5853 alu.src[2].sel = tmp0; 5854 alu.src[2].chan = 0; 5855 5856 alu.last = 1; 5857 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5858 return r; 5859 } 5860 } 5861 } 5862 return 0; 5863} 5864 5865static int tgsi_udiv(struct r600_shader_ctx *ctx) 5866{ 5867 return tgsi_divmod(ctx, 0, 0); 5868} 5869 5870static int tgsi_umod(struct r600_shader_ctx *ctx) 5871{ 5872 return tgsi_divmod(ctx, 1, 0); 5873} 5874 5875static int tgsi_idiv(struct r600_shader_ctx *ctx) 5876{ 5877 return tgsi_divmod(ctx, 0, 1); 5878} 5879 5880static int tgsi_imod(struct r600_shader_ctx *ctx) 5881{ 5882 return tgsi_divmod(ctx, 1, 1); 5883} 5884 5885 5886static int tgsi_f2i(struct r600_shader_ctx *ctx) 5887{ 5888 struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction; 5889 struct r600_bytecode_alu alu; 5890 int i, r; 5891 unsigned write_mask = inst->Dst[0].Register.WriteMask; 5892 int last_inst = tgsi_last_instruction(write_mask); 5893 5894 for (i = 0; i < 4; i++) { 5895 if (!(write_mask & (1<<i))) 5896 continue; 5897 5898 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5899 alu.op = ALU_OP1_TRUNC; 5900 5901 alu.dst.sel = ctx->temp_reg; 5902 alu.dst.chan = i; 5903 alu.dst.write = 1; 5904 5905 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5906 if (i == last_inst) 5907 alu.last = 1; 5908 r = r600_bytecode_add_alu(ctx->bc, &alu); 5909 if (r) 5910 return r; 5911 } 5912 5913 for (i = 0; i < 4; i++) { 5914 if (!(write_mask & (1<<i))) 5915 continue; 5916 5917 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5918 alu.op = ctx->inst_info->op; 5919 5920 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5921 5922 alu.src[0].sel = ctx->temp_reg; 5923 alu.src[0].chan = i; 5924 5925 if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT) 5926 alu.last = 1; 5927 r = r600_bytecode_add_alu(ctx->bc, &alu); 5928 if (r) 5929 return r; 5930 } 5931 5932 return 0; 5933} 5934 5935static int tgsi_iabs(struct r600_shader_ctx *ctx) 5936{ 5937 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5938 struct r600_bytecode_alu alu; 5939 int i, r; 5940 unsigned write_mask = inst->Dst[0].Register.WriteMask; 5941 int last_inst = tgsi_last_instruction(write_mask); 5942 5943 /* tmp = -src */ 5944 for (i = 0; i < 4; i++) { 5945 if (!(write_mask & (1<<i))) 5946 continue; 5947 5948 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5949 alu.op = ALU_OP2_SUB_INT; 5950 5951 alu.dst.sel = ctx->temp_reg; 5952 alu.dst.chan = i; 5953 alu.dst.write = 1; 5954 5955 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5956 alu.src[0].sel = V_SQ_ALU_SRC_0; 5957 5958 if (i == last_inst) 5959 alu.last = 1; 5960 r = r600_bytecode_add_alu(ctx->bc, &alu); 5961 if (r) 5962 return r; 5963 } 5964 5965 /* dst = (src >= 0 ? 
src : tmp) */ 5966 for (i = 0; i < 4; i++) { 5967 if (!(write_mask & (1<<i))) 5968 continue; 5969 5970 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5971 alu.op = ALU_OP3_CNDGE_INT; 5972 alu.is_op3 = 1; 5973 alu.dst.write = 1; 5974 5975 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5976 5977 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5978 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5979 alu.src[2].sel = ctx->temp_reg; 5980 alu.src[2].chan = i; 5981 5982 if (i == last_inst) 5983 alu.last = 1; 5984 r = r600_bytecode_add_alu(ctx->bc, &alu); 5985 if (r) 5986 return r; 5987 } 5988 return 0; 5989} 5990 5991static int tgsi_issg(struct r600_shader_ctx *ctx) 5992{ 5993 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5994 struct r600_bytecode_alu alu; 5995 int i, r; 5996 unsigned write_mask = inst->Dst[0].Register.WriteMask; 5997 int last_inst = tgsi_last_instruction(write_mask); 5998 5999 /* tmp = (src >= 0 ? src : -1) */ 6000 for (i = 0; i < 4; i++) { 6001 if (!(write_mask & (1<<i))) 6002 continue; 6003 6004 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6005 alu.op = ALU_OP3_CNDGE_INT; 6006 alu.is_op3 = 1; 6007 6008 alu.dst.sel = ctx->temp_reg; 6009 alu.dst.chan = i; 6010 alu.dst.write = 1; 6011 6012 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6013 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6014 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT; 6015 6016 if (i == last_inst) 6017 alu.last = 1; 6018 r = r600_bytecode_add_alu(ctx->bc, &alu); 6019 if (r) 6020 return r; 6021 } 6022 6023 /* dst = (tmp > 0 ? 
1 : tmp) */ 6024 for (i = 0; i < 4; i++) { 6025 if (!(write_mask & (1<<i))) 6026 continue; 6027 6028 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6029 alu.op = ALU_OP3_CNDGT_INT; 6030 alu.is_op3 = 1; 6031 alu.dst.write = 1; 6032 6033 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6034 6035 alu.src[0].sel = ctx->temp_reg; 6036 alu.src[0].chan = i; 6037 6038 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 6039 6040 alu.src[2].sel = ctx->temp_reg; 6041 alu.src[2].chan = i; 6042 6043 if (i == last_inst) 6044 alu.last = 1; 6045 r = r600_bytecode_add_alu(ctx->bc, &alu); 6046 if (r) 6047 return r; 6048 } 6049 return 0; 6050} 6051 6052 6053 6054static int tgsi_ssg(struct r600_shader_ctx *ctx) 6055{ 6056 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6057 struct r600_bytecode_alu alu; 6058 int i, r; 6059 6060 /* tmp = (src > 0 ? 1 : src) */ 6061 for (i = 0; i < 4; i++) { 6062 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6063 alu.op = ALU_OP3_CNDGT; 6064 alu.is_op3 = 1; 6065 6066 alu.dst.sel = ctx->temp_reg; 6067 alu.dst.chan = i; 6068 6069 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6070 alu.src[1].sel = V_SQ_ALU_SRC_1; 6071 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 6072 6073 if (i == 3) 6074 alu.last = 1; 6075 r = r600_bytecode_add_alu(ctx->bc, &alu); 6076 if (r) 6077 return r; 6078 } 6079 6080 /* dst = (-tmp > 0 ? 
-1 : tmp) */ 6081 for (i = 0; i < 4; i++) { 6082 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6083 alu.op = ALU_OP3_CNDGT; 6084 alu.is_op3 = 1; 6085 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6086 6087 alu.src[0].sel = ctx->temp_reg; 6088 alu.src[0].chan = i; 6089 alu.src[0].neg = 1; 6090 6091 alu.src[1].sel = V_SQ_ALU_SRC_1; 6092 alu.src[1].neg = 1; 6093 6094 alu.src[2].sel = ctx->temp_reg; 6095 alu.src[2].chan = i; 6096 6097 if (i == 3) 6098 alu.last = 1; 6099 r = r600_bytecode_add_alu(ctx->bc, &alu); 6100 if (r) 6101 return r; 6102 } 6103 return 0; 6104} 6105 6106static int tgsi_bfi(struct r600_shader_ctx *ctx) 6107{ 6108 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6109 struct r600_bytecode_alu alu; 6110 int i, r, t1, t2; 6111 6112 unsigned write_mask = inst->Dst[0].Register.WriteMask; 6113 int last_inst = tgsi_last_instruction(write_mask); 6114 6115 t1 = ctx->temp_reg; 6116 6117 for (i = 0; i < 4; i++) { 6118 if (!(write_mask & (1<<i))) 6119 continue; 6120 6121 /* create mask tmp */ 6122 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6123 alu.op = ALU_OP2_BFM_INT; 6124 alu.dst.sel = t1; 6125 alu.dst.chan = i; 6126 alu.dst.write = 1; 6127 alu.last = i == last_inst; 6128 6129 r600_bytecode_src(&alu.src[0], &ctx->src[3], i); 6130 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6131 6132 r = r600_bytecode_add_alu(ctx->bc, &alu); 6133 if (r) 6134 return r; 6135 } 6136 6137 t2 = r600_get_temp(ctx); 6138 6139 for (i = 0; i < 4; i++) { 6140 if (!(write_mask & (1<<i))) 6141 continue; 6142 6143 /* shift insert left */ 6144 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6145 alu.op = ALU_OP2_LSHL_INT; 6146 alu.dst.sel = t2; 6147 alu.dst.chan = i; 6148 alu.dst.write = 1; 6149 alu.last = i == last_inst; 6150 6151 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 6152 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6153 6154 r = r600_bytecode_add_alu(ctx->bc, &alu); 6155 if (r) 6156 return r; 6157 } 6158 6159 for (i = 0; i < 4; 
i++) { 6160 if (!(write_mask & (1<<i))) 6161 continue; 6162 6163 /* actual bitfield insert */ 6164 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6165 alu.op = ALU_OP3_BFI_INT; 6166 alu.is_op3 = 1; 6167 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6168 alu.dst.chan = i; 6169 alu.dst.write = 1; 6170 alu.last = i == last_inst; 6171 6172 alu.src[0].sel = t1; 6173 alu.src[0].chan = i; 6174 alu.src[1].sel = t2; 6175 alu.src[1].chan = i; 6176 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 6177 6178 r = r600_bytecode_add_alu(ctx->bc, &alu); 6179 if (r) 6180 return r; 6181 } 6182 6183 return 0; 6184} 6185 6186static int tgsi_msb(struct r600_shader_ctx *ctx) 6187{ 6188 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6189 struct r600_bytecode_alu alu; 6190 int i, r, t1, t2; 6191 6192 unsigned write_mask = inst->Dst[0].Register.WriteMask; 6193 int last_inst = tgsi_last_instruction(write_mask); 6194 6195 assert(ctx->inst_info->op == ALU_OP1_FFBH_INT || 6196 ctx->inst_info->op == ALU_OP1_FFBH_UINT); 6197 6198 t1 = ctx->temp_reg; 6199 6200 /* bit position is indexed from lsb by TGSI, and from msb by the hardware */ 6201 for (i = 0; i < 4; i++) { 6202 if (!(write_mask & (1<<i))) 6203 continue; 6204 6205 /* t1 = FFBH_INT / FFBH_UINT */ 6206 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6207 alu.op = ctx->inst_info->op; 6208 alu.dst.sel = t1; 6209 alu.dst.chan = i; 6210 alu.dst.write = 1; 6211 alu.last = i == last_inst; 6212 6213 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6214 6215 r = r600_bytecode_add_alu(ctx->bc, &alu); 6216 if (r) 6217 return r; 6218 } 6219 6220 t2 = r600_get_temp(ctx); 6221 6222 for (i = 0; i < 4; i++) { 6223 if (!(write_mask & (1<<i))) 6224 continue; 6225 6226 /* t2 = 31 - t1 */ 6227 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6228 alu.op = ALU_OP2_SUB_INT; 6229 alu.dst.sel = t2; 6230 alu.dst.chan = i; 6231 alu.dst.write = 1; 6232 alu.last = i == last_inst; 6233 6234 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 6235 
alu.src[0].value = 31; 6236 alu.src[1].sel = t1; 6237 alu.src[1].chan = i; 6238 6239 r = r600_bytecode_add_alu(ctx->bc, &alu); 6240 if (r) 6241 return r; 6242 } 6243 6244 for (i = 0; i < 4; i++) { 6245 if (!(write_mask & (1<<i))) 6246 continue; 6247 6248 /* result = t1 >= 0 ? t2 : t1 */ 6249 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6250 alu.op = ALU_OP3_CNDGE_INT; 6251 alu.is_op3 = 1; 6252 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6253 alu.dst.chan = i; 6254 alu.dst.write = 1; 6255 alu.last = i == last_inst; 6256 6257 alu.src[0].sel = t1; 6258 alu.src[0].chan = i; 6259 alu.src[1].sel = t2; 6260 alu.src[1].chan = i; 6261 alu.src[2].sel = t1; 6262 alu.src[2].chan = i; 6263 6264 r = r600_bytecode_add_alu(ctx->bc, &alu); 6265 if (r) 6266 return r; 6267 } 6268 6269 return 0; 6270} 6271 6272static int tgsi_interp_egcm(struct r600_shader_ctx *ctx) 6273{ 6274 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6275 struct r600_bytecode_alu alu; 6276 int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti; 6277 unsigned location; 6278 int input; 6279 6280 assert(inst->Src[0].Register.File == TGSI_FILE_INPUT); 6281 6282 input = inst->Src[0].Register.Index; 6283 6284 /* Interpolators have been marked for use already by allocate_system_value_inputs */ 6285 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 6286 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6287 location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */ 6288 } 6289 else { 6290 location = TGSI_INTERPOLATE_LOC_CENTROID; 6291 } 6292 6293 k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location); 6294 if (k < 0) 6295 k = 0; 6296 interp_gpr = ctx->eg_interpolators[k].ij_index / 2; 6297 interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2); 6298 6299 /* NOTE: currently offset is not perspective correct */ 6300 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 6301 inst->Instruction.Opcode == 
/* Evergreen/Cayman implementation of the TGSI INTERP_* opcodes.
 *
 * Interpolates fragment-shader input `inst->Src[0]` at:
 *  - the centroid (INTERP_CENTROID),
 *  - the pixel center plus a caller-supplied offset (INTERP_OFFSET), or
 *  - a sample position (INTERP_SAMPLE).
 * For the offset/sample variants the base i/j barycentrics are adjusted
 * with screen-space gradients before the INTERP_XY/ZW pair is emitted.
 * Returns 0 on success or the error from the bytecode emitters. */
static int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
	unsigned location;
	int input;

	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);

	input = inst->Src[0].Register.Index;

	/* Interpolators have been marked for use already by allocate_system_value_inputs */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */
	}
	else {
		location = TGSI_INTERPOLATE_LOC_CENTROID;
	}

	/* Find the i/j interpolator pair for this input's mode; fall back to
	 * interpolator 0 when no match was allocated. */
	k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location);
	if (k < 0)
		k = 0;
	/* Two i/j pairs are packed per GPR: pair 0 in .xy, pair 1 in .zw. */
	interp_gpr = ctx->eg_interpolators[k].ij_index / 2;
	interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2);

	/* NOTE: currently offset is not perspective correct */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		int sample_gpr = -1;
		int gradientsH, gradientsV;
		struct r600_bytecode_tex tex;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]);
		}

		/* Fetch d(i,j)/dx into gradientsH and d(i,j)/dy into gradientsV
		 * via the texture unit's gradient ops. */
		gradientsH = r600_get_temp(ctx);
		gradientsV = r600_get_temp(ctx);
		for (i = 0; i < 2; i++) {
			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
			tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V;
			tex.src_gpr = interp_gpr;
			tex.src_sel_x = interp_base_chan + 0;
			tex.src_sel_y = interp_base_chan + 1;
			tex.src_sel_z = 0;
			tex.src_sel_w = 0;
			tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
			tex.dst_sel_x = 0;
			tex.dst_sel_y = 1;
			tex.dst_sel_z = 7;	/* 7 = masked */
			tex.dst_sel_w = 7;
			tex.inst_mod = 1; // Use per pixel gradient calculation
			tex.sampler_id = 0;
			tex.resource_id = tex.sampler_id;
			r = r600_bytecode_add_tex(ctx->bc, &tex);
			if (r)
				return r;
		}

		/* temp.ij = gradientsH.ij * offset.x + base.ij
		 * (offset.x is either Src[1].x or sample_position.z). */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsH;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 2;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
			}
			alu.src[2].sel = interp_gpr;
			alu.src[2].chan = interp_base_chan + i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp.ij = gradientsV.ij * offset.y + temp.ij. */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsV;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 3;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
			}
			alu.src[2].sel = ctx->temp_reg;
			alu.src[2].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* Emit the INTERP pair: slots 0-3 compute ZW, slots 4-7 compute XY;
	 * only channels 2..5 of the 8 emitted slots carry live results. */
	tmp = r600_get_temp(ctx);
	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;

		alu.dst.sel = tmp;
		if ((i > 1 && i < 6)) {
			alu.dst.write = 1;
		}
		else {
			alu.dst.write = 0;
		}
		alu.dst.chan = i % 4;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
			inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			alu.src[0].sel = ctx->temp_reg;	/* adjusted i/j from above */
			alu.src[0].chan = 1 - (i % 2);
		} else {
			alu.src[0].sel = interp_gpr;
			alu.src[0].chan = interp_base_chan + 1 - (i % 2);
		}
		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[1].chan = 0;

		alu.last = i % 4 == 3;
		/* INTERP needs a fixed operand-to-slot mapping. */
		alu.bank_swizzle_force = SQ_ALU_VEC_210;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	// INTERP can't swizzle dst
	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	for (i = 0; i <= lasti; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = tmp;
		alu.src[0].chan = ctx->src[0].swizzle[i];
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		alu.last = i == lasti;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}
r600_bytecode_alu alu; 6440 int i, r; 6441 6442 for (i = 0; i < 4; i++) { 6443 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6444 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 6445 alu.op = ALU_OP0_NOP; 6446 alu.dst.chan = i; 6447 } else { 6448 alu.op = ALU_OP1_MOV; 6449 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6450 alu.src[0].sel = ctx->temp_reg; 6451 alu.src[0].chan = i; 6452 } 6453 if (i == 3) { 6454 alu.last = 1; 6455 } 6456 r = r600_bytecode_add_alu(ctx->bc, &alu); 6457 if (r) 6458 return r; 6459 } 6460 return 0; 6461} 6462 6463static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx, 6464 unsigned temp, int chan, 6465 struct r600_bytecode_alu_src *bc_src, 6466 const struct r600_shader_src *shader_src) 6467{ 6468 struct r600_bytecode_alu alu; 6469 int r; 6470 6471 r600_bytecode_src(bc_src, shader_src, chan); 6472 6473 /* op3 operands don't support abs modifier */ 6474 if (bc_src->abs) { 6475 assert(temp!=0); /* we actually need the extra register, make sure it is allocated. */ 6476 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6477 alu.op = ALU_OP1_MOV; 6478 alu.dst.sel = temp; 6479 alu.dst.chan = chan; 6480 alu.dst.write = 1; 6481 6482 alu.src[0] = *bc_src; 6483 alu.last = true; // sufficient? 
6484 r = r600_bytecode_add_alu(ctx->bc, &alu); 6485 if (r) 6486 return r; 6487 6488 memset(bc_src, 0, sizeof(*bc_src)); 6489 bc_src->sel = temp; 6490 bc_src->chan = chan; 6491 } 6492 return 0; 6493} 6494 6495static int tgsi_op3(struct r600_shader_ctx *ctx) 6496{ 6497 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6498 struct r600_bytecode_alu alu; 6499 int i, j, r; 6500 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6501 int temp_regs[4]; 6502 6503 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 6504 temp_regs[j] = 0; 6505 if (ctx->src[j].abs) 6506 temp_regs[j] = r600_get_temp(ctx); 6507 } 6508 for (i = 0; i < lasti + 1; i++) { 6509 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6510 continue; 6511 6512 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6513 alu.op = ctx->inst_info->op; 6514 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 6515 r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]); 6516 if (r) 6517 return r; 6518 } 6519 6520 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6521 alu.dst.chan = i; 6522 alu.dst.write = 1; 6523 alu.is_op3 = 1; 6524 if (i == lasti) { 6525 alu.last = 1; 6526 } 6527 r = r600_bytecode_add_alu(ctx->bc, &alu); 6528 if (r) 6529 return r; 6530 } 6531 return 0; 6532} 6533 6534static int tgsi_dp(struct r600_shader_ctx *ctx) 6535{ 6536 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6537 struct r600_bytecode_alu alu; 6538 int i, j, r; 6539 6540 for (i = 0; i < 4; i++) { 6541 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6542 alu.op = ctx->inst_info->op; 6543 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 6544 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 6545 } 6546 6547 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6548 alu.dst.chan = i; 6549 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 6550 /* handle some special cases */ 6551 switch (inst->Instruction.Opcode) { 6552 case TGSI_OPCODE_DP2: 6553 if 
(i > 1) { 6554 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 6555 alu.src[0].chan = alu.src[1].chan = 0; 6556 } 6557 break; 6558 case TGSI_OPCODE_DP3: 6559 if (i > 2) { 6560 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 6561 alu.src[0].chan = alu.src[1].chan = 0; 6562 } 6563 break; 6564 case TGSI_OPCODE_DPH: 6565 if (i == 3) { 6566 alu.src[0].sel = V_SQ_ALU_SRC_1; 6567 alu.src[0].chan = 0; 6568 alu.src[0].neg = 0; 6569 } 6570 break; 6571 default: 6572 break; 6573 } 6574 if (i == 3) { 6575 alu.last = 1; 6576 } 6577 r = r600_bytecode_add_alu(ctx->bc, &alu); 6578 if (r) 6579 return r; 6580 } 6581 return 0; 6582} 6583 6584static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 6585 unsigned index) 6586{ 6587 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6588 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 6589 inst->Src[index].Register.File != TGSI_FILE_INPUT && 6590 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) || 6591 ctx->src[index].neg || ctx->src[index].abs || 6592 (inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == TGSI_PROCESSOR_GEOMETRY); 6593} 6594 6595static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 6596 unsigned index) 6597{ 6598 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6599 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 6600} 6601 6602static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading) 6603{ 6604 struct r600_bytecode_vtx vtx; 6605 struct r600_bytecode_alu alu; 6606 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6607 int src_gpr, r, i; 6608 int id = tgsi_tex_get_src_gpr(ctx, 1); 6609 6610 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 6611 if (src_requires_loading) { 6612 for (i = 0; i < 4; i++) { 6613 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6614 alu.op = ALU_OP1_MOV; 6615 
r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6616 alu.dst.sel = ctx->temp_reg; 6617 alu.dst.chan = i; 6618 if (i == 3) 6619 alu.last = 1; 6620 alu.dst.write = 1; 6621 r = r600_bytecode_add_alu(ctx->bc, &alu); 6622 if (r) 6623 return r; 6624 } 6625 src_gpr = ctx->temp_reg; 6626 } 6627 6628 memset(&vtx, 0, sizeof(vtx)); 6629 vtx.op = FETCH_OP_VFETCH; 6630 vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; 6631 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 6632 vtx.src_gpr = src_gpr; 6633 vtx.mega_fetch_count = 16; 6634 vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 6635 vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 6636 vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ 6637 vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ 6638 vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */ 6639 vtx.use_const_fields = 1; 6640 6641 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 6642 return r; 6643 6644 if (ctx->bc->chip_class >= EVERGREEN) 6645 return 0; 6646 6647 for (i = 0; i < 4; i++) { 6648 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6649 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6650 continue; 6651 6652 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6653 alu.op = ALU_OP2_AND_INT; 6654 6655 alu.dst.chan = i; 6656 alu.dst.sel = vtx.dst_gpr; 6657 alu.dst.write = 1; 6658 6659 alu.src[0].sel = vtx.dst_gpr; 6660 alu.src[0].chan = i; 6661 6662 alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL; 6663 alu.src[1].sel += (id * 2); 6664 alu.src[1].chan = i % 4; 6665 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6666 6667 if (i == lasti) 6668 alu.last = 1; 6669 r = r600_bytecode_add_alu(ctx->bc, &alu); 6670 if (r) 6671 return r; 6672 } 6673 6674 if (inst->Dst[0].Register.WriteMask & 3) { 6675 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6676 alu.op = ALU_OP2_OR_INT; 6677 6678 alu.dst.chan = 3; 6679 
alu.dst.sel = vtx.dst_gpr; 6680 alu.dst.write = 1; 6681 6682 alu.src[0].sel = vtx.dst_gpr; 6683 alu.src[0].chan = 3; 6684 6685 alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1; 6686 alu.src[1].chan = 0; 6687 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6688 6689 alu.last = 1; 6690 r = r600_bytecode_add_alu(ctx->bc, &alu); 6691 if (r) 6692 return r; 6693 } 6694 return 0; 6695} 6696 6697static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) 6698{ 6699 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6700 struct r600_bytecode_alu alu; 6701 int r; 6702 int id = tgsi_tex_get_src_gpr(ctx, 1); 6703 6704 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6705 alu.op = ALU_OP1_MOV; 6706 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 6707 if (ctx->bc->chip_class >= EVERGREEN) { 6708 /* channel 0 or 2 of each word */ 6709 alu.src[0].sel += (id / 2); 6710 alu.src[0].chan = (id % 2) * 2; 6711 } else { 6712 /* r600 we have them at channel 2 of the second dword */ 6713 alu.src[0].sel += (id * 2) + 1; 6714 alu.src[0].chan = 1; 6715 } 6716 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6717 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 6718 alu.last = 1; 6719 r = r600_bytecode_add_alu(ctx->bc, &alu); 6720 if (r) 6721 return r; 6722 return 0; 6723} 6724 6725static int tgsi_tex(struct r600_shader_ctx *ctx) 6726{ 6727 static float one_point_five = 1.5f; 6728 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6729 struct r600_bytecode_tex tex; 6730 struct r600_bytecode_alu alu; 6731 unsigned src_gpr; 6732 int r, i, j; 6733 int opcode; 6734 bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing && 6735 inst->Instruction.Opcode == TGSI_OPCODE_TXF && 6736 (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || 6737 inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); 6738 6739 bool txf_add_offsets = inst->Texture.NumOffsets && 6740 inst->Instruction.Opcode == TGSI_OPCODE_TXF && 6741 inst->Texture.Texture != 
TGSI_TEXTURE_BUFFER; 6742 6743 /* Texture fetch instructions can only use gprs as source. 6744 * Also they cannot negate the source or take the absolute value */ 6745 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 6746 inst->Instruction.Opcode != TGSI_OPCODE_TXQS && 6747 tgsi_tex_src_requires_loading(ctx, 0)) || 6748 read_compressed_msaa || txf_add_offsets; 6749 6750 boolean src_loaded = FALSE; 6751 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; 6752 int8_t offset_x = 0, offset_y = 0, offset_z = 0; 6753 boolean has_txq_cube_array_z = false; 6754 unsigned sampler_index_mode; 6755 6756 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 6757 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6758 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 6759 if (inst->Dst[0].Register.WriteMask & 4) { 6760 ctx->shader->has_txq_cube_array_z_comp = true; 6761 has_txq_cube_array_z = true; 6762 } 6763 6764 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 6765 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6766 inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 6767 inst->Instruction.Opcode == TGSI_OPCODE_TG4) 6768 sampler_src_reg = 2; 6769 6770 /* TGSI moves the sampler to src reg 3 for TXD */ 6771 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 6772 sampler_src_reg = 3; 6773 6774 sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 6775 6776 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 6777 6778 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 6779 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 6780 ctx->shader->uses_tex_buffers = true; 6781 return r600_do_buffer_txq(ctx); 6782 } 6783 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 6784 if (ctx->bc->chip_class < EVERGREEN) 6785 ctx->shader->uses_tex_buffers = true; 6786 return do_vtx_fetch_inst(ctx, src_requires_loading); 6787 } 6788 } 6789 6790 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 6791 int out_chan; 6792 /* Add perspective divide */ 6793 if (ctx->bc->chip_class == CAYMAN) { 6794 out_chan = 2; 6795 for (i = 0; i < 3; i++) { 6796 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6797 alu.op = ALU_OP1_RECIP_IEEE; 6798 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6799 6800 alu.dst.sel = ctx->temp_reg; 6801 alu.dst.chan = i; 6802 if (i == 2) 6803 alu.last = 1; 6804 if (out_chan == i) 6805 alu.dst.write = 1; 6806 r = r600_bytecode_add_alu(ctx->bc, &alu); 6807 if (r) 6808 return r; 6809 } 6810 6811 } else { 6812 out_chan = 3; 6813 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6814 alu.op = ALU_OP1_RECIP_IEEE; 6815 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6816 6817 alu.dst.sel = ctx->temp_reg; 6818 alu.dst.chan = out_chan; 6819 alu.last = 1; 6820 alu.dst.write = 1; 6821 r = r600_bytecode_add_alu(ctx->bc, &alu); 6822 if (r) 6823 return r; 6824 } 6825 6826 for (i = 0; i < 3; i++) { 6827 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6828 alu.op = ALU_OP2_MUL; 6829 alu.src[0].sel = ctx->temp_reg; 6830 alu.src[0].chan = out_chan; 6831 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6832 alu.dst.sel = ctx->temp_reg; 6833 alu.dst.chan = i; 6834 alu.dst.write = 1; 6835 r = r600_bytecode_add_alu(ctx->bc, &alu); 6836 if (r) 6837 return r; 6838 } 6839 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6840 alu.op = ALU_OP1_MOV; 6841 alu.src[0].sel = V_SQ_ALU_SRC_1; 6842 
alu.src[0].chan = 0; 6843 alu.dst.sel = ctx->temp_reg; 6844 alu.dst.chan = 3; 6845 alu.last = 1; 6846 alu.dst.write = 1; 6847 r = r600_bytecode_add_alu(ctx->bc, &alu); 6848 if (r) 6849 return r; 6850 src_loaded = TRUE; 6851 src_gpr = ctx->temp_reg; 6852 } 6853 6854 6855 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 6856 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6857 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6858 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 6859 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 6860 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 6861 6862 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 6863 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 6864 6865 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 6866 for (i = 0; i < 4; i++) { 6867 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6868 alu.op = ALU_OP2_CUBE; 6869 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 6870 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 6871 alu.dst.sel = ctx->temp_reg; 6872 alu.dst.chan = i; 6873 if (i == 3) 6874 alu.last = 1; 6875 alu.dst.write = 1; 6876 r = r600_bytecode_add_alu(ctx->bc, &alu); 6877 if (r) 6878 return r; 6879 } 6880 6881 /* tmp1.z = RCP_e(|tmp1.z|) */ 6882 if (ctx->bc->chip_class == CAYMAN) { 6883 for (i = 0; i < 3; i++) { 6884 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6885 alu.op = ALU_OP1_RECIP_IEEE; 6886 alu.src[0].sel = ctx->temp_reg; 6887 alu.src[0].chan = 2; 6888 alu.src[0].abs = 1; 6889 alu.dst.sel = ctx->temp_reg; 6890 alu.dst.chan = i; 6891 if (i == 2) 6892 alu.dst.write = 1; 6893 if (i == 2) 6894 alu.last = 1; 6895 r = r600_bytecode_add_alu(ctx->bc, &alu); 6896 if (r) 6897 return r; 6898 } 6899 } else { 6900 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6901 alu.op = ALU_OP1_RECIP_IEEE; 6902 alu.src[0].sel = ctx->temp_reg; 6903 alu.src[0].chan = 2; 6904 alu.src[0].abs = 1; 6905 alu.dst.sel = ctx->temp_reg; 6906 alu.dst.chan = 2; 6907 
alu.dst.write = 1; 6908 alu.last = 1; 6909 r = r600_bytecode_add_alu(ctx->bc, &alu); 6910 if (r) 6911 return r; 6912 } 6913 6914 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 6915 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 6916 * muladd has no writemask, have to use another temp 6917 */ 6918 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6919 alu.op = ALU_OP3_MULADD; 6920 alu.is_op3 = 1; 6921 6922 alu.src[0].sel = ctx->temp_reg; 6923 alu.src[0].chan = 0; 6924 alu.src[1].sel = ctx->temp_reg; 6925 alu.src[1].chan = 2; 6926 6927 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6928 alu.src[2].chan = 0; 6929 alu.src[2].value = *(uint32_t *)&one_point_five; 6930 6931 alu.dst.sel = ctx->temp_reg; 6932 alu.dst.chan = 0; 6933 alu.dst.write = 1; 6934 6935 r = r600_bytecode_add_alu(ctx->bc, &alu); 6936 if (r) 6937 return r; 6938 6939 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6940 alu.op = ALU_OP3_MULADD; 6941 alu.is_op3 = 1; 6942 6943 alu.src[0].sel = ctx->temp_reg; 6944 alu.src[0].chan = 1; 6945 alu.src[1].sel = ctx->temp_reg; 6946 alu.src[1].chan = 2; 6947 6948 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6949 alu.src[2].chan = 0; 6950 alu.src[2].value = *(uint32_t *)&one_point_five; 6951 6952 alu.dst.sel = ctx->temp_reg; 6953 alu.dst.chan = 1; 6954 alu.dst.write = 1; 6955 6956 alu.last = 1; 6957 r = r600_bytecode_add_alu(ctx->bc, &alu); 6958 if (r) 6959 return r; 6960 /* write initial compare value into Z component 6961 - W src 0 for shadow cube 6962 - X src 1 for shadow cube array */ 6963 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6964 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6965 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6966 alu.op = ALU_OP1_MOV; 6967 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 6968 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 6969 else 6970 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6971 alu.dst.sel = ctx->temp_reg; 6972 alu.dst.chan = 2; 6973 alu.dst.write = 1; 6974 alu.last = 1; 
6975 r = r600_bytecode_add_alu(ctx->bc, &alu); 6976 if (r) 6977 return r; 6978 } 6979 6980 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6981 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6982 if (ctx->bc->chip_class >= EVERGREEN) { 6983 int mytmp = r600_get_temp(ctx); 6984 static const float eight = 8.0f; 6985 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6986 alu.op = ALU_OP1_MOV; 6987 alu.src[0].sel = ctx->temp_reg; 6988 alu.src[0].chan = 3; 6989 alu.dst.sel = mytmp; 6990 alu.dst.chan = 0; 6991 alu.dst.write = 1; 6992 alu.last = 1; 6993 r = r600_bytecode_add_alu(ctx->bc, &alu); 6994 if (r) 6995 return r; 6996 6997 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 6998 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6999 alu.op = ALU_OP3_MULADD; 7000 alu.is_op3 = 1; 7001 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7002 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7003 alu.src[1].chan = 0; 7004 alu.src[1].value = *(uint32_t *)&eight; 7005 alu.src[2].sel = mytmp; 7006 alu.src[2].chan = 0; 7007 alu.dst.sel = ctx->temp_reg; 7008 alu.dst.chan = 3; 7009 alu.dst.write = 1; 7010 alu.last = 1; 7011 r = r600_bytecode_add_alu(ctx->bc, &alu); 7012 if (r) 7013 return r; 7014 } else if (ctx->bc->chip_class < EVERGREEN) { 7015 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7016 tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 7017 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7018 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7019 tex.src_gpr = r600_get_temp(ctx); 7020 tex.src_sel_x = 0; 7021 tex.src_sel_y = 0; 7022 tex.src_sel_z = 0; 7023 tex.src_sel_w = 0; 7024 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 7025 tex.coord_type_x = 1; 7026 tex.coord_type_y = 1; 7027 tex.coord_type_z = 1; 7028 tex.coord_type_w = 1; 7029 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7030 alu.op = ALU_OP1_MOV; 7031 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7032 alu.dst.sel = tex.src_gpr; 
7033 alu.dst.chan = 0; 7034 alu.last = 1; 7035 alu.dst.write = 1; 7036 r = r600_bytecode_add_alu(ctx->bc, &alu); 7037 if (r) 7038 return r; 7039 7040 r = r600_bytecode_add_tex(ctx->bc, &tex); 7041 if (r) 7042 return r; 7043 } 7044 7045 } 7046 7047 /* for cube forms of lod and bias we need to route things */ 7048 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 7049 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 7050 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7051 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 7052 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7053 alu.op = ALU_OP1_MOV; 7054 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7055 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 7056 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 7057 else 7058 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7059 alu.dst.sel = ctx->temp_reg; 7060 alu.dst.chan = 2; 7061 alu.last = 1; 7062 alu.dst.write = 1; 7063 r = r600_bytecode_add_alu(ctx->bc, &alu); 7064 if (r) 7065 return r; 7066 } 7067 7068 src_loaded = TRUE; 7069 src_gpr = ctx->temp_reg; 7070 } 7071 7072 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 7073 int temp_h = 0, temp_v = 0; 7074 int start_val = 0; 7075 7076 /* if we've already loaded the src (i.e. CUBE don't reload it). 
*/ 7077 if (src_loaded == TRUE) 7078 start_val = 1; 7079 else 7080 src_loaded = TRUE; 7081 for (i = start_val; i < 3; i++) { 7082 int treg = r600_get_temp(ctx); 7083 7084 if (i == 0) 7085 src_gpr = treg; 7086 else if (i == 1) 7087 temp_h = treg; 7088 else 7089 temp_v = treg; 7090 7091 for (j = 0; j < 4; j++) { 7092 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7093 alu.op = ALU_OP1_MOV; 7094 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 7095 alu.dst.sel = treg; 7096 alu.dst.chan = j; 7097 if (j == 3) 7098 alu.last = 1; 7099 alu.dst.write = 1; 7100 r = r600_bytecode_add_alu(ctx->bc, &alu); 7101 if (r) 7102 return r; 7103 } 7104 } 7105 for (i = 1; i < 3; i++) { 7106 /* set gradients h/v */ 7107 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7108 tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : 7109 FETCH_OP_SET_GRADIENTS_V; 7110 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7111 tex.sampler_index_mode = sampler_index_mode; 7112 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7113 tex.resource_index_mode = sampler_index_mode; 7114 7115 tex.src_gpr = (i == 1) ? 
temp_h : temp_v; 7116 tex.src_sel_x = 0; 7117 tex.src_sel_y = 1; 7118 tex.src_sel_z = 2; 7119 tex.src_sel_w = 3; 7120 7121 tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */ 7122 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 7123 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 7124 tex.coord_type_x = 1; 7125 tex.coord_type_y = 1; 7126 tex.coord_type_z = 1; 7127 tex.coord_type_w = 1; 7128 } 7129 r = r600_bytecode_add_tex(ctx->bc, &tex); 7130 if (r) 7131 return r; 7132 } 7133 } 7134 7135 if (src_requires_loading && !src_loaded) { 7136 for (i = 0; i < 4; i++) { 7137 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7138 alu.op = ALU_OP1_MOV; 7139 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7140 alu.dst.sel = ctx->temp_reg; 7141 alu.dst.chan = i; 7142 if (i == 3) 7143 alu.last = 1; 7144 alu.dst.write = 1; 7145 r = r600_bytecode_add_alu(ctx->bc, &alu); 7146 if (r) 7147 return r; 7148 } 7149 src_loaded = TRUE; 7150 src_gpr = ctx->temp_reg; 7151 } 7152 7153 /* get offset values */ 7154 if (inst->Texture.NumOffsets) { 7155 assert(inst->Texture.NumOffsets == 1); 7156 7157 /* The texture offset feature doesn't work with the TXF instruction 7158 * and must be emulated by adding the offset to the texture coordinates. 
*/ 7159 if (txf_add_offsets) { 7160 const struct tgsi_texture_offset *off = inst->TexOffsets; 7161 7162 switch (inst->Texture.Texture) { 7163 case TGSI_TEXTURE_3D: 7164 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7165 alu.op = ALU_OP2_ADD_INT; 7166 alu.src[0].sel = src_gpr; 7167 alu.src[0].chan = 2; 7168 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7169 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 7170 alu.dst.sel = src_gpr; 7171 alu.dst.chan = 2; 7172 alu.dst.write = 1; 7173 alu.last = 1; 7174 r = r600_bytecode_add_alu(ctx->bc, &alu); 7175 if (r) 7176 return r; 7177 /* fall through */ 7178 7179 case TGSI_TEXTURE_2D: 7180 case TGSI_TEXTURE_SHADOW2D: 7181 case TGSI_TEXTURE_RECT: 7182 case TGSI_TEXTURE_SHADOWRECT: 7183 case TGSI_TEXTURE_2D_ARRAY: 7184 case TGSI_TEXTURE_SHADOW2D_ARRAY: 7185 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7186 alu.op = ALU_OP2_ADD_INT; 7187 alu.src[0].sel = src_gpr; 7188 alu.src[0].chan = 1; 7189 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7190 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 7191 alu.dst.sel = src_gpr; 7192 alu.dst.chan = 1; 7193 alu.dst.write = 1; 7194 alu.last = 1; 7195 r = r600_bytecode_add_alu(ctx->bc, &alu); 7196 if (r) 7197 return r; 7198 /* fall through */ 7199 7200 case TGSI_TEXTURE_1D: 7201 case TGSI_TEXTURE_SHADOW1D: 7202 case TGSI_TEXTURE_1D_ARRAY: 7203 case TGSI_TEXTURE_SHADOW1D_ARRAY: 7204 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7205 alu.op = ALU_OP2_ADD_INT; 7206 alu.src[0].sel = src_gpr; 7207 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7208 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 7209 alu.dst.sel = src_gpr; 7210 alu.dst.write = 1; 7211 alu.last = 1; 7212 r = r600_bytecode_add_alu(ctx->bc, &alu); 7213 if (r) 7214 return r; 7215 break; 7216 /* texture offsets do not apply to other texture targets */ 7217 } 7218 } else { 7219 switch (inst->Texture.Texture) { 7220 case TGSI_TEXTURE_3D: 7221 offset_z = ctx->literals[4 * 
inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 7222 /* fallthrough */ 7223 case TGSI_TEXTURE_2D: 7224 case TGSI_TEXTURE_SHADOW2D: 7225 case TGSI_TEXTURE_RECT: 7226 case TGSI_TEXTURE_SHADOWRECT: 7227 case TGSI_TEXTURE_2D_ARRAY: 7228 case TGSI_TEXTURE_SHADOW2D_ARRAY: 7229 offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 7230 /* fallthrough */ 7231 case TGSI_TEXTURE_1D: 7232 case TGSI_TEXTURE_SHADOW1D: 7233 case TGSI_TEXTURE_1D_ARRAY: 7234 case TGSI_TEXTURE_SHADOW1D_ARRAY: 7235 offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 7236 } 7237 } 7238 } 7239 7240 /* Obtain the sample index for reading a compressed MSAA color texture. 7241 * To read the FMASK, we use the ldfptr instruction, which tells us 7242 * where the samples are stored. 7243 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 7244 * which is the identity mapping. Each nibble says which physical sample 7245 * should be fetched to get that sample. 7246 * 7247 * Assume src.z contains the sample index. It should be modified like this: 7248 * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 7249 * Then fetch the texel with src. 
7250 */ 7251 if (read_compressed_msaa) { 7252 unsigned sample_chan = 3; 7253 unsigned temp = r600_get_temp(ctx); 7254 assert(src_loaded); 7255 7256 /* temp.w = ldfptr() */ 7257 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7258 tex.op = FETCH_OP_LD; 7259 tex.inst_mod = 1; /* to indicate this is ldfptr */ 7260 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7261 tex.sampler_index_mode = sampler_index_mode; 7262 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7263 tex.resource_index_mode = sampler_index_mode; 7264 tex.src_gpr = src_gpr; 7265 tex.dst_gpr = temp; 7266 tex.dst_sel_x = 7; /* mask out these components */ 7267 tex.dst_sel_y = 7; 7268 tex.dst_sel_z = 7; 7269 tex.dst_sel_w = 0; /* store X */ 7270 tex.src_sel_x = 0; 7271 tex.src_sel_y = 1; 7272 tex.src_sel_z = 2; 7273 tex.src_sel_w = 3; 7274 tex.offset_x = offset_x; 7275 tex.offset_y = offset_y; 7276 tex.offset_z = offset_z; 7277 r = r600_bytecode_add_tex(ctx->bc, &tex); 7278 if (r) 7279 return r; 7280 7281 /* temp.x = sample_index*4 */ 7282 if (ctx->bc->chip_class == CAYMAN) { 7283 for (i = 0 ; i < 4; i++) { 7284 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7285 alu.op = ALU_OP2_MULLO_INT; 7286 alu.src[0].sel = src_gpr; 7287 alu.src[0].chan = sample_chan; 7288 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7289 alu.src[1].value = 4; 7290 alu.dst.sel = temp; 7291 alu.dst.chan = i; 7292 alu.dst.write = i == 0; 7293 if (i == 3) 7294 alu.last = 1; 7295 r = r600_bytecode_add_alu(ctx->bc, &alu); 7296 if (r) 7297 return r; 7298 } 7299 } else { 7300 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7301 alu.op = ALU_OP2_MULLO_INT; 7302 alu.src[0].sel = src_gpr; 7303 alu.src[0].chan = sample_chan; 7304 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7305 alu.src[1].value = 4; 7306 alu.dst.sel = temp; 7307 alu.dst.chan = 0; 7308 alu.dst.write = 1; 7309 alu.last = 1; 7310 r = r600_bytecode_add_alu(ctx->bc, &alu); 7311 if (r) 7312 return r; 7313 } 7314 7315 /* sample_index = temp.w >> temp.x */ 7316 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7317 alu.op = ALU_OP2_LSHR_INT; 7318 alu.src[0].sel = temp; 7319 alu.src[0].chan = 3; 7320 alu.src[1].sel = temp; 7321 alu.src[1].chan = 0; 7322 alu.dst.sel = src_gpr; 7323 alu.dst.chan = sample_chan; 7324 alu.dst.write = 1; 7325 alu.last = 1; 7326 r = r600_bytecode_add_alu(ctx->bc, &alu); 7327 if (r) 7328 return r; 7329 7330 /* sample_index & 0xF */ 7331 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7332 alu.op = ALU_OP2_AND_INT; 7333 alu.src[0].sel = src_gpr; 7334 alu.src[0].chan = sample_chan; 7335 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7336 alu.src[1].value = 0xF; 7337 alu.dst.sel = src_gpr; 7338 alu.dst.chan = sample_chan; 7339 alu.dst.write = 1; 7340 alu.last = 1; 7341 r = r600_bytecode_add_alu(ctx->bc, &alu); 7342 if (r) 7343 return r; 7344#if 0 7345 /* visualize the FMASK */ 7346 for (i = 0; i < 4; i++) { 7347 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7348 alu.op = ALU_OP1_INT_TO_FLT; 7349 alu.src[0].sel = src_gpr; 7350 alu.src[0].chan = sample_chan; 7351 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7352 alu.dst.chan = i; 7353 alu.dst.write = 1; 7354 alu.last = 1; 7355 r = r600_bytecode_add_alu(ctx->bc, &alu); 7356 if (r) 7357 return r; 7358 } 7359 return 0; 7360#endif 7361 } 7362 7363 /* does this shader want a num layers from TXQ for a cube array? 
*/ 7364 if (has_txq_cube_array_z) { 7365 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7366 7367 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7368 alu.op = ALU_OP1_MOV; 7369 7370 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 7371 if (ctx->bc->chip_class >= EVERGREEN) { 7372 /* channel 1 or 3 of each word */ 7373 alu.src[0].sel += (id / 2); 7374 alu.src[0].chan = ((id % 2) * 2) + 1; 7375 } else { 7376 /* r600 we have them at channel 2 of the second dword */ 7377 alu.src[0].sel += (id * 2) + 1; 7378 alu.src[0].chan = 2; 7379 } 7380 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 7381 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 7382 alu.last = 1; 7383 r = r600_bytecode_add_alu(ctx->bc, &alu); 7384 if (r) 7385 return r; 7386 /* disable writemask from texture instruction */ 7387 inst->Dst[0].Register.WriteMask &= ~4; 7388 } 7389 7390 opcode = ctx->inst_info->op; 7391 if (opcode == FETCH_OP_GATHER4 && 7392 inst->TexOffsets[0].File != TGSI_FILE_NULL && 7393 inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { 7394 opcode = FETCH_OP_GATHER4_O; 7395 7396 /* GATHER4_O/GATHER4_C_O use offset values loaded by 7397 SET_TEXTURE_OFFSETS instruction. The immediate offset values 7398 encoded in the instruction are ignored. 
*/ 7399 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7400 tex.op = FETCH_OP_SET_TEXTURE_OFFSETS; 7401 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7402 tex.sampler_index_mode = sampler_index_mode; 7403 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7404 tex.resource_index_mode = sampler_index_mode; 7405 7406 tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; 7407 tex.src_sel_x = inst->TexOffsets[0].SwizzleX; 7408 tex.src_sel_y = inst->TexOffsets[0].SwizzleY; 7409 tex.src_sel_z = inst->TexOffsets[0].SwizzleZ; 7410 tex.src_sel_w = 4; 7411 7412 tex.dst_sel_x = 7; 7413 tex.dst_sel_y = 7; 7414 tex.dst_sel_z = 7; 7415 tex.dst_sel_w = 7; 7416 7417 r = r600_bytecode_add_tex(ctx->bc, &tex); 7418 if (r) 7419 return r; 7420 } 7421 7422 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 7423 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 7424 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 7425 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7426 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 7427 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 7428 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7429 switch (opcode) { 7430 case FETCH_OP_SAMPLE: 7431 opcode = FETCH_OP_SAMPLE_C; 7432 break; 7433 case FETCH_OP_SAMPLE_L: 7434 opcode = FETCH_OP_SAMPLE_C_L; 7435 break; 7436 case FETCH_OP_SAMPLE_LB: 7437 opcode = FETCH_OP_SAMPLE_C_LB; 7438 break; 7439 case FETCH_OP_SAMPLE_G: 7440 opcode = FETCH_OP_SAMPLE_C_G; 7441 break; 7442 /* Texture gather variants */ 7443 case FETCH_OP_GATHER4: 7444 opcode = FETCH_OP_GATHER4_C; 7445 break; 7446 case FETCH_OP_GATHER4_O: 7447 opcode = FETCH_OP_GATHER4_C_O; 7448 break; 7449 } 7450 } 7451 7452 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7453 tex.op = opcode; 7454 7455 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7456 tex.sampler_index_mode = sampler_index_mode; 7457 tex.resource_id = tex.sampler_id + 
R600_MAX_CONST_BUFFERS; 7458 tex.resource_index_mode = sampler_index_mode; 7459 tex.src_gpr = src_gpr; 7460 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7461 7462 if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE || 7463 inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) { 7464 tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */ 7465 } 7466 7467 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 7468 int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]; 7469 tex.inst_mod = texture_component_select; 7470 7471 if (ctx->bc->chip_class == CAYMAN) { 7472 /* GATHER4 result order is different from TGSI TG4 */ 7473 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7; 7474 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7; 7475 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7; 7476 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7477 } else { 7478 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7479 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 7480 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7481 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7482 } 7483 } 7484 else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) { 7485 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7486 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7487 tex.dst_sel_z = 7; 7488 tex.dst_sel_w = 7; 7489 } 7490 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 7491 tex.dst_sel_x = 3; 7492 tex.dst_sel_y = 7; 7493 tex.dst_sel_z = 7; 7494 tex.dst_sel_w = 7; 7495 } 7496 else { 7497 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7498 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7499 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 
2 : 7; 7500 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7501 } 7502 7503 7504 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ || 7505 inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 7506 tex.src_sel_x = 4; 7507 tex.src_sel_y = 4; 7508 tex.src_sel_z = 4; 7509 tex.src_sel_w = 4; 7510 } else if (src_loaded) { 7511 tex.src_sel_x = 0; 7512 tex.src_sel_y = 1; 7513 tex.src_sel_z = 2; 7514 tex.src_sel_w = 3; 7515 } else { 7516 tex.src_sel_x = ctx->src[0].swizzle[0]; 7517 tex.src_sel_y = ctx->src[0].swizzle[1]; 7518 tex.src_sel_z = ctx->src[0].swizzle[2]; 7519 tex.src_sel_w = ctx->src[0].swizzle[3]; 7520 tex.src_rel = ctx->src[0].rel; 7521 } 7522 7523 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 7524 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7525 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7526 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7527 tex.src_sel_x = 1; 7528 tex.src_sel_y = 0; 7529 tex.src_sel_z = 3; 7530 tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 7531 } 7532 7533 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 7534 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 7535 tex.coord_type_x = 1; 7536 tex.coord_type_y = 1; 7537 } 7538 tex.coord_type_z = 1; 7539 tex.coord_type_w = 1; 7540 7541 tex.offset_x = offset_x; 7542 tex.offset_y = offset_y; 7543 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && 7544 (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 7545 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { 7546 tex.offset_z = 0; 7547 } 7548 else { 7549 tex.offset_z = offset_z; 7550 } 7551 7552 /* Put the depth for comparison in W. 7553 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 7554 * Some instructions expect the depth in Z. 
*/ 7555 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 7556 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 7557 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 7558 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 7559 opcode != FETCH_OP_SAMPLE_C_L && 7560 opcode != FETCH_OP_SAMPLE_C_LB) { 7561 tex.src_sel_w = tex.src_sel_z; 7562 } 7563 7564 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 7565 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 7566 if (opcode == FETCH_OP_SAMPLE_C_L || 7567 opcode == FETCH_OP_SAMPLE_C_LB) { 7568 /* the array index is read from Y */ 7569 tex.coord_type_y = 0; 7570 } else { 7571 /* the array index is read from Z */ 7572 tex.coord_type_z = 0; 7573 tex.src_sel_z = tex.src_sel_y; 7574 } 7575 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 7576 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 7577 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7578 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 7579 (ctx->bc->chip_class >= EVERGREEN))) 7580 /* the array index is read from Z */ 7581 tex.coord_type_z = 0; 7582 7583 /* mask unused source components */ 7584 if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) { 7585 switch (inst->Texture.Texture) { 7586 case TGSI_TEXTURE_2D: 7587 case TGSI_TEXTURE_RECT: 7588 tex.src_sel_z = 7; 7589 tex.src_sel_w = 7; 7590 break; 7591 case TGSI_TEXTURE_1D_ARRAY: 7592 tex.src_sel_y = 7; 7593 tex.src_sel_w = 7; 7594 break; 7595 case TGSI_TEXTURE_1D: 7596 tex.src_sel_y = 7; 7597 tex.src_sel_z = 7; 7598 tex.src_sel_w = 7; 7599 break; 7600 } 7601 } 7602 7603 r = r600_bytecode_add_tex(ctx->bc, &tex); 7604 if (r) 7605 return r; 7606 7607 /* add shadow ambient support - gallium doesn't do it yet */ 7608 return 0; 7609} 7610 7611static int tgsi_lrp(struct r600_shader_ctx *ctx) 7612{ 7613 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7614 struct r600_bytecode_alu alu; 7615 int lasti = 
tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7616 unsigned i, temp_regs[2]; 7617 int r; 7618 7619 /* optimize if it's just an equal balance */ 7620 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 7621 for (i = 0; i < lasti + 1; i++) { 7622 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7623 continue; 7624 7625 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7626 alu.op = ALU_OP2_ADD; 7627 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 7628 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 7629 alu.omod = 3; 7630 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7631 alu.dst.chan = i; 7632 if (i == lasti) { 7633 alu.last = 1; 7634 } 7635 r = r600_bytecode_add_alu(ctx->bc, &alu); 7636 if (r) 7637 return r; 7638 } 7639 return 0; 7640 } 7641 7642 /* 1 - src0 */ 7643 for (i = 0; i < lasti + 1; i++) { 7644 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7645 continue; 7646 7647 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7648 alu.op = ALU_OP2_ADD; 7649 alu.src[0].sel = V_SQ_ALU_SRC_1; 7650 alu.src[0].chan = 0; 7651 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 7652 r600_bytecode_src_toggle_neg(&alu.src[1]); 7653 alu.dst.sel = ctx->temp_reg; 7654 alu.dst.chan = i; 7655 if (i == lasti) { 7656 alu.last = 1; 7657 } 7658 alu.dst.write = 1; 7659 r = r600_bytecode_add_alu(ctx->bc, &alu); 7660 if (r) 7661 return r; 7662 } 7663 7664 /* (1 - src0) * src2 */ 7665 for (i = 0; i < lasti + 1; i++) { 7666 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7667 continue; 7668 7669 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7670 alu.op = ALU_OP2_MUL; 7671 alu.src[0].sel = ctx->temp_reg; 7672 alu.src[0].chan = i; 7673 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 7674 alu.dst.sel = ctx->temp_reg; 7675 alu.dst.chan = i; 7676 if (i == lasti) { 7677 alu.last = 1; 7678 } 7679 alu.dst.write = 1; 7680 r = r600_bytecode_add_alu(ctx->bc, &alu); 7681 if (r) 7682 return r; 7683 } 7684 7685 /* src0 * src1 + (1 - src0) * src2 */ 7686 if (ctx->src[0].abs) 7687 
temp_regs[0] = r600_get_temp(ctx); 7688 else 7689 temp_regs[0] = 0; 7690 if (ctx->src[1].abs) 7691 temp_regs[1] = r600_get_temp(ctx); 7692 else 7693 temp_regs[1] = 0; 7694 7695 for (i = 0; i < lasti + 1; i++) { 7696 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7697 continue; 7698 7699 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7700 alu.op = ALU_OP3_MULADD; 7701 alu.is_op3 = 1; 7702 r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); 7703 if (r) 7704 return r; 7705 r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]); 7706 if (r) 7707 return r; 7708 alu.src[2].sel = ctx->temp_reg; 7709 alu.src[2].chan = i; 7710 7711 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7712 alu.dst.chan = i; 7713 if (i == lasti) { 7714 alu.last = 1; 7715 } 7716 r = r600_bytecode_add_alu(ctx->bc, &alu); 7717 if (r) 7718 return r; 7719 } 7720 return 0; 7721} 7722 7723static int tgsi_cmp(struct r600_shader_ctx *ctx) 7724{ 7725 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7726 struct r600_bytecode_alu alu; 7727 int i, r, j; 7728 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7729 int temp_regs[3]; 7730 7731 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 7732 temp_regs[j] = 0; 7733 if (ctx->src[j].abs) 7734 temp_regs[j] = r600_get_temp(ctx); 7735 } 7736 7737 for (i = 0; i < lasti + 1; i++) { 7738 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7739 continue; 7740 7741 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7742 alu.op = ALU_OP3_CNDGE; 7743 r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); 7744 if (r) 7745 return r; 7746 r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]); 7747 if (r) 7748 return r; 7749 r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]); 7750 if (r) 7751 return r; 7752 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7753 alu.dst.chan = i; 7754 alu.dst.write = 1; 7755 alu.is_op3 = 1; 
7756 if (i == lasti) 7757 alu.last = 1; 7758 r = r600_bytecode_add_alu(ctx->bc, &alu); 7759 if (r) 7760 return r; 7761 } 7762 return 0; 7763} 7764 7765static int tgsi_ucmp(struct r600_shader_ctx *ctx) 7766{ 7767 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7768 struct r600_bytecode_alu alu; 7769 int i, r; 7770 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7771 7772 for (i = 0; i < lasti + 1; i++) { 7773 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7774 continue; 7775 7776 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7777 alu.op = ALU_OP3_CNDE_INT; 7778 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7779 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 7780 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 7781 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7782 alu.dst.chan = i; 7783 alu.dst.write = 1; 7784 alu.is_op3 = 1; 7785 if (i == lasti) 7786 alu.last = 1; 7787 r = r600_bytecode_add_alu(ctx->bc, &alu); 7788 if (r) 7789 return r; 7790 } 7791 return 0; 7792} 7793 7794static int tgsi_xpd(struct r600_shader_ctx *ctx) 7795{ 7796 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7797 static const unsigned int src0_swizzle[] = {2, 0, 1}; 7798 static const unsigned int src1_swizzle[] = {1, 2, 0}; 7799 struct r600_bytecode_alu alu; 7800 uint32_t use_temp = 0; 7801 int i, r; 7802 7803 if (inst->Dst[0].Register.WriteMask != 0xf) 7804 use_temp = 1; 7805 7806 for (i = 0; i < 4; i++) { 7807 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7808 alu.op = ALU_OP2_MUL; 7809 if (i < 3) { 7810 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 7811 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 7812 } else { 7813 alu.src[0].sel = V_SQ_ALU_SRC_0; 7814 alu.src[0].chan = i; 7815 alu.src[1].sel = V_SQ_ALU_SRC_0; 7816 alu.src[1].chan = i; 7817 } 7818 7819 alu.dst.sel = ctx->temp_reg; 7820 alu.dst.chan = i; 7821 alu.dst.write = 1; 7822 7823 if (i == 3) 7824 alu.last = 
1; 7825 r = r600_bytecode_add_alu(ctx->bc, &alu); 7826 if (r) 7827 return r; 7828 } 7829 7830 for (i = 0; i < 4; i++) { 7831 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7832 alu.op = ALU_OP3_MULADD; 7833 7834 if (i < 3) { 7835 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 7836 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 7837 } else { 7838 alu.src[0].sel = V_SQ_ALU_SRC_0; 7839 alu.src[0].chan = i; 7840 alu.src[1].sel = V_SQ_ALU_SRC_0; 7841 alu.src[1].chan = i; 7842 } 7843 7844 alu.src[2].sel = ctx->temp_reg; 7845 alu.src[2].neg = 1; 7846 alu.src[2].chan = i; 7847 7848 if (use_temp) 7849 alu.dst.sel = ctx->temp_reg; 7850 else 7851 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7852 alu.dst.chan = i; 7853 alu.dst.write = 1; 7854 alu.is_op3 = 1; 7855 if (i == 3) 7856 alu.last = 1; 7857 r = r600_bytecode_add_alu(ctx->bc, &alu); 7858 if (r) 7859 return r; 7860 } 7861 if (use_temp) 7862 return tgsi_helper_copy(ctx, inst); 7863 return 0; 7864} 7865 7866static int tgsi_exp(struct r600_shader_ctx *ctx) 7867{ 7868 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7869 struct r600_bytecode_alu alu; 7870 int r; 7871 int i; 7872 7873 /* result.x = 2^floor(src); */ 7874 if (inst->Dst[0].Register.WriteMask & 1) { 7875 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7876 7877 alu.op = ALU_OP1_FLOOR; 7878 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7879 7880 alu.dst.sel = ctx->temp_reg; 7881 alu.dst.chan = 0; 7882 alu.dst.write = 1; 7883 alu.last = 1; 7884 r = r600_bytecode_add_alu(ctx->bc, &alu); 7885 if (r) 7886 return r; 7887 7888 if (ctx->bc->chip_class == CAYMAN) { 7889 for (i = 0; i < 3; i++) { 7890 alu.op = ALU_OP1_EXP_IEEE; 7891 alu.src[0].sel = ctx->temp_reg; 7892 alu.src[0].chan = 0; 7893 7894 alu.dst.sel = ctx->temp_reg; 7895 alu.dst.chan = i; 7896 alu.dst.write = i == 0; 7897 alu.last = i == 2; 7898 r = r600_bytecode_add_alu(ctx->bc, &alu); 7899 if (r) 7900 return r; 7901 } 7902 } else { 7903 alu.op 
= ALU_OP1_EXP_IEEE; 7904 alu.src[0].sel = ctx->temp_reg; 7905 alu.src[0].chan = 0; 7906 7907 alu.dst.sel = ctx->temp_reg; 7908 alu.dst.chan = 0; 7909 alu.dst.write = 1; 7910 alu.last = 1; 7911 r = r600_bytecode_add_alu(ctx->bc, &alu); 7912 if (r) 7913 return r; 7914 } 7915 } 7916 7917 /* result.y = tmp - floor(tmp); */ 7918 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 7919 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7920 7921 alu.op = ALU_OP1_FRACT; 7922 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7923 7924 alu.dst.sel = ctx->temp_reg; 7925#if 0 7926 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7927 if (r) 7928 return r; 7929#endif 7930 alu.dst.write = 1; 7931 alu.dst.chan = 1; 7932 7933 alu.last = 1; 7934 7935 r = r600_bytecode_add_alu(ctx->bc, &alu); 7936 if (r) 7937 return r; 7938 } 7939 7940 /* result.z = RoughApprox2ToX(tmp);*/ 7941 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 7942 if (ctx->bc->chip_class == CAYMAN) { 7943 for (i = 0; i < 3; i++) { 7944 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7945 alu.op = ALU_OP1_EXP_IEEE; 7946 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7947 7948 alu.dst.sel = ctx->temp_reg; 7949 alu.dst.chan = i; 7950 if (i == 2) { 7951 alu.dst.write = 1; 7952 alu.last = 1; 7953 } 7954 7955 r = r600_bytecode_add_alu(ctx->bc, &alu); 7956 if (r) 7957 return r; 7958 } 7959 } else { 7960 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7961 alu.op = ALU_OP1_EXP_IEEE; 7962 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7963 7964 alu.dst.sel = ctx->temp_reg; 7965 alu.dst.write = 1; 7966 alu.dst.chan = 2; 7967 7968 alu.last = 1; 7969 7970 r = r600_bytecode_add_alu(ctx->bc, &alu); 7971 if (r) 7972 return r; 7973 } 7974 } 7975 7976 /* result.w = 1.0;*/ 7977 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 7978 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7979 7980 alu.op = ALU_OP1_MOV; 7981 alu.src[0].sel = V_SQ_ALU_SRC_1; 7982 alu.src[0].chan = 0; 7983 7984 alu.dst.sel = ctx->temp_reg; 7985 
alu.dst.chan = 3; 7986 alu.dst.write = 1; 7987 alu.last = 1; 7988 r = r600_bytecode_add_alu(ctx->bc, &alu); 7989 if (r) 7990 return r; 7991 } 7992 return tgsi_helper_copy(ctx, inst); 7993} 7994 7995static int tgsi_log(struct r600_shader_ctx *ctx) 7996{ 7997 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7998 struct r600_bytecode_alu alu; 7999 int r; 8000 int i; 8001 8002 /* result.x = floor(log2(|src|)); */ 8003 if (inst->Dst[0].Register.WriteMask & 1) { 8004 if (ctx->bc->chip_class == CAYMAN) { 8005 for (i = 0; i < 3; i++) { 8006 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8007 8008 alu.op = ALU_OP1_LOG_IEEE; 8009 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8010 r600_bytecode_src_set_abs(&alu.src[0]); 8011 8012 alu.dst.sel = ctx->temp_reg; 8013 alu.dst.chan = i; 8014 if (i == 0) 8015 alu.dst.write = 1; 8016 if (i == 2) 8017 alu.last = 1; 8018 r = r600_bytecode_add_alu(ctx->bc, &alu); 8019 if (r) 8020 return r; 8021 } 8022 8023 } else { 8024 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8025 8026 alu.op = ALU_OP1_LOG_IEEE; 8027 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8028 r600_bytecode_src_set_abs(&alu.src[0]); 8029 8030 alu.dst.sel = ctx->temp_reg; 8031 alu.dst.chan = 0; 8032 alu.dst.write = 1; 8033 alu.last = 1; 8034 r = r600_bytecode_add_alu(ctx->bc, &alu); 8035 if (r) 8036 return r; 8037 } 8038 8039 alu.op = ALU_OP1_FLOOR; 8040 alu.src[0].sel = ctx->temp_reg; 8041 alu.src[0].chan = 0; 8042 8043 alu.dst.sel = ctx->temp_reg; 8044 alu.dst.chan = 0; 8045 alu.dst.write = 1; 8046 alu.last = 1; 8047 8048 r = r600_bytecode_add_alu(ctx->bc, &alu); 8049 if (r) 8050 return r; 8051 } 8052 8053 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 8054 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 8055 8056 if (ctx->bc->chip_class == CAYMAN) { 8057 for (i = 0; i < 3; i++) { 8058 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8059 8060 alu.op = ALU_OP1_LOG_IEEE; 8061 r600_bytecode_src(&alu.src[0], &ctx->src[0], 
0); 8062 r600_bytecode_src_set_abs(&alu.src[0]); 8063 8064 alu.dst.sel = ctx->temp_reg; 8065 alu.dst.chan = i; 8066 if (i == 1) 8067 alu.dst.write = 1; 8068 if (i == 2) 8069 alu.last = 1; 8070 8071 r = r600_bytecode_add_alu(ctx->bc, &alu); 8072 if (r) 8073 return r; 8074 } 8075 } else { 8076 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8077 8078 alu.op = ALU_OP1_LOG_IEEE; 8079 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8080 r600_bytecode_src_set_abs(&alu.src[0]); 8081 8082 alu.dst.sel = ctx->temp_reg; 8083 alu.dst.chan = 1; 8084 alu.dst.write = 1; 8085 alu.last = 1; 8086 8087 r = r600_bytecode_add_alu(ctx->bc, &alu); 8088 if (r) 8089 return r; 8090 } 8091 8092 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8093 8094 alu.op = ALU_OP1_FLOOR; 8095 alu.src[0].sel = ctx->temp_reg; 8096 alu.src[0].chan = 1; 8097 8098 alu.dst.sel = ctx->temp_reg; 8099 alu.dst.chan = 1; 8100 alu.dst.write = 1; 8101 alu.last = 1; 8102 8103 r = r600_bytecode_add_alu(ctx->bc, &alu); 8104 if (r) 8105 return r; 8106 8107 if (ctx->bc->chip_class == CAYMAN) { 8108 for (i = 0; i < 3; i++) { 8109 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8110 alu.op = ALU_OP1_EXP_IEEE; 8111 alu.src[0].sel = ctx->temp_reg; 8112 alu.src[0].chan = 1; 8113 8114 alu.dst.sel = ctx->temp_reg; 8115 alu.dst.chan = i; 8116 if (i == 1) 8117 alu.dst.write = 1; 8118 if (i == 2) 8119 alu.last = 1; 8120 8121 r = r600_bytecode_add_alu(ctx->bc, &alu); 8122 if (r) 8123 return r; 8124 } 8125 } else { 8126 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8127 alu.op = ALU_OP1_EXP_IEEE; 8128 alu.src[0].sel = ctx->temp_reg; 8129 alu.src[0].chan = 1; 8130 8131 alu.dst.sel = ctx->temp_reg; 8132 alu.dst.chan = 1; 8133 alu.dst.write = 1; 8134 alu.last = 1; 8135 8136 r = r600_bytecode_add_alu(ctx->bc, &alu); 8137 if (r) 8138 return r; 8139 } 8140 8141 if (ctx->bc->chip_class == CAYMAN) { 8142 for (i = 0; i < 3; i++) { 8143 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8144 alu.op = ALU_OP1_RECIP_IEEE; 8145 
alu.src[0].sel = ctx->temp_reg; 8146 alu.src[0].chan = 1; 8147 8148 alu.dst.sel = ctx->temp_reg; 8149 alu.dst.chan = i; 8150 if (i == 1) 8151 alu.dst.write = 1; 8152 if (i == 2) 8153 alu.last = 1; 8154 8155 r = r600_bytecode_add_alu(ctx->bc, &alu); 8156 if (r) 8157 return r; 8158 } 8159 } else { 8160 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8161 alu.op = ALU_OP1_RECIP_IEEE; 8162 alu.src[0].sel = ctx->temp_reg; 8163 alu.src[0].chan = 1; 8164 8165 alu.dst.sel = ctx->temp_reg; 8166 alu.dst.chan = 1; 8167 alu.dst.write = 1; 8168 alu.last = 1; 8169 8170 r = r600_bytecode_add_alu(ctx->bc, &alu); 8171 if (r) 8172 return r; 8173 } 8174 8175 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8176 8177 alu.op = ALU_OP2_MUL; 8178 8179 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8180 r600_bytecode_src_set_abs(&alu.src[0]); 8181 8182 alu.src[1].sel = ctx->temp_reg; 8183 alu.src[1].chan = 1; 8184 8185 alu.dst.sel = ctx->temp_reg; 8186 alu.dst.chan = 1; 8187 alu.dst.write = 1; 8188 alu.last = 1; 8189 8190 r = r600_bytecode_add_alu(ctx->bc, &alu); 8191 if (r) 8192 return r; 8193 } 8194 8195 /* result.z = log2(|src|);*/ 8196 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 8197 if (ctx->bc->chip_class == CAYMAN) { 8198 for (i = 0; i < 3; i++) { 8199 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8200 8201 alu.op = ALU_OP1_LOG_IEEE; 8202 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8203 r600_bytecode_src_set_abs(&alu.src[0]); 8204 8205 alu.dst.sel = ctx->temp_reg; 8206 if (i == 2) 8207 alu.dst.write = 1; 8208 alu.dst.chan = i; 8209 if (i == 2) 8210 alu.last = 1; 8211 8212 r = r600_bytecode_add_alu(ctx->bc, &alu); 8213 if (r) 8214 return r; 8215 } 8216 } else { 8217 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8218 8219 alu.op = ALU_OP1_LOG_IEEE; 8220 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8221 r600_bytecode_src_set_abs(&alu.src[0]); 8222 8223 alu.dst.sel = ctx->temp_reg; 8224 alu.dst.write = 1; 8225 alu.dst.chan = 2; 8226 alu.last = 1; 8227 
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.w = 1.0; */
	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return tgsi_helper_copy(ctx, inst);
}

/* ARL/ARR/UARL on Evergreen+: convert (or move, for UARL) the source into
 * the address register file selected by Dst[0].Register.Index, then mark
 * the cached AR / index value as stale so it is reloaded before use. */
static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index);

	assert(inst->Dst[0].Register.Index < 3);
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		/* round toward negative infinity */
		alu.op = ALU_OP1_FLT_TO_INT_FLOOR;
		break;
	case TGSI_OPCODE_ARR:
		/* round to nearest */
		alu.op = ALU_OP1_FLT_TO_INT;
		break;
	case TGSI_OPCODE_UARL:
		alu.op = ALU_OP1_MOV;
		break;
	default:
		assert(0);
		return -1;
	}

	for (i = 0; i <= lasti; ++i) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.last = i == lasti;
		alu.dst.sel = reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* invalidate the cached value: index 0 is AR, 1..2 are index registers */
	if (inst->Dst[0].Register.Index > 0)
		ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0;
	else
		ctx->bc->ar_loaded = 0;

	return 0;
}
/* ARL/ARR/UARL on r600/r700: compute the address value into bc->ar_reg
 * and mark AR stale.  FLT_TO_INT rounds to nearest on these chips, so ARL
 * needs an explicit FLOOR first. */
static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		/* pass 1: floor(src) into ar_reg */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLOOR;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}

		/* pass 2: convert the floored value to int, in place */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.src[0].sel = ctx->bc->ar_reg;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			alu.dst.chan = i;
			alu.src[0].chan = i;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}
		break;
	case TGSI_OPCODE_ARR:
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	case TGSI_OPCODE_UARL:
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	default:
		assert(0);
		return -1;
	}

	ctx->bc->ar_loaded = 0;
	return 0;
}

/* DST: dst.x = 1.0, dst.y = src0.y * src1.y, dst.z = src0.z, dst.w = src1.w.
 * Implemented as four MULs, substituting the constant 1.0 for the channels
 * that are defined to be 1 (x of both operands, z of src1, w of src0). */
static int tgsi_opdst(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r = 0;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP2_MUL;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		if (i == 0 || i == 3) {
			alu.src[0].sel = V_SQ_ALU_SRC_1;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		if (i == 0 || i == 2) {
			alu.src[1].sel = V_SQ_ALU_SRC_1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
		}
		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit a predicate-setting ALU op comparing src0 against 0 into temp_reg.x,
 * with execute-mask/predicate update enabled; this is the condition used by
 * IF / conditional-break control flow. */
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type)
{
	struct r600_bytecode_alu alu;
	int r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = opcode;
	alu.execute_mask = 1;
	alu.update_pred = 1;

	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.dst.chan = 0;

	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	alu.src[1].sel = V_SQ_ALU_SRC_0;
	alu.src[1].chan = 0;

	alu.last = 1;

	r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
	if (r)
		return r;
	return 0;
}

/* Emit 'pops' stack pops.  When the last CF instruction is an ALU clause we
 * fold up to two pops into it (ALU_POP_AFTER / ALU_POP2_AFTER); otherwise an
 * explicit CF_OP_POP is emitted. */
static int pops(struct r600_shader_ctx *ctx, int pops)
{
	unsigned force_pop = ctx->bc->force_add_cf;

	if (!force_pop) {
		int alu_pop = 3;
		if (ctx->bc->cf_last) {
			if (ctx->bc->cf_last->op == CF_OP_ALU)
				alu_pop = 0;
			else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER)
				alu_pop = 1;
		}
		alu_pop += pops;
		if (alu_pop == 1) {
			ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER;
			ctx->bc->force_add_cf = 1;
		} else if (alu_pop == 2) {
			ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
			ctx->bc->force_add_cf = 1;
		} else {
			force_pop = 1;
		}
	}

	if (force_pop) {
		r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
		ctx->bc->cf_last->pop_count = pops;
		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
	}

	return 0;
}

/* Recompute the worst-case HW control-flow stack usage after a push of the
 * given kind (FC_PUSH_VPM / FC_PUSH_WQM / FC_LOOP) and record it in
 * stack->max_entries; the per-chip quirks are documented inline below. */
static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
		unsigned reason)
{
	struct r600_stack_info *stack = &ctx->bc->stack;
	unsigned elements, entries;

	unsigned entry_size = stack->entry_size;

	elements = (stack->loop + stack->push_wqm ) * entry_size;
	elements += stack->push;

	switch (ctx->bc->chip_class) {
	case R600:
	case R700:
		/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
		 * the stack must be reserved to hold the current active/continue
		 * masks */
		if (reason == FC_PUSH_VPM) {
			elements += 2;
		}
		break;

	case CAYMAN:
		/* r9xx: any stack operation on empty stack consumes 2 additional
		 * elements */
		elements += 2;

		/* fallthrough */
		/* FIXME: do the two elements added above cover the cases for the
		 * r8xx+ below? */

	case EVERGREEN:
		/* r8xx+: 2 extra elements are not always required, but one extra
		 * element must be added for each of the following cases:
		 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
		 *    stack usage.
		 *    (Currently we don't use ALU_ELSE_AFTER.)
		 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
		 *    PUSH instruction executed.
		 *
		 * NOTE: it seems we also need to reserve additional element in some
		 * other cases, e.g.
when we have 4 levels of PUSH_VPM in the shader, 8511 * then STACK_SIZE should be 2 instead of 1 */ 8512 if (reason == FC_PUSH_VPM) { 8513 elements += 1; 8514 } 8515 break; 8516 8517 default: 8518 assert(0); 8519 break; 8520 } 8521 8522 /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 8523 * for all chips, so we use 4 in the final formula, not the real entry_size 8524 * for the chip */ 8525 entry_size = 4; 8526 8527 entries = (elements + (entry_size - 1)) / entry_size; 8528 8529 if (entries > stack->max_entries) 8530 stack->max_entries = entries; 8531} 8532 8533static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 8534{ 8535 switch(reason) { 8536 case FC_PUSH_VPM: 8537 --ctx->bc->stack.push; 8538 assert(ctx->bc->stack.push >= 0); 8539 break; 8540 case FC_PUSH_WQM: 8541 --ctx->bc->stack.push_wqm; 8542 assert(ctx->bc->stack.push_wqm >= 0); 8543 break; 8544 case FC_LOOP: 8545 --ctx->bc->stack.loop; 8546 assert(ctx->bc->stack.loop >= 0); 8547 break; 8548 default: 8549 assert(0); 8550 break; 8551 } 8552} 8553 8554static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) 8555{ 8556 switch (reason) { 8557 case FC_PUSH_VPM: 8558 ++ctx->bc->stack.push; 8559 break; 8560 case FC_PUSH_WQM: 8561 ++ctx->bc->stack.push_wqm; 8562 case FC_LOOP: 8563 ++ctx->bc->stack.loop; 8564 break; 8565 default: 8566 assert(0); 8567 } 8568 8569 callstack_update_max_depth(ctx, reason); 8570} 8571 8572static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 8573{ 8574 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 8575 8576 sp->mid = realloc((void *)sp->mid, 8577 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 8578 sp->mid[sp->num_mid] = ctx->bc->cf_last; 8579 sp->num_mid++; 8580} 8581 8582static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 8583{ 8584 ctx->bc->fc_sp++; 8585 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 8586 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 8587} 

/* Close the top software flow-control frame, releasing its 'mid' CF list. */
static void fc_poplevel(struct r600_shader_ctx *ctx)
{
	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
	free(sp->mid);
	sp->mid = NULL;
	sp->num_mid = 0;
	sp->start = NULL;
	sp->type = 0;
	ctx->bc->fc_sp--;
}

/* Disabled experimental return/flag helpers, kept for reference. */
#if 0
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN));
	return 0;
}

static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif

/* Open an IF: emit the predicate ALU op and a JUMP whose target address is
 * patched by tgsi_else()/tgsi_endif(), then push an FC_IF frame and account
 * for the stack push. */
static int emit_if(struct r600_shader_ctx *ctx, int opcode)
{
	int alu_type = CF_OP_ALU_PUSH_BEFORE;

	/* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
	 * LOOP_STARTxxx for nested loops may put the branch stack into a state
	 * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
	 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
	if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
		r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
		alu_type = CF_OP_ALU;
	}

	emit_logic_pred(ctx, opcode, alu_type);

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);

	fc_pushlevel(ctx, FC_IF);

	callstack_push(ctx, FC_PUSH_VPM);
	return 0;
}

/* IF: float compare against 0. */
static int tgsi_if(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE);
}

/* UIF: integer compare against 0. */
static int tgsi_uif(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE_INT);
}

/* ELSE: emit the ELSE CF op, register it as the frame's 'mid' marker, and
 * retarget the frame's opening JUMP to this point. */
static int tgsi_else(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, ctx->bc->fc_sp);
	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
	return 0;
}

/* ENDIF: pop the branch stack, patch the JUMP (or the ELSE, if present) to
 * point past the end of the conditional, and close the FC_IF frame. */
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
	pops(ctx, 1);
	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
		R600_ERR("if/endif unbalanced in shader\n");
		return -1;
	}

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
	} else {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
	}
	fc_poplevel(ctx);

	callstack_pop(ctx, FC_PUSH_VPM);
	return 0;
}

static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
{
	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
	 * limited to 4096 iterations, like the other LOOP_* instructions.
	 */
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);

	fc_pushlevel(ctx, FC_LOOP);

	/* check stack depth */
	callstack_push(ctx, FC_LOOP);
	return 0;
}

/* ENDLOOP: emit LOOP_END and patch the loop's CF addresses, then close the
 * FC_LOOP frame and pop the stack accounting. */
static int tgsi_endloop(struct r600_shader_ctx *ctx)
{
	int i;

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
		R600_ERR("loop/endloop in shader code are not paired.\n");
		return -EINVAL;
	}

	/* fixup loop pointers - from r600isa
	   LOOP END points to CF after LOOP START,
	   LOOP START point to CF after LOOP END
	   BRK/CONT point to LOOP END CF
	*/
	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;

	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;

	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
	}
	/* XXX add LOOPRET support */
	fc_poplevel(ctx);
	callstack_pop(ctx, FC_LOOP);
	return 0;
}

/* BREAKC: conditional loop break.  Finds the innermost FC_LOOP frame and
 * emits either an IF + LOOP_BREAK + ENDIF (on Evergreen chips with the
 * ALU_BREAK active-mask bug) or a predicated ALU_BREAK clause. */
static int tgsi_loop_breakc(struct r600_shader_ctx *ctx)
{
	int r;
	unsigned int fscp;

	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}
	if (fscp == 0) {
		R600_ERR("BREAKC not inside loop/endloop pair\n");
		return -EINVAL;
	}

	if (ctx->bc->chip_class == EVERGREEN &&
	    ctx->bc->family != CHIP_CYPRESS &&
	    ctx->bc->family != CHIP_JUNIPER) {
		/* HW bug: ALU_BREAK does not save the active mask correctly */
		r = tgsi_uif(ctx);
		if (r)
			return r;

		r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);

		return tgsi_endif(ctx);
	} else {
		r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);
	}

	return 0;
}

/* BRK/CONT: unconditional break or continue — emit the CF op for this
 * opcode and register it with the innermost loop frame for address fixup
 * in tgsi_endloop(). */
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
	unsigned int fscp;

	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}

	if (fscp == 0) {
		R600_ERR("Break not inside loop/endloop pair\n");
		return -EINVAL;
	}

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);

	fc_set_mid(ctx, fscp);

	return 0;
}

/* Geometry-shader EMIT/ENDPRIM: the stream index comes from the literal
 * first source operand.  For EMIT_VERTEX the ring writes are flushed before
 * the CF op; the ring offset is bumped afterwards. */
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
	int r;

	if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
		emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);

	r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	if (!r) {
		ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
		return emit_inc_ring_offset(ctx, stream, TRUE);
	}
	return r;
}

/* UMAD: dst = src0 * src1 + src2 (unsigned integer).  The multiply goes to
 * temp_reg first; on Cayman it is emitted in all four slots with only
 * channel i's result written. */
static int tgsi_umad(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	/* src0 * src1 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP2_MULLO_UINT;
				for (k = 0; k < inst->Instruction.NumSrcRegs; k++) {
					r600_bytecode_src(&alu.src[k], &ctx->src[k], i);
				}
				alu.dst.chan = j;
				alu.dst.sel = ctx->temp_reg;
				alu.dst.write = (j == i);
				if (j == 3)
					alu.last = 1;
8857 r = r600_bytecode_add_alu(ctx->bc, &alu); 8858 if (r) 8859 return r; 8860 } 8861 } else { 8862 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8863 8864 alu.dst.chan = i; 8865 alu.dst.sel = ctx->temp_reg; 8866 alu.dst.write = 1; 8867 8868 alu.op = ALU_OP2_MULLO_UINT; 8869 for (j = 0; j < 2; j++) { 8870 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 8871 } 8872 8873 alu.last = 1; 8874 r = r600_bytecode_add_alu(ctx->bc, &alu); 8875 if (r) 8876 return r; 8877 } 8878 } 8879 8880 8881 for (i = 0; i < lasti + 1; i++) { 8882 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 8883 continue; 8884 8885 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8886 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 8887 8888 alu.op = ALU_OP2_ADD_INT; 8889 8890 alu.src[0].sel = ctx->temp_reg; 8891 alu.src[0].chan = i; 8892 8893 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 8894 if (i == lasti) { 8895 alu.last = 1; 8896 } 8897 r = r600_bytecode_add_alu(ctx->bc, &alu); 8898 if (r) 8899 return r; 8900 } 8901 return 0; 8902} 8903 8904static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 8905 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl}, 8906 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 8907 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 8908 8909 /* XXX: 8910 * For state trackers other than OpenGL, we'll want to use 8911 * _RECIP_IEEE instead. 
8912 */ 8913 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 8914 8915 [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, 8916 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 8917 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 8918 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 8919 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 8920 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 8921 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 8922 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 8923 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 8924 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 8925 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 8926 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 8927 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 8928 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 8929 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 8930 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 8931 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 8932 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 8933 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 8934 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 8935 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 8936 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 8937 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 8938 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 8939 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 8940 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 8941 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 8942 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 8943 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 8944 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 8945 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 8946 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 8947 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 8948 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8949 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 
8950 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 8951 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8952 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8953 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8954 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8955 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 8956 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 8957 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 8958 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 8959 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 8960 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 8961 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 8962 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 8963 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 8964 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 8965 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 8966 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8967 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8968 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8969 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8970 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 8971 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 8972 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl}, 8973 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 8974 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 8975 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 8976 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 8977 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 8978 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 8979 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8980 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 8981 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 8982 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 8983 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8984 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 8985 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 8986 
[TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 8987 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 8988 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 8989 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 8990 [TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 8991 [TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 8992 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 8993 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 8994 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 8995 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 8996 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 8997 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 8998 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans}, 8999 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 9000 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 9001 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 9002 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 9003 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 9004 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9005 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 9006 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9007 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 9008 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 9009 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 9010 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 9011 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9012 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 9013 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9014 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9015 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 9016 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 9017 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 9018 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 9019 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 9020 [TGSI_OPCODE_FSGE] = { 
ALU_OP2_SETGE_DX10, tgsi_op2}, 9021 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 9022 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 9023 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 9024 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 9025 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 9026 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc}, 9027 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 9028 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 9029 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 9030 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, 9031 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 9032 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 9033 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 9034 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 9035 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 9036 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans}, 9037 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 9038 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, 9039 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 9040 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 9041 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 9042 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 9043 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 9044 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 9045 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 9046 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 9047 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 9048 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 9049 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans}, 9050 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 9051 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap}, 9052 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9053 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 9054 
[TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 9055 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9056 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 9057 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 9058 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 9059 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 9060 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 9061 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 9062 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 9063 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 9064 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 9065 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 9066 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 9067 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 9068 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl}, 9069 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 9070 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 9071 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 9072 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9073 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 9074 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9075 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9076 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9077 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 9078 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 9079 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 9080 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 9081 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 9082 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9083 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9084 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9085 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9086 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9087 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9088 
[TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 9089 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9090 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9091 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 9092 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 9093 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported}, 9094 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported}, 9095 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported}, 9096 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported}, 9097 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported}, 9098 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported}, 9099 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported}, 9100 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported}, 9101 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported}, 9102 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_unsupported}, 9103 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported}, 9104 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported}, 9105 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported}, 9106 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 9107}; 9108 9109static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 9110 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 9111 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 9112 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 9113 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 9114 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, 9115 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 9116 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 9117 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 9118 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 9119 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 9120 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 9121 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 9122 [TGSI_OPCODE_MIN] = { 
ALU_OP2_MIN, tgsi_op2}, 9123 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 9124 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 9125 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 9126 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 9127 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 9128 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 9129 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 9130 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 9131 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 9132 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 9133 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 9134 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 9135 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 9136 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 9137 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 9138 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 9139 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 9140 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 9141 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 9142 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 9143 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 9144 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 9145 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 9146 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 9147 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9148 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9149 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 9150 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9151 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9152 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9153 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9154 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 9155 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 9156 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 9157 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 9158 
[TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 9159 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 9160 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 9161 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 9162 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 9163 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 9164 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 9165 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9166 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9167 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9168 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9169 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 9170 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 9171 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 9172 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 9173 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 9174 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 9175 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 9176 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 9177 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 9178 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9179 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 9180 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 9181 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 9182 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9183 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 9184 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 9185 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 9186 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 9187 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 9188 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 9189 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9190 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9191 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 9192 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 9193 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 9194 
[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 9195 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 9196 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 9197 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 9198 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 9199 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 9200 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 9201 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 9202 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 9203 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9204 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 9205 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9206 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 9207 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 9208 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 9209 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 9210 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9211 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 9212 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9213 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9214 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 9215 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 9216 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 9217 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 9218 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 9219 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 9220 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 9221 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 9222 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 9223 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 9224 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 9225 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 9226 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 9227 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka 
HALT */ 9228 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 9229 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i}, 9230 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 9231 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 9232 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 9233 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 9234 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 9235 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 9236 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 9237 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_f2i}, 9238 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 9239 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 9240 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 9241 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 9242 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 9243 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 9244 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 9245 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 9246 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 9247 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 9248 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 9249 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 9250 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 9251 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9252 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 9253 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 9254 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9255 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 9256 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 9257 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 9258 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 9259 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 9260 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 9261 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 9262 [TGSI_OPCODE_SAMPLE_L] = { 0, 
tgsi_unsupported}, 9263 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 9264 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 9265 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 9266 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 9267 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 9268 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 9269 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 9270 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 9271 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9272 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 9273 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9274 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9275 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9276 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 9277 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 9278 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 9279 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 9280 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 9281 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9282 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9283 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9284 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9285 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9286 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9287 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 9288 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9289 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9290 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 9291 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 9292 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 9293 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 9294 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 9295 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 9296 
[TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 9297 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 9298 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 9299 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 9300 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 9301 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 9302 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9303 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9304 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9305 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 9306 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 9307 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 9308 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 9309 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 9310 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 9311 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 9312 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 9313 [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 9314 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 9315 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 9316 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 9317 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 9318 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 9319 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 9320 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 9321 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 9322 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 9323 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 9324 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 9325 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 9326 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 9327 
[TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 9328 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 9329}; 9330 9331static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 9332 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 9333 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 9334 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 9335 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, 9336 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, 9337 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 9338 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 9339 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 9340 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 9341 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 9342 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 9343 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 9344 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 9345 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 9346 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 9347 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 9348 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 9349 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 9350 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 9351 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 9352 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, 9353 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 9354 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 9355 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 9356 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 9357 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 9358 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 9359 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 9360 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, 9361 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, 9362 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow}, 9363 [TGSI_OPCODE_XPD] = { 
ALU_OP0_NOP, tgsi_xpd}, 9364 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 9365 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 9366 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 9367 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 9368 [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig}, 9369 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9370 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9371 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 9372 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9373 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9374 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9375 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9376 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 9377 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 9378 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 9379 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 9380 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, cayman_trig}, 9381 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 9382 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 9383 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 9384 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 9385 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 9386 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 9387 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9388 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9389 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9390 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9391 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 9392 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 9393 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 9394 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 9395 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 9396 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 9397 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 9398 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 9399 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, 
tgsi_scs}, 9400 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9401 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 9402 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 9403 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 9404 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9405 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 9406 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 9407 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 9408 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 9409 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 9410 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 9411 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9412 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9413 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 9414 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 9415 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 9416 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2}, 9417 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 9418 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 9419 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 9420 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 9421 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 9422 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 9423 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 9424 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 9425 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9426 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 9427 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9428 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 9429 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 9430 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 9431 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 9432 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9433 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 9434 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, 
tgsi_unsupported}, 9435 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9436 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 9437 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 9438 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 9439 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 9440 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 9441 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 9442 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 9443 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 9444 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 9445 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 9446 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 9447 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 9448 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 9449 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 9450 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 9451 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2}, 9452 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 9453 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 9454 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 9455 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 9456 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 9457 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 9458 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 9459 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2}, 9460 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2}, 9461 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 9462 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 9463 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 9464 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 9465 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 9466 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 9467 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr}, 9468 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, 
tgsi_op2}, 9469 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 9470 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 9471 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 9472 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 9473 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9474 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 9475 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 9476 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9477 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 9478 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 9479 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 9480 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 9481 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 9482 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 9483 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 9484 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 9485 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 9486 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 9487 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 9488 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 9489 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 9490 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 9491 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 9492 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 9493 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9494 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 9495 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9496 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9497 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9498 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 9499 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 9500 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 9501 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 9502 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 9503 
[TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9504 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9505 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9506 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9507 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9508 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9509 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 9510 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9511 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9512 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr}, 9513 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, 9514 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 9515 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 9516 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 9517 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 9518 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 9519 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 9520 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 9521 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 9522 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 9523 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 9524 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9525 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9526 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9527 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 9528 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 9529 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 9530 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 9531 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 9532 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 9533 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 9534 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 9535 [TGSI_OPCODE_DSLT] = { 
ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 9536 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 9537 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 9538 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 9539 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 9540 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 9541 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 9542 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 9543 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 9544 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 9545 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 9546 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 9547 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 9548 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 9549 [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 9550 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 9551}; 9552