r600_shader.c revision cce3864046be104933fd4f1bb7a4b36092ff4925
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "r600_sq.h" 24#include "r600_llvm.h" 25#include "r600_formats.h" 26#include "r600_opcodes.h" 27#include "r600_shader.h" 28#include "r600d.h" 29 30#include "sb/sb_public.h" 31 32#include "pipe/p_shader_tokens.h" 33#include "tgsi/tgsi_info.h" 34#include "tgsi/tgsi_parse.h" 35#include "tgsi/tgsi_scan.h" 36#include "tgsi/tgsi_dump.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39#include <stdio.h> 40#include <errno.h> 41 42/* CAYMAN notes 43Why CAYMAN got loops for lots of instructions is explained here. 44 45-These 8xx t-slot only ops are implemented in all vector slots. 46MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 47These 8xx t-slot only opcodes become vector ops, with all four 48slots expecting the arguments on sources a and b. 
Result is 49broadcast to all channels. 50MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64 51These 8xx t-slot only opcodes become vector ops in the z, y, and 52x slots. 53EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 54RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 55SQRT_IEEE/_64 56SIN/COS 57The w slot may have an independent co-issued operation, or if the 58result is required to be in the w slot, the opcode above may be 59issued in the w slot as well. 60The compiler must issue the source argument to slots z, y, and x 61*/ 62 63/* Contents of r0 on entry to various shaders 64 65 VS - .x = VertexID 66 .y = RelVertexID (??) 67 .w = InstanceID 68 69 GS - r0.xyw, r1.xyz = per-vertex offsets 70 r0.z = PrimitiveID 71 72 TCS - .x = PatchID 73 .y = RelPatchID (??) 74 .z = InvocationID 75 .w = tess factor base. 76 77 TES - .x = TessCoord.x 78 - .y = TessCoord.y 79 - .z = RelPatchID (??) 80 - .w = PrimitiveID 81 82 PS - face_gpr.z = SampleMask 83 face_gpr.w = SampleID 84*/ 85#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) 86static int r600_shader_from_tgsi(struct r600_context *rctx, 87 struct r600_pipe_shader *pipeshader, 88 union r600_shader_key key); 89 90static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, 91 int size, unsigned comp_mask) { 92 93 if (!size) 94 return; 95 96 if (ps->num_arrays == ps->max_arrays) { 97 ps->max_arrays += 64; 98 ps->arrays = realloc(ps->arrays, ps->max_arrays * 99 sizeof(struct r600_shader_array)); 100 } 101 102 int n = ps->num_arrays; 103 ++ps->num_arrays; 104 105 ps->arrays[n].comp_mask = comp_mask; 106 ps->arrays[n].gpr_start = start_gpr; 107 ps->arrays[n].gpr_count = size; 108} 109 110static void r600_dump_streamout(struct pipe_stream_output_info *so) 111{ 112 unsigned i; 113 114 fprintf(stderr, "STREAMOUT\n"); 115 for (i = 0; i < so->num_outputs; i++) { 116 unsigned mask = ((1 << so->output[i].num_components) - 1) << 117 so->output[i].start_component; 118 fprintf(stderr, 
" %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
			i,
			so->output[i].stream,
			so->output[i].output_buffer,
			so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
			so->output[i].register_index,
			mask & 1 ? "x" : "",
			mask & 2 ? "y" : "",
			mask & 4 ? "z" : "",
			mask & 8 ? "w" : "",
			so->output[i].dst_offset < so->output[i].start_component ? " (will lower)" : "");
	}
}

/* Upload the finished bytecode into a GPU buffer object (shader->bo).
 * No-op when the buffer already exists.  Returns 0 or -ENOMEM. */
static int store_shader(struct pipe_context *ctx,
			struct r600_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	uint32_t *ptr, i;

	if (shader->bo == NULL) {
		shader->bo = (struct r600_resource*)
			pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
		if (shader->bo == NULL) {
			return -ENOMEM;
		}
		/* NOTE(review): the map result is not NULL-checked before use;
		 * confirm r600_buffer_map_sync_with_rings cannot fail for a
		 * freshly created buffer. */
		ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
		if (R600_BIG_ENDIAN) {
			/* byte-swap each dword so the GPU sees little-endian */
			for (i = 0; i < shader->shader.bc.ndw; ++i) {
				ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
			}
		} else {
			memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
		}
		rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
	}

	return 0;
}

/* Top-level shader compile entry point: translate TGSI to r600 bytecode,
 * optionally run the SB optimizer/disassembler, upload the result, and
 * build the per-stage hardware state.  On any failure the partially built
 * shader is destroyed and a negative errno is returned. */
int r600_pipe_shader_create(struct pipe_context *ctx,
			    struct r600_pipe_shader *shader,
			    union r600_shader_key key)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_pipe_shader_selector *sel = shader->selector;
	int r;
	bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
	unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
	unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
	unsigned export_shader;

	shader->shader.bc.isa = rctx->isa;

	if (dump) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		tgsi_dump(sel->tokens, 0);

		if (sel->so.num_outputs) {
			r600_dump_streamout(&sel->so);
		}
	}
	r = r600_shader_from_tgsi(rctx, shader, key);
	if (r) {
		R600_ERR("translation from TGSI failed !\n");
		goto error;
	}
	if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
		/* only disable for vertex shaders in tess paths */
		if (key.vs.as_ls)
			use_sb = 0;
	}
	/* the SB backend does not handle the tessellation stages */
	use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_TESS_CTRL);
	use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_TESS_EVAL);

	/* disable SB for shaders using doubles */
	use_sb &= !shader->shader.uses_doubles;

	/* Check if the bytecode has already been built. When using the llvm
	 * backend, r600_shader_from_tgsi() will take care of building the
	 * bytecode.
	 */
	if (!shader->shader.bc.bytecode) {
		r = r600_bytecode_build(&shader->shader.bc);
		if (r) {
			R600_ERR("building bytecode failed !\n");
			goto error;
		}
	}

	if (dump && !sb_disasm) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		r600_bytecode_disasm(&shader->shader.bc);
		fprintf(stderr, "______________________________________________________________\n");
	} else if ((dump && sb_disasm) || use_sb) {
		/* SB pass doubles as the disassembler when sb_disasm is set */
		r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
					     dump, use_sb);
		if (r) {
			R600_ERR("r600_sb_bytecode_process failed !\n");
			goto error;
		}
	}

	if (shader->gs_copy_shader) {
		if (dump) {
			// dump copy shader
			r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
						     &shader->gs_copy_shader->shader, dump, 0);
			if (r)
				goto error;
		}

		if ((r = store_shader(ctx, shader->gs_copy_shader)))
			goto error;
	}

	/* Store the shader in a buffer. */
	if ((r = store_shader(ctx, shader)))
		goto error;

	/* Build state. */
	switch (shader->shader.processor_type) {
	case TGSI_PROCESSOR_TESS_CTRL:
		evergreen_update_hs_state(ctx, shader);
		break;
	case TGSI_PROCESSOR_TESS_EVAL:
		if (key.tes.as_es)
			evergreen_update_es_state(ctx, shader);
		else
			evergreen_update_vs_state(ctx, shader);
		break;
	case TGSI_PROCESSOR_GEOMETRY:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_gs_state(ctx, shader);
			evergreen_update_vs_state(ctx, shader->gs_copy_shader);
		} else {
			r600_update_gs_state(ctx, shader);
			r600_update_vs_state(ctx, shader->gs_copy_shader);
		}
		break;
	case TGSI_PROCESSOR_VERTEX:
		export_shader = key.vs.as_es;
		if (rctx->b.chip_class >= EVERGREEN) {
			if (key.vs.as_ls)
				evergreen_update_ls_state(ctx, shader);
			else if (key.vs.as_es)
				evergreen_update_es_state(ctx, shader);
			else
				evergreen_update_vs_state(ctx, shader);
		} else {
			if (export_shader)
				r600_update_es_state(ctx, shader);
			else
				r600_update_vs_state(ctx, shader);
		}
		break;
	case TGSI_PROCESSOR_FRAGMENT:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_ps_state(ctx, shader);
		} else {
			r600_update_ps_state(ctx, shader);
		}
		break;
	default:
		r = -EINVAL;
		goto error;
	}
	return 0;

error:
	r600_pipe_shader_destroy(ctx, shader);
	return r;
}

/* Release all GPU and CPU resources owned by a compiled shader. */
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
	r600_bytecode_clear(&shader->shader.bc);
	r600_release_command_buffer(&shader->command_buffer);
}

/*
 * tgsi -> r600 shader
 */
struct r600_shader_tgsi_instruction;

/* Decoded TGSI source operand in r600 terms. */
struct r600_shader_src {
	unsigned			sel;        /* register/selector */
	unsigned			swizzle[4]; /* per-channel swizzle */
	unsigned			neg;
	unsigned			abs;
	unsigned			rel;        /* relative (indirect) addressing */
	unsigned			kc_bank;    /* constant cache bank */
	boolean				kc_rel;     /* true if cache bank is indexed */
	uint32_t			value[4];   /* literal values, when sel is a literal */
};

/* Per-interpolator bookkeeping for evergreen barycentric setup. */
struct eg_interp {
	boolean				enabled;
	unsigned			ij_index;
};

/* All state carried through one TGSI -> r600 translation run. */
struct r600_shader_ctx {
	struct tgsi_shader_info			info;
	struct tgsi_parse_context		parse;
	const struct tgsi_token			*tokens;
	unsigned				type;
	unsigned				file_offset[TGSI_FILE_COUNT]; /* first GPR of each TGSI file */
	unsigned				temp_reg;
	const struct r600_shader_tgsi_instruction	*inst_info;
	struct r600_bytecode			*bc;
	struct r600_shader			*shader;
	struct r600_shader_src			src[4];
	uint32_t				*literals;
	uint32_t				nliterals;
	uint32_t				max_driver_temp_used;
	boolean					use_llvm;
	/* needed for evergreen interpolation */
	struct eg_interp			eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
	/* evergreen/cayman also store sample mask in face register */
	int					face_gpr;
	/* sample id is .w component stored in fixed point position register */
	int					fixed_pt_position_gpr;
	int					colors_used;
	boolean					clip_vertex_write;
	unsigned				cv_output;
	unsigned				edgeflag_output;
	int					fragcoord_input;
	int					native_integers;
	int					next_ring_offset;
	int					gs_out_ring_offset;
	int					gs_next_vertex;
	struct r600_shader			*gs_for_vs;
	int					gs_export_gpr_tregs[4];
	const struct pipe_stream_output_info	*gs_stream_output_info;
	unsigned				enabled_stream_buffers_mask;
	unsigned				tess_input_info; /* temp with tess input offsets */
	unsigned				tess_output_info; /* temp with tess input offsets */
};

/* Maps one TGSI opcode to its hardware op and emit callback. */
struct r600_shader_tgsi_instruction {
	unsigned	op;
	int (*process)(struct r600_shader_ctx *ctx);
};

static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void
fc_pushlevel(struct r600_shader_ctx *ctx, int type);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
                                unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
                                unsigned int dst_reg);
static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
			const struct r600_shader_src *shader_src,
			unsigned chan);
static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
			       unsigned dst_reg);

/* Return the index (0..3) of the highest channel set in a TGSI writemask;
 * returns 0 for an empty mask. */
static int tgsi_last_instruction(unsigned writemask)
{
	int i, lasti = 0;

	for (i = 0; i < 4; i++) {
		if (writemask & (1 << i)) {
			lasti = i;
		}
	}
	return lasti;
}

/* Reject TGSI instructions this backend cannot translate: multiple
 * destinations (except DFRACEXP), predication, and 2D-indexed operands
 * outside the files/stages that support them.  Returns 0 or -EINVAL. */
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
	int j;

	if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
		return -EINVAL;
	}
	if (i->Instruction.Predicate) {
		R600_ERR("predicate unsupported\n");
		return -EINVAL;
	}
#if 0
	if (i->Instruction.Label) {
		R600_ERR("label unsupported\n");
		return -EINVAL;
	}
#endif
	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
		if (i->Src[j].Register.Dimension) {
			switch (i->Src[j].Register.File) {
			case TGSI_FILE_CONSTANT:
				break;
			case TGSI_FILE_INPUT:
				if (ctx->type == TGSI_PROCESSOR_GEOMETRY ||
				    ctx->type == TGSI_PROCESSOR_TESS_CTRL ||
				    ctx->type == TGSI_PROCESSOR_TESS_EVAL)
					break;
				/* fallthrough: 2D inputs are rejected for other stages */
			case TGSI_FILE_OUTPUT:
				if (ctx->type == TGSI_PROCESSOR_TESS_CTRL)
					break;
				/* fallthrough */
			default:
				R600_ERR("unsupported src %d (file %d, dimension %d)\n", j,
					 i->Src[j].Register.File,
					 i->Src[j].Register.Dimension);
				return -EINVAL;
			}
		}
	}
	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
		if (i->Dst[j].Register.Dimension) {
			/* TCS outputs are legitimately 2D (per-vertex) */
			if (ctx->type == TGSI_PROCESSOR_TESS_CTRL)
				continue;
			R600_ERR("unsupported dst (dimension)\n");
			return -EINVAL;
		}
	}
	return 0;
}

/* Map (interpolate mode, location) to a slot in eg_interpolators[6]:
 * Persp/Linear * 3 + sample(0)/center(1)/centroid(2).  Returns -1 for
 * modes that need no barycentrics (constant/flat). */
int eg_get_interpolator_index(unsigned interpolate, unsigned location)
{
	if (interpolate == TGSI_INTERPOLATE_COLOR ||
		interpolate == TGSI_INTERPOLATE_LINEAR ||
		interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
	{
		int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
		int loc;

		switch(location) {
		case TGSI_INTERPOLATE_LOC_CENTER:
			loc = 1;
			break;
		case TGSI_INTERPOLATE_LOC_CENTROID:
			loc = 2;
			break;
		case TGSI_INTERPOLATE_LOC_SAMPLE:
		default:
			loc = 0; break;
		}

		return is_linear * 3 + loc;
	}

	return -1;
}

/* Copy the previously-allocated ij pair index for this input's
 * interpolator onto the input itself. */
static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
		int input)
{
	int i = eg_get_interpolator_index(
		ctx->shader->input[input].interpolate,
		ctx->shader->input[input].interpolate_location);
	assert(i >= 0);
	ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
}

/* Emit the 8-ALU INTERP_ZW/INTERP_XY sequence that interpolates one
 * fragment-shader input from its ij barycentrics into its GPR. */
static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;
	int gpr = 0, base_chan = 0;
	int ij_index = ctx->shader->input[input].ij_index;

	/* work out gpr and base_chan from index */
	gpr = ij_index / 2;
	base_chan = (2 * (ij_index % 2)) + 1;

	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		/* first group of 4 produces ZW, second produces XY */
		if (i < 4)
			alu.op = ALU_OP2_INTERP_ZW;
		else
			alu.op = ALU_OP2_INTERP_XY;

		/* only slots 2..5 actually write result channels */
		if ((i > 1) && (i < 6)) {
			alu.dst.sel = ctx->shader->input[input].gpr;
			alu.dst.write = 1;
		}

		alu.dst.chan = i % 4;

		alu.src[0].sel = gpr;
		/* even slots use I (base_chan), odd slots use J (base_chan-1) */
		alu.src[0].chan = (base_chan - (i % 2));

		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;

		alu.bank_swizzle_force = SQ_ALU_VEC_210;
		if ((i % 4) == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit INTERP_LOAD_P0 for all four channels of a flat (non-interpolated)
 * fragment-shader input. */
static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_INTERP_LOAD_P0;

		alu.dst.sel = ctx->shader->input[input].gpr;
		alu.dst.write = 1;

		alu.dst.chan = i;

		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[0].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/*
 * Special export handling in shaders
 *
 * shader export ARRAY_BASE for EXPORT_POS:
 * 60 is position
 * 61 is misc vector
 * 62, 63 are clip distance vectors
 *
 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
 * exclusive from render target index)
 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
 *
 *
 * shader export ARRAY_BASE for EXPORT_PIXEL:
 * 0-7 CB targets
 * 61 computed Z vector
 *
 * The use of the values exported in the computed Z vector are controlled
 * by DB_SHADER_CONTROL:
 * Z_EXPORT_ENABLE - Z as a float in RED
 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
 * DB_SOURCE_FORMAT - export control restrictions
 *
 */


/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
static int r600_spi_sid(struct r600_shader_io * io)
{
	int index, name = io->name;

	/* These params are handled differently, they don't need
	 * semantic indices, so we'll use 0 for them.
	 */
	if (name == TGSI_SEMANTIC_POSITION ||
	    name == TGSI_SEMANTIC_PSIZE ||
	    name == TGSI_SEMANTIC_EDGEFLAG ||
	    name == TGSI_SEMANTIC_FACE ||
	    name == TGSI_SEMANTIC_SAMPLEMASK)
		index = 0;
	else {
		if (name == TGSI_SEMANTIC_GENERIC) {
			/* For generic params simply use sid from tgsi */
			index = io->sid;
		} else {
			/* For non-generic params - pack name and sid into 8 bits */
			index = 0x80 | (name<<3) | (io->sid);
		}

		/* Make sure that all really used indices have nonzero value, so
		 * we can just compare it to 0 later instead of comparing the name
		 * with different values to detect special cases. */
		index++;
	}

	return index;
};

/* we need this to get a common lds index for vs/tcs/tes input/outputs */
int r600_get_lds_unique_index(unsigned semantic_name, unsigned index)
{
	switch (semantic_name) {
	case TGSI_SEMANTIC_POSITION:
		return 0;
	case TGSI_SEMANTIC_PSIZE:
		return 1;
	case TGSI_SEMANTIC_CLIPDIST:
		assert(index <= 1);
		return 2 + index;
	case TGSI_SEMANTIC_GENERIC:
		if (index <= 63-4)
			return 4 + index - 9;
		else
			/* same explanation as in the default statement,
			 * the only user hitting this is st/nine.
*/
			return 0;

	/* patch indices are completely separate and thus start from 0 */
	case TGSI_SEMANTIC_TESSOUTER:
		return 0;
	case TGSI_SEMANTIC_TESSINNER:
		return 1;
	case TGSI_SEMANTIC_PATCH:
		return 2 + index;

	default:
		/* Don't fail here. The result of this function is only used
		 * for LS, TCS, TES, and GS, where legacy GL semantics can't
		 * occur, but this function is called for all vertex shaders
		 * before it's known whether LS will be compiled or not.
		 */
		return 0;
	}
}

/* turn input into interpolate on EG */
static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
{
	int r = 0;

	if (ctx->shader->input[index].spi_sid) {
		ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
		if (ctx->shader->input[index].interpolate > 0) {
			/* smooth/linear input: interpolate from barycentrics */
			evergreen_interp_assign_ij_index(ctx, index);
			if (!ctx->use_llvm)
				r = evergreen_interp_alu(ctx, index);
		} else {
			/* constant/flat input: load provoking-vertex value */
			if (!ctx->use_llvm)
				r = evergreen_interp_flat(ctx, index);
		}
	}
	return r;
}

/* Emit CNDGT to pick per-channel between the front and back color inputs
 * based on the face register; the result overwrites the front GPR. */
static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
{
	struct r600_bytecode_alu alu;
	int i, r;
	int gpr_front = ctx->shader->input[front].gpr;
	int gpr_back = ctx->shader->input[back].gpr;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;
		alu.dst.write = 1;
		alu.dst.sel = gpr_front;
		alu.src[0].sel = ctx->face_gpr;
		alu.src[1].sel = gpr_front;
		alu.src[2].sel = gpr_back;

		alu.dst.chan = i;
		alu.src[1].chan = i;
		alu.src[2].chan = i;
		alu.last = (i==3);

		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;
	}

	return 0;
}

/* execute a single slot ALU calculation.
 * Source operands are a (sel, chan-or-literal-value) pair; when the sel
 * is V_SQ_ALU_SRC_LITERAL the *_chan_val argument carries the literal.
 * On Cayman, MULLO_INT is a t-slot-only op implemented across all four
 * vector slots, so it is emitted as a 4-slot group writing only dst_chan. */
static int single_alu_op2(struct r600_shader_ctx *ctx, int op,
			  int dst_sel, int dst_chan,
			  int src0_sel, unsigned src0_chan_val,
			  int src1_sel, unsigned src1_chan_val)
{
	struct r600_bytecode_alu alu;
	int r, i;

	if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) {
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = op;
			alu.src[0].sel = src0_sel;
			if (src0_sel == V_SQ_ALU_SRC_LITERAL)
				alu.src[0].value = src0_chan_val;
			else
				alu.src[0].chan = src0_chan_val;
			alu.src[1].sel = src1_sel;
			if (src1_sel == V_SQ_ALU_SRC_LITERAL)
				alu.src[1].value = src1_chan_val;
			else
				alu.src[1].chan = src1_chan_val;
			alu.dst.sel = dst_sel;
			alu.dst.chan = i;
			alu.dst.write = i == dst_chan;
			alu.last = (i == 3);
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		return 0;
	}

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = op;
	alu.src[0].sel = src0_sel;
	if (src0_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[0].value = src0_chan_val;
	else
		alu.src[0].chan = src0_chan_val;
	alu.src[1].sel = src1_sel;
	if (src1_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[1].value = src1_chan_val;
	else
		alu.src[1].chan = src1_chan_val;
	alu.dst.sel = dst_sel;
	alu.dst.chan = dst_chan;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* execute a single slot ALU calculation (three-source form; currently
 * only validated for MULADD_UINT24 -- see the assert). */
static int single_alu_op3(struct r600_shader_ctx *ctx, int op,
			  int dst_sel, int dst_chan,
			  int src0_sel, unsigned src0_chan_val,
			  int src1_sel, unsigned src1_chan_val,
			  int src2_sel, unsigned src2_chan_val)
{
	struct r600_bytecode_alu alu;
	int r;

	/* validate this for other ops */
	assert(op == ALU_OP3_MULADD_UINT24);
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = op;
	alu.src[0].sel = src0_sel;
	if (src0_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[0].value = src0_chan_val;
	else
		alu.src[0].chan = src0_chan_val;
alu.src[1].sel = src1_sel;
	if (src1_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[1].value = src1_chan_val;
	else
		alu.src[1].chan = src1_chan_val;
	alu.src[2].sel = src2_sel;
	if (src2_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[2].value = src2_chan_val;
	else
		alu.src[2].chan = src2_chan_val;
	alu.dst.sel = dst_sel;
	alu.dst.chan = dst_chan;
	/* NOTE(review): dst.write is not set here, unlike single_alu_op2 --
	 * presumably op3 ALU encodings always write; confirm against the
	 * ISA docs / r600_bytecode encoder. */
	alu.is_op3 = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* put it in temp_reg.x
 * Computes the LDS byte offset of patch 0 data for the current patch:
 * rel_patch_id * patch_stride + patch0_(data_)offset, using the offsets
 * previously loaded into the tess_output_info temp. */
static int get_lds_offset0(struct r600_shader_ctx *ctx,
		int rel_patch_chan,
		int temp_reg, bool is_patch_var)
{
	int r;

	/* MUL temp.x, patch_stride (input_vals.x), rel_patch_id (r0.y (tcs)) */
	/* ADD
	   Dimension - patch0_offset (input_vals.z),
	   Non-dim - patch0_data_offset (input_vals.w)
	*/
	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
			   temp_reg, 0,
			   ctx->tess_output_info, 0,
			   0, rel_patch_chan,
			   ctx->tess_output_info, is_patch_var ? 3 : 2);
	if (r)
		return r;
	return 0;
}

/* Address register for indirect addressing: index 0 is the AR register,
 * higher indices select the extra index registers. */
static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index)
{
	return index > 0 ?
ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg;
}

/* Allocate the next free driver temp GPR above temp_reg. */
static int r600_get_temp(struct r600_shader_ctx *ctx)
{
	return ctx->temp_reg + ctx->max_driver_temp_used++;
}

/* Append a PRIMID output (z channel of GPR 0) to the VS output list with
 * the given SPI semantic index. */
static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
{
	int i;
	i = ctx->shader->noutput++;
	ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID;
	ctx->shader->output[i].sid = 0;
	ctx->shader->output[i].gpr = 0;
	ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT;
	ctx->shader->output[i].write_mask = 0x4;
	ctx->shader->output[i].spi_sid = prim_id_sid;

	return 0;
}

/* Emit a single barrier ALU instruction; the opcode comes from the
 * per-TGSI-opcode instruction table. */
static int tgsi_barrier(struct r600_shader_ctx *ctx)
{
	struct r600_bytecode_alu alu;
	int r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ctx->inst_info->op;
	alu.last = 1;

	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* Process one TGSI declaration token: record inputs/outputs (with their
 * GPRs, semantic indices and per-stage side effects), register indirect
 * temp arrays, and emit setup code for system values. */
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
	int r, i, j, count = d->Range.Last - d->Range.First + 1;

	switch (d->Declaration.File) {
	case TGSI_FILE_INPUT:
		for (j = 0; j < count; j++) {
			i = ctx->shader->ninput + j;
			assert(i < Elements(ctx->shader->input));
			ctx->shader->input[i].name = d->Semantic.Name;
			ctx->shader->input[i].sid = d->Semantic.Index + j;
			ctx->shader->input[i].interpolate = d->Interp.Interpolate;
			ctx->shader->input[i].interpolate_location = d->Interp.Location;
			ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j;
			if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
				ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
				switch (ctx->shader->input[i].name) {
				case TGSI_SEMANTIC_FACE:
					if (ctx->face_gpr != -1)
						ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
					else
						ctx->face_gpr = ctx->shader->input[i].gpr;
					break;
				case TGSI_SEMANTIC_COLOR:
					ctx->colors_used++;
					break;
				case TGSI_SEMANTIC_POSITION:
					ctx->fragcoord_input = i;
					break;
				case TGSI_SEMANTIC_PRIMID:
					/* set this for now */
					ctx->shader->gs_prim_id_input = true;
					ctx->shader->ps_prim_id_input = i;
					break;
				}
				if (ctx->bc->chip_class >= EVERGREEN) {
					if ((r = evergreen_interp_input(ctx, i)))
						return r;
				}
			} else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
				/* FIXME probably skip inputs if they aren't passed in the ring */
				ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
				ctx->next_ring_offset += 16;
				if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
					ctx->shader->gs_prim_id_input = true;
			}
		}
		ctx->shader->ninput += count;
		break;
	case TGSI_FILE_OUTPUT:
		for (j = 0; j < count; j++) {
			i = ctx->shader->noutput + j;
			assert(i < Elements(ctx->shader->output));
			ctx->shader->output[i].name = d->Semantic.Name;
			ctx->shader->output[i].sid = d->Semantic.Index + j;
			ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j;
			ctx->shader->output[i].interpolate = d->Interp.Interpolate;
			ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
			if (ctx->type == TGSI_PROCESSOR_VERTEX ||
			    ctx->type == TGSI_PROCESSOR_GEOMETRY ||
			    ctx->type == TGSI_PROCESSOR_TESS_EVAL) {
				ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
				switch (d->Semantic.Name) {
				case TGSI_SEMANTIC_CLIPDIST:
					/* 4 bits of writemask per clip-dist vector */
					ctx->shader->clip_dist_write |= d->Declaration.UsageMask <<
									((d->Semantic.Index + j) << 2);
					break;
				case TGSI_SEMANTIC_PSIZE:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_point_size = 1;
					break;
				case TGSI_SEMANTIC_EDGEFLAG:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_edgeflag = 1;
					ctx->edgeflag_output = i;
					break;
				case TGSI_SEMANTIC_VIEWPORT_INDEX:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_viewport = 1;
					break;
				case TGSI_SEMANTIC_LAYER:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_layer = 1;
					break;
				case TGSI_SEMANTIC_CLIPVERTEX:
					ctx->clip_vertex_write = TRUE;
					ctx->cv_output = i;
					break;
				}
				if (ctx->type == TGSI_PROCESSOR_GEOMETRY) {
					ctx->gs_out_ring_offset += 16;
				}
			} else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
				switch (d->Semantic.Name) {
				case TGSI_SEMANTIC_COLOR:
					ctx->shader->nr_ps_max_color_exports++;
					break;
				}
			}
		}
		ctx->shader->noutput += count;
		break;
	case TGSI_FILE_TEMPORARY:
		if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
			if (d->Array.ArrayID) {
				r600_add_gpr_array(ctx->shader,
						   ctx->file_offset[TGSI_FILE_TEMPORARY] +
						   d->Range.First,
						   d->Range.Last - d->Range.First + 1, 0x0F);
			}
		}
		break;

	case TGSI_FILE_CONSTANT:
	case TGSI_FILE_SAMPLER:
	case TGSI_FILE_SAMPLER_VIEW:
	case TGSI_FILE_ADDRESS:
		break;

	case TGSI_FILE_SYSTEM_VALUE:
		if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
			break; /* Already handled from allocate_system_value_inputs */
		} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
			/* InstanceID arrives as int in r0.w; convert in place
			 * for pre-integer (non-native-int) GLSL */
			if (!ctx->native_integers) {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP1_INT_TO_FLT;
				alu.src[0].sel = 0;
				alu.src[0].chan = 3;

				alu.dst.sel = 0;
				alu.dst.chan = 3;
				alu.dst.write = 1;
				alu.last = 1;

				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
			break;
		} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
			break;
		else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID)
			break;
		else if (d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ||
			 d->Semantic.Name == TGSI_SEMANTIC_TESSOUTER) {
			/* fetch the tess factors from LDS into a fixed GPR
			 * (TESSINNER -> r3, TESSOUTER -> r2) */
			int param = r600_get_lds_unique_index(d->Semantic.Name, 0);
			int dreg = d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ? 3 : 2;
			unsigned temp_reg = r600_get_temp(ctx);

			r = get_lds_offset0(ctx, 2, temp_reg, true);
			if (r)
				return r;

			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
					   temp_reg, 0,
					   temp_reg, 0,
					   V_SQ_ALU_SRC_LITERAL, param * 16);
			if (r)
				return r;

			/* NOTE(review): return value ignored here while other
			 * callers check it -- confirm intentional. */
			do_lds_fetch_values(ctx, temp_reg, dreg);
		}
		else if (d->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) {
			/* MOV r1.x, r0.x;
			   MOV r1.y, r0.y;
			*/
			for (i = 0; i < 2; i++) {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_MOV;
				alu.src[0].sel = 0;
				alu.src[0].chan = 0 + i;
				alu.dst.sel = 1;
				alu.dst.chan = 0 + i;
				alu.dst.write = 1;
				alu.last = (i == 1) ? 1 : 0;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
			/* ADD r1.z, 1.0f, -r0.x */
			struct r600_bytecode_alu alu;
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD;
			alu.src[0].sel = V_SQ_ALU_SRC_1;
			alu.src[1].sel = 1;
			alu.src[1].chan = 0;
			alu.src[1].neg = 1;
			alu.dst.sel = 1;
			alu.dst.chan = 2;
			alu.dst.write = 1;
			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* ADD r1.z, r1.z, -r1.y
			 * (deliberately reuses 'alu' without re-memset; every
			 * field that differs is reassigned below) */
			alu.op = ALU_OP2_ADD;
			alu.src[0].sel = 1;
			alu.src[0].chan = 2;
			alu.src[1].sel = 1;
			alu.src[1].chan = 1;
			alu.src[1].neg = 1;
			alu.dst.sel = 1;
			alu.dst.chan = 2;
			alu.dst.write = 1;
			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
			break;
		}
		break;
	default:
		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
		return -EINVAL;
	}
	return 0;
}

/* Scan the shader once for system-value inputs (SAMPLEMASK, SAMPLEID/SAMPLEPOS)
 * and for interpolateAtSample/Offset/Centroid, then allocate one GPR per
 * enabled system-value input, starting at gpr_offset.
 *
 * Side effects: enables entries in ctx->eg_interpolators, appends to
 * ctx->shader->input[] and stores each allocated GPR through the matching
 * inputs[].reg pointer (ctx->face_gpr / ctx->fixed_pt_position_gpr).
 * Returns the first free GPR index after the allocated range
 * (gpr_offset unchanged if nothing was enabled, or if parse init fails). */
static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset)
{
	struct tgsi_parse_context parse;
	struct {
		boolean enabled;
		int *reg;                       /* where to store the allocated GPR */
		unsigned name, alternate_name;  /* TGSI semantics that enable this slot */
	} inputs[2] = {
		{ false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */

		{ false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
	};
	int i, k, num_regs = 0;

	if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
		return 0;
	}

	/* need to scan shader for system values and interpolateAtSample/Offset/Centroid */
	while (!tgsi_parse_end_of_tokens(&parse)) {
		tgsi_parse_token(&parse);

		if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
			const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID)
			{
				int interpolate, location, k;

				if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
					location = TGSI_INTERPOLATE_LOC_CENTER;
					inputs[1].enabled = true; /* needs SAMPLEID */
				} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
					location = TGSI_INTERPOLATE_LOC_CENTER;
					/* Needs sample positions, currently those are always available */
				} else {
					location = TGSI_INTERPOLATE_LOC_CENTROID;
				}

				/* mark the interpolator (mode+location pair) as used */
				interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
				k = eg_get_interpolator_index(interpolate, location);
				ctx->eg_interpolators[k].enabled = true;
			}
		} else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
			struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
			if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
				/* a declared system value enables the matching slot */
				for (k = 0; k < Elements(inputs); k++) {
					if (d->Semantic.Name == inputs[k].name ||
						d->Semantic.Name == inputs[k].alternate_name) {
						inputs[k].enabled = true;
					}
				}
			}
		}
	}

	tgsi_parse_free(&parse);

	for (i = 0; i < Elements(inputs); i++) {
		boolean enabled = inputs[i].enabled;
		int *reg = inputs[i].reg;
		unsigned name = inputs[i].name;

		if (enabled) {
			int gpr = gpr_offset + num_regs++;

			// add to inputs, allocate a gpr
			k = ctx->shader->ninput ++;
			ctx->shader->input[k].name = name;
			ctx->shader->input[k].sid = 0;
			ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT;
			ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER;
			*reg = ctx->shader->input[k].gpr = gpr;
		}
	}

	return gpr_offset + num_regs;
}
1190 1191 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 1192 return 0; 1193 } 1194 1195 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 1196 while (!tgsi_parse_end_of_tokens(&parse)) { 1197 tgsi_parse_token(&parse); 1198 1199 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 1200 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 1201 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 1202 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 1203 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 1204 { 1205 int interpolate, location, k; 1206 1207 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 1208 location = TGSI_INTERPOLATE_LOC_CENTER; 1209 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 1210 location = TGSI_INTERPOLATE_LOC_CENTER; 1211 } else { 1212 location = TGSI_INTERPOLATE_LOC_CENTROID; 1213 } 1214 1215 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 1216 k = eg_get_interpolator_index(interpolate, location); 1217 ctx->eg_interpolators[k].enabled = true; 1218 } 1219 } 1220 } 1221 1222 tgsi_parse_free(&parse); 1223 1224 /* assign gpr to each interpolator according to priority */ 1225 num_baryc = 0; 1226 for (i = 0; i < Elements(ctx->eg_interpolators); i++) { 1227 if (ctx->eg_interpolators[i].enabled) { 1228 ctx->eg_interpolators[i].ij_index = num_baryc; 1229 num_baryc ++; 1230 } 1231 } 1232 1233 /* XXX PULL MODEL and LINE STIPPLE */ 1234 1235 num_baryc = (num_baryc + 1) >> 1; 1236 return allocate_system_value_inputs(ctx, num_baryc); 1237} 1238 1239/* sample_id_sel == NULL means fetch for current sample */ 1240static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel) 1241{ 1242 struct r600_bytecode_vtx vtx; 1243 int r, t1; 1244 1245 assert(ctx->fixed_pt_position_gpr != -1); 1246 1247 t1 = r600_get_temp(ctx); 1248 1249 memset(&vtx, 0, 
sizeof(struct r600_bytecode_vtx)); 1250 vtx.op = FETCH_OP_VFETCH; 1251 vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 1252 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1253 if (sample_id == NULL) { 1254 vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; 1255 vtx.src_sel_x = 3; 1256 } 1257 else { 1258 struct r600_bytecode_alu alu; 1259 1260 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1261 alu.op = ALU_OP1_MOV; 1262 r600_bytecode_src(&alu.src[0], sample_id, chan_sel); 1263 alu.dst.sel = t1; 1264 alu.dst.write = 1; 1265 alu.last = 1; 1266 r = r600_bytecode_add_alu(ctx->bc, &alu); 1267 if (r) 1268 return r; 1269 1270 vtx.src_gpr = t1; 1271 vtx.src_sel_x = 0; 1272 } 1273 vtx.mega_fetch_count = 16; 1274 vtx.dst_gpr = t1; 1275 vtx.dst_sel_x = 0; 1276 vtx.dst_sel_y = 1; 1277 vtx.dst_sel_z = 2; 1278 vtx.dst_sel_w = 3; 1279 vtx.data_format = FMT_32_32_32_32_FLOAT; 1280 vtx.num_format_all = 2; 1281 vtx.format_comp_all = 1; 1282 vtx.use_const_fields = 0; 1283 vtx.offset = 1; // first element is size of buffer 1284 vtx.endian = r600_endian_swap(32); 1285 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 1286 1287 r = r600_bytecode_add_vtx(ctx->bc, &vtx); 1288 if (r) 1289 return r; 1290 1291 return t1; 1292} 1293 1294static void tgsi_src(struct r600_shader_ctx *ctx, 1295 const struct tgsi_full_src_register *tgsi_src, 1296 struct r600_shader_src *r600_src) 1297{ 1298 memset(r600_src, 0, sizeof(*r600_src)); 1299 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 1300 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 1301 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 1302 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 1303 r600_src->neg = tgsi_src->Register.Negate; 1304 r600_src->abs = tgsi_src->Register.Absolute; 1305 1306 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 1307 int index; 1308 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 1309 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 1310 
/* Translate one TGSI source operand into an r600_shader_src: swizzles,
 * negate/abs modifiers, register select, and special handling for
 * immediates and system values.
 *
 * For system values, the sel/swizzle choices map each semantic onto the
 * GPR/channel where the hardware (or earlier setup code) deposits the value
 * — e.g. SAMPLEMASK in face_gpr.z, SAMPLEID in fixed_pt_position_gpr.w.
 * NOTE(review): the specific sel constants (0/1/2/3) for the tess/vertex
 * semantics correspond to registers reserved during shader setup — confirm
 * against the ctx initialization when touching these. */
static void tgsi_src(struct r600_shader_ctx *ctx,
		     const struct tgsi_full_src_register *tgsi_src,
		     struct r600_shader_src *r600_src)
{
	memset(r600_src, 0, sizeof(*r600_src));
	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
	r600_src->neg = tgsi_src->Register.Negate;
	r600_src->abs = tgsi_src->Register.Absolute;

	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
		int index;
		/* if all four channels read the same immediate, try to fold it
		 * into an inline hardware constant (0, 1, 0.5, ...) */
		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {

			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs);
			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
				return;
		}
		index = tgsi_src->Register.Index;
		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
		if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) {
			r600_src->swizzle[0] = 2; // Z value
			r600_src->swizzle[1] = 2;
			r600_src->swizzle[2] = 2;
			r600_src->swizzle[3] = 2;
			r600_src->sel = ctx->face_gpr;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
			r600_src->swizzle[0] = 3; // W value
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = ctx->fixed_pt_position_gpr;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
			/* sample position is fetched from the buffer-info constant
			 * buffer into a fresh temp; .zw read inline 0/1 constants */
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 1;
			r600_src->swizzle[2] = 4;
			r600_src->swizzle[3] = 4;
			r600_src->sel = load_sample_position(ctx, NULL, -1);
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 0;
			r600_src->swizzle[2] = 0;
			r600_src->swizzle[3] = 0;
			r600_src->sel = 0;
		} else if (ctx->type != TGSI_PROCESSOR_TESS_CTRL &&
			   ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
			/* non-TCS stages: invocation id lives in r1.w */
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 1;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
			/* TCS: invocation id lives in r0.z (order of these two
			 * INVOCATIONID branches is significant) */
			r600_src->swizzle[0] = 2;
			r600_src->swizzle[1] = 2;
			r600_src->swizzle[2] = 2;
			r600_src->swizzle[3] = 2;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSCOORD) {
			r600_src->sel = 1;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSINNER) {
			r600_src->sel = 3;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) {
			r600_src->sel = 2;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) {
			if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) {
				r600_src->sel = ctx->tess_input_info;
				r600_src->swizzle[0] = 2;
				r600_src->swizzle[1] = 2;
				r600_src->swizzle[2] = 2;
				r600_src->swizzle[3] = 2;
			} else {
				r600_src->sel = ctx->tess_input_info;
				r600_src->swizzle[0] = 3;
				r600_src->swizzle[1] = 3;
				r600_src->swizzle[2] = 3;
				r600_src->swizzle[3] = 3;
			}
		} else if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) {
			r600_src->sel = 0;
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 0;
			r600_src->swizzle[2] = 0;
			r600_src->swizzle[3] = 0;
		} else if (ctx->type == TGSI_PROCESSOR_TESS_EVAL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) {
			r600_src->sel = 0;
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
		}
	} else {
		if (tgsi_src->Register.Indirect)
			r600_src->rel = V_SQ_REL_RELATIVE;
		r600_src->sel = tgsi_src->Register.Index;
		/* remap to the flat GPR space used by the bytecode */
		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
	}
	if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) {
		if (tgsi_src->Register.Dimension) {
			r600_src->kc_bank = tgsi_src->Dimension.Index;
			if (tgsi_src->Dimension.Indirect) {
				r600_src->kc_rel = 1;
			}
		}
	}
}
r600_src->swizzle[3] = 3; 1395 } 1396 } else { 1397 if (tgsi_src->Register.Indirect) 1398 r600_src->rel = V_SQ_REL_RELATIVE; 1399 r600_src->sel = tgsi_src->Register.Index; 1400 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 1401 } 1402 if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) { 1403 if (tgsi_src->Register.Dimension) { 1404 r600_src->kc_bank = tgsi_src->Dimension.Index; 1405 if (tgsi_src->Dimension.Indirect) { 1406 r600_src->kc_rel = 1; 1407 } 1408 } 1409 } 1410} 1411 1412static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 1413 unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 1414 unsigned int dst_reg) 1415{ 1416 struct r600_bytecode_vtx vtx; 1417 unsigned int ar_reg; 1418 int r; 1419 1420 if (offset) { 1421 struct r600_bytecode_alu alu; 1422 1423 memset(&alu, 0, sizeof(alu)); 1424 1425 alu.op = ALU_OP2_ADD_INT; 1426 alu.src[0].sel = ctx->bc->ar_reg; 1427 alu.src[0].chan = ar_chan; 1428 1429 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1430 alu.src[1].value = offset; 1431 1432 alu.dst.sel = dst_reg; 1433 alu.dst.chan = ar_chan; 1434 alu.dst.write = 1; 1435 alu.last = 1; 1436 1437 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1438 return r; 1439 1440 ar_reg = dst_reg; 1441 } else { 1442 ar_reg = ctx->bc->ar_reg; 1443 } 1444 1445 memset(&vtx, 0, sizeof(vtx)); 1446 vtx.buffer_id = cb_idx; 1447 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1448 vtx.src_gpr = ar_reg; 1449 vtx.src_sel_x = ar_chan; 1450 vtx.mega_fetch_count = 16; 1451 vtx.dst_gpr = dst_reg; 1452 vtx.dst_sel_x = 0; /* SEL_X */ 1453 vtx.dst_sel_y = 1; /* SEL_Y */ 1454 vtx.dst_sel_z = 2; /* SEL_Z */ 1455 vtx.dst_sel_w = 3; /* SEL_W */ 1456 vtx.data_format = FMT_32_32_32_32_FLOAT; 1457 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 1458 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 1459 vtx.endian = r600_endian_swap(32); 1460 vtx.buffer_index_mode = cb_rel; // cb_rel ? 
/* Fetch one geometry-shader input (input[vtx][index]) from the ESGS ring
 * into dst_reg.xyzw. Handles indirect vertex (Dimension.Indirect) and
 * indirect element (Register.Indirect) addressing by computing the ring
 * offset into a temp first. */
static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
{
	struct r600_bytecode_vtx vtx;
	int r;
	unsigned index = src->Register.Index;
	unsigned vtx_id = src->Dimension.Index;
	int offset_reg = vtx_id / 3;   /* which GPR holds this vertex's ring offset */
	int offset_chan = vtx_id % 3;  /* which channel of that GPR */
	int t2 = 0;

	/* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
	 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */

	if (offset_reg == 0 && offset_chan == 2)
		offset_chan = 3;

	if (src->Dimension.Indirect || src->Register.Indirect)
		t2 = r600_get_temp(ctx);

	if (src->Dimension.Indirect) {
		int treg[3];
		struct r600_bytecode_alu alu;
		int r, i;
		unsigned addr_reg;
		addr_reg = get_address_file_reg(ctx, src->DimIndirect.Index);
		if (src->DimIndirect.Index > 0) {
			/* relative GPR addressing indexes via AR; copy the
			 * address register into AR first */
			r = single_alu_op2(ctx, ALU_OP1_MOV,
					   ctx->bc->ar_reg, 0,
					   addr_reg, 0,
					   0, 0);
			if (r)
				return r;
		}
		/*
		   we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt.
		   at least this is what fglrx seems to do. */
		for (i = 0; i < 3; i++) {
			treg[i] = r600_get_temp(ctx);
		}
		r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);

		/* scatter the three per-vertex ring offsets into consecutive GPRs */
		for (i = 0; i < 3; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			alu.src[0].sel = 0;
			alu.src[0].chan = i == 2 ? 3 : i; /* R0.z is PrimitiveID, skip to .w */
			alu.dst.sel = treg[i];
			alu.dst.chan = 0;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		/* t2.x = treg[AR].x — the selected vertex's ring offset */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = treg[0];
		alu.src[0].rel = 1;
		alu.dst.sel = t2;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
		offset_reg = t2;
		offset_chan = 0;
	}

	if (src->Register.Indirect) {
		int addr_reg;
		unsigned first = ctx->info.input_array_first[src->Indirect.ArrayID];

		addr_reg = get_address_file_reg(ctx, src->Indirect.Index);

		/* pull the value from index_reg:
		 * t2.x = (addr + first) * 4 + vertex ring offset (dwords) */
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   t2, 1,
				   addr_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, first);
		if (r)
			return r;
		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
				   t2, 0,
				   t2, 1,
				   V_SQ_ALU_SRC_LITERAL, 4,
				   offset_reg, offset_chan);
		if (r)
			return r;
		offset_reg = t2;
		offset_chan = 0;
		index = src->Register.Index - first; /* remaining constant part */
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = offset_reg;
	vtx.src_sel_x = offset_chan;
	vtx.offset = index * 16; /*bytes*/
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = dst_reg;
	vtx.dst_sel_x = 0;		/* SEL_X */
	vtx.dst_sel_y = 1;		/* SEL_Y */
	vtx.dst_sel_z = 2;		/* SEL_Z */
	vtx.dst_sel_w = 3;		/* SEL_W */
	if (ctx->bc->chip_class >= EVERGREEN) {
		vtx.use_const_fields = 1;
	} else {
		vtx.data_format = FMT_32_32_32_32_FLOAT;
	}

	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
		return r;

	return 0;
}
&ctx->parse.FullToken.FullInstruction; 1588 int i; 1589 1590 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1591 struct tgsi_full_src_register *src = &inst->Src[i]; 1592 1593 if (src->Register.File == TGSI_FILE_INPUT) { 1594 if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) { 1595 /* primitive id is in R0.z */ 1596 ctx->src[i].sel = 0; 1597 ctx->src[i].swizzle[0] = 2; 1598 } 1599 } 1600 if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { 1601 int treg = r600_get_temp(ctx); 1602 1603 fetch_gs_input(ctx, src, treg); 1604 ctx->src[i].sel = treg; 1605 ctx->src[i].rel = 0; 1606 } 1607 } 1608 return 0; 1609} 1610 1611 1612/* Tessellation shaders pass outputs to the next shader using LDS. 1613 * 1614 * LS outputs = TCS(HS) inputs 1615 * TCS(HS) outputs = TES(DS) inputs 1616 * 1617 * The LDS layout is: 1618 * - TCS inputs for patch 0 1619 * - TCS inputs for patch 1 1620 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) 1621 * - ... 1622 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset 1623 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset 1624 * - TCS outputs for patch 1 1625 * - Per-patch TCS outputs for patch 1 1626 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) 1627 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) 1628 * - ... 1629 * 1630 * All three shaders VS(LS), TCS, TES share the same LDS space. 
/* this will return with the dw address in temp_reg.x */
/* Compute the LDS byte address of a TGSI input/output register into
 * temp_reg.x, on top of the base address already present there.
 * Address = base + vertex_index * stride_bytes (for 2D registers)
 *         + relative_index * 16 (for indirect element addressing)
 *         + lds_unique_index(semantic) * 16.
 * Exactly one of dst/src describes the register; the computation is
 * identical for both. Returns 0 or a negative error code. */
static int r600_get_byte_address(struct r600_shader_ctx *ctx, int temp_reg,
				 const struct tgsi_full_dst_register *dst,
				 const struct tgsi_full_src_register *src,
				 int stride_bytes_reg, int stride_bytes_chan)
{
	struct tgsi_full_dst_register reg;
	ubyte *name, *index, *array_first;
	int r;
	int param;
	struct tgsi_shader_info *info = &ctx->info;
	/* Set the register description. The address computation is the same
	 * for sources and destinations. */
	if (src) {
		reg.Register.File = src->Register.File;
		reg.Register.Index = src->Register.Index;
		reg.Register.Indirect = src->Register.Indirect;
		reg.Register.Dimension = src->Register.Dimension;
		reg.Indirect = src->Indirect;
		reg.Dimension = src->Dimension;
		reg.DimIndirect = src->DimIndirect;
	} else
		reg = *dst;

	/* If the register is 2-dimensional (e.g. an array of vertices
	 * in a primitive), calculate the base address of the vertex. */
	if (reg.Register.Dimension) {
		int sel, chan;
		if (reg.Dimension.Indirect) {
			unsigned addr_reg;
			assert (reg.DimIndirect.File == TGSI_FILE_ADDRESS);

			addr_reg = get_address_file_reg(ctx, reg.DimIndirect.Index);
			/* pull the value from index_reg */
			sel = addr_reg;
			chan = 0;
		} else {
			sel = V_SQ_ALU_SRC_LITERAL;
			chan = reg.Dimension.Index;
		}

		/* temp.x += vertex_index * stride_bytes */
		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
				   temp_reg, 0,
				   stride_bytes_reg, stride_bytes_chan,
				   sel, chan,
				   temp_reg, 0);
		if (r)
			return r;
	}

	if (reg.Register.File == TGSI_FILE_INPUT) {
		name = info->input_semantic_name;
		index = info->input_semantic_index;
		array_first = info->input_array_first;
	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
		name = info->output_semantic_name;
		index = info->output_semantic_index;
		array_first = info->output_array_first;
	} else {
		assert(0);
		return -1;
	}
	if (reg.Register.Indirect) {
		int addr_reg;
		int first;
		/* Add the relative address of the element. */
		if (reg.Indirect.ArrayID)
			first = array_first[reg.Indirect.ArrayID];
		else
			first = reg.Register.Index;

		addr_reg = get_address_file_reg(ctx, reg.Indirect.Index);

		/* pull the value from index_reg: temp.x += addr * 16 */
		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 16,
				   addr_reg, 0,
				   temp_reg, 0);
		if (r)
			return r;

		param = r600_get_lds_unique_index(name[first],
						  index[first]);

	} else {
		param = r600_get_lds_unique_index(name[reg.Register.Index],
						  index[reg.Register.Index]);
	}

	/* add to base_addr - passed in temp_reg.x */
	if (param) {
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, 0,
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, param * 16);
		if (r)
			return r;

	}
	return 0;
}
*/ 1698 if (reg.Indirect.ArrayID) 1699 first = array_first[reg.Indirect.ArrayID]; 1700 else 1701 first = reg.Register.Index; 1702 1703 addr_reg = get_address_file_reg(ctx, reg.Indirect.Index); 1704 1705 /* pull the value from index_reg */ 1706 r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24, 1707 temp_reg, 0, 1708 V_SQ_ALU_SRC_LITERAL, 16, 1709 addr_reg, 0, 1710 temp_reg, 0); 1711 if (r) 1712 return r; 1713 1714 param = r600_get_lds_unique_index(name[first], 1715 index[first]); 1716 1717 } else { 1718 param = r600_get_lds_unique_index(name[reg.Register.Index], 1719 index[reg.Register.Index]); 1720 } 1721 1722 /* add to base_addr - passed in temp_reg.x */ 1723 if (param) { 1724 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1725 temp_reg, 0, 1726 temp_reg, 0, 1727 V_SQ_ALU_SRC_LITERAL, param * 16); 1728 if (r) 1729 return r; 1730 1731 } 1732 return 0; 1733} 1734 1735static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, 1736 unsigned dst_reg) 1737{ 1738 struct r600_bytecode_alu alu; 1739 int r, i; 1740 1741 if ((ctx->bc->cf_last->ndw>>1) >= 0x60) 1742 ctx->bc->force_add_cf = 1; 1743 for (i = 1; i < 4; i++) { 1744 r = single_alu_op2(ctx, ALU_OP2_ADD_INT, 1745 temp_reg, i, 1746 temp_reg, 0, 1747 V_SQ_ALU_SRC_LITERAL, 4 * i); 1748 } 1749 for (i = 0; i < 4; i++) { 1750 /* emit an LDS_READ_RET */ 1751 memset(&alu, 0, sizeof(alu)); 1752 alu.op = LDS_OP1_LDS_READ_RET; 1753 alu.src[0].sel = temp_reg; 1754 alu.src[0].chan = i; 1755 alu.src[1].sel = V_SQ_ALU_SRC_0; 1756 alu.src[2].sel = V_SQ_ALU_SRC_0; 1757 alu.dst.chan = 0; 1758 alu.is_lds_idx_op = true; 1759 alu.last = 1; 1760 r = r600_bytecode_add_alu(ctx->bc, &alu); 1761 if (r) 1762 return r; 1763 } 1764 for (i = 0; i < 4; i++) { 1765 /* then read from LDS_OQ_A_POP */ 1766 memset(&alu, 0, sizeof(alu)); 1767 1768 alu.op = ALU_OP1_MOV; 1769 alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP; 1770 alu.src[0].chan = 0; 1771 alu.dst.sel = dst_reg; 1772 alu.dst.chan = i; 1773 alu.dst.write = 1; 1774 alu.last = 1; 
1775 r = r600_bytecode_add_alu(ctx->bc, &alu); 1776 if (r) 1777 return r; 1778 } 1779 return 0; 1780} 1781 1782static int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1783{ 1784 int r; 1785 unsigned temp_reg = r600_get_temp(ctx); 1786 1787 r = get_lds_offset0(ctx, 2, temp_reg, 1788 src->Register.Dimension ? false : true); 1789 if (r) 1790 return r; 1791 1792 /* the base address is now in temp.x */ 1793 r = r600_get_byte_address(ctx, temp_reg, 1794 NULL, src, ctx->tess_output_info, 1); 1795 if (r) 1796 return r; 1797 1798 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1799 if (r) 1800 return r; 1801 return 0; 1802} 1803 1804static int fetch_tcs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1805{ 1806 int r; 1807 unsigned temp_reg = r600_get_temp(ctx); 1808 1809 /* t.x = ips * r0.y */ 1810 r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24, 1811 temp_reg, 0, 1812 ctx->tess_input_info, 0, 1813 0, 1); 1814 1815 if (r) 1816 return r; 1817 1818 /* the base address is now in temp.x */ 1819 r = r600_get_byte_address(ctx, temp_reg, 1820 NULL, src, ctx->tess_input_info, 1); 1821 if (r) 1822 return r; 1823 1824 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1825 if (r) 1826 return r; 1827 return 0; 1828} 1829 1830static int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1831{ 1832 int r; 1833 unsigned temp_reg = r600_get_temp(ctx); 1834 1835 r = get_lds_offset0(ctx, 1, temp_reg, 1836 src->Register.Dimension ? 
false : true); 1837 if (r) 1838 return r; 1839 /* the base address is now in temp.x */ 1840 r = r600_get_byte_address(ctx, temp_reg, 1841 NULL, src, 1842 ctx->tess_output_info, 1); 1843 if (r) 1844 return r; 1845 1846 r = do_lds_fetch_values(ctx, temp_reg, dst_reg); 1847 if (r) 1848 return r; 1849 return 0; 1850} 1851 1852static int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx) 1853{ 1854 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1855 int i; 1856 1857 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1858 struct tgsi_full_src_register *src = &inst->Src[i]; 1859 1860 if (ctx->type == TGSI_PROCESSOR_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) { 1861 int treg = r600_get_temp(ctx); 1862 fetch_tes_input(ctx, src, treg); 1863 ctx->src[i].sel = treg; 1864 ctx->src[i].rel = 0; 1865 } 1866 if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) { 1867 int treg = r600_get_temp(ctx); 1868 fetch_tcs_input(ctx, src, treg); 1869 ctx->src[i].sel = treg; 1870 ctx->src[i].rel = 0; 1871 } 1872 if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) { 1873 int treg = r600_get_temp(ctx); 1874 fetch_tcs_output(ctx, src, treg); 1875 ctx->src[i].sel = treg; 1876 ctx->src[i].rel = 0; 1877 } 1878 } 1879 return 0; 1880} 1881 1882static int tgsi_split_constant(struct r600_shader_ctx *ctx) 1883{ 1884 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1885 struct r600_bytecode_alu alu; 1886 int i, j, k, nconst, r; 1887 1888 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 1889 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 1890 nconst++; 1891 } 1892 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 1893 } 1894 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 1895 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 1896 continue; 1897 } 1898 1899 if (ctx->src[i].rel) { 1900 int chan = inst->Src[i].Indirect.Swizzle; 1901 
/* Copy constant-file sources into temporaries so that at most one constant
 * operand remains on the instruction (hardware kcache limitation).
 *
 * First pass counts constants and resolves all sources via tgsi_src().
 * Second pass: relatively-indexed constants are always fetched into a temp
 * (tgsi_fetch_rel_const); directly-indexed ones are MOVed into a temp while
 * more than one constant remains (j > 0), leaving the last one in place. */
static int tgsi_split_constant(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, nconst, r;

	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
			nconst++;
		}
		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
	}
	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
			continue;
		}

		if (ctx->src[i].rel) {
			int chan = inst->Src[i].Indirect.Swizzle;
			int treg = r600_get_temp(ctx);
			/* sel - 512 converts the cfile selector back to a constant index */
			if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg)))
				return r;

			ctx->src[i].kc_bank = 0;
			ctx->src[i].kc_rel = 0;
			ctx->src[i].sel = treg;
			ctx->src[i].rel = 0;
			j--;
		} else if (j > 0) {
			int treg = r600_get_temp(ctx);
			/* copy all four channels so any swizzle keeps working */
			for (k = 0; k < 4; k++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_MOV;
				alu.src[0].sel = ctx->src[i].sel;
				alu.src[0].chan = k;
				alu.src[0].rel = ctx->src[i].rel;
				alu.src[0].kc_bank = ctx->src[i].kc_bank;
				alu.src[0].kc_rel = ctx->src[i].kc_rel;
				alu.dst.sel = treg;
				alu.dst.chan = k;
				alu.dst.write = 1;
				if (k == 3)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			ctx->src[i].sel = treg;
			ctx->src[i].rel = 0;
			j--;
		}
	}
	return 0;
}

/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */
static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, nliteral, r;

	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
			nliteral++;
		}
	}
	/* as above: keep the last literal in place, copy the rest to temps */
	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
			int treg = r600_get_temp(ctx);
			for (k = 0; k < 4; k++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_MOV;
				alu.src[0].sel = ctx->src[i].sel;
				alu.src[0].chan = k;
				alu.src[0].value = ctx->src[i].value[k];
				alu.dst.sel = treg;
				alu.dst.chan = k;
				alu.dst.write = 1;
				if (k == 3)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			ctx->src[i].sel = treg;
			j--;
		}
	}
	return 0;
}

/* For every COLOR input of a two-sided-lighting fragment shader, select
 * between the front and back color registers based on the face. */
static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
{
	int i, r, count = ctx->shader->ninput;

	for (i = 0; i < count; i++) {
		if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
			r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input);
			if (r)
				return r;
		}
	}
	return 0;
}
*/ 2021 if (so->output[i].dst_offset < so->output[i].start_component) { 2022 unsigned tmp = r600_get_temp(ctx); 2023 2024 for (j = 0; j < so->output[i].num_components; j++) { 2025 struct r600_bytecode_alu alu; 2026 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2027 alu.op = ALU_OP1_MOV; 2028 alu.src[0].sel = so_gpr[i]; 2029 alu.src[0].chan = so->output[i].start_component + j; 2030 2031 alu.dst.sel = tmp; 2032 alu.dst.chan = j; 2033 alu.dst.write = 1; 2034 if (j == so->output[i].num_components - 1) 2035 alu.last = 1; 2036 r = r600_bytecode_add_alu(ctx->bc, &alu); 2037 if (r) 2038 return r; 2039 } 2040 start_comp[i] = 0; 2041 so_gpr[i] = tmp; 2042 } 2043 } 2044 2045 /* Write outputs to buffers. */ 2046 for (i = 0; i < so->num_outputs; i++) { 2047 struct r600_bytecode_output output; 2048 2049 if (stream != -1 && stream != so->output[i].output_buffer) 2050 continue; 2051 2052 memset(&output, 0, sizeof(struct r600_bytecode_output)); 2053 output.gpr = so_gpr[i]; 2054 output.elem_size = so->output[i].num_components - 1; 2055 if (output.elem_size == 2) 2056 output.elem_size = 3; // 3 not supported, write 4 with junk at end 2057 output.array_base = so->output[i].dst_offset - start_comp[i]; 2058 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 2059 output.burst_count = 1; 2060 /* array_size is an upper limit for the burst_count 2061 * with MEM_STREAM instructions */ 2062 output.array_size = 0xFFF; 2063 output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i]; 2064 2065 if (ctx->bc->chip_class >= EVERGREEN) { 2066 switch (so->output[i].output_buffer) { 2067 case 0: 2068 output.op = CF_OP_MEM_STREAM0_BUF0; 2069 break; 2070 case 1: 2071 output.op = CF_OP_MEM_STREAM0_BUF1; 2072 break; 2073 case 2: 2074 output.op = CF_OP_MEM_STREAM0_BUF2; 2075 break; 2076 case 3: 2077 output.op = CF_OP_MEM_STREAM0_BUF3; 2078 break; 2079 } 2080 output.op += so->output[i].stream * 4; 2081 assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= 
CF_OP_MEM_STREAM3_BUF3); 2082 ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4; 2083 } else { 2084 switch (so->output[i].output_buffer) { 2085 case 0: 2086 output.op = CF_OP_MEM_STREAM0; 2087 break; 2088 case 1: 2089 output.op = CF_OP_MEM_STREAM1; 2090 break; 2091 case 2: 2092 output.op = CF_OP_MEM_STREAM2; 2093 break; 2094 case 3: 2095 output.op = CF_OP_MEM_STREAM3; 2096 break; 2097 } 2098 ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer; 2099 } 2100 r = r600_bytecode_add_output(ctx->bc, &output); 2101 if (r) 2102 goto out_err; 2103 } 2104 return 0; 2105out_err: 2106 return r; 2107} 2108 2109static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx) 2110{ 2111 struct r600_bytecode_alu alu; 2112 unsigned reg; 2113 2114 if (!ctx->shader->vs_out_edgeflag) 2115 return; 2116 2117 reg = ctx->shader->output[ctx->edgeflag_output].gpr; 2118 2119 /* clamp(x, 0, 1) */ 2120 memset(&alu, 0, sizeof(alu)); 2121 alu.op = ALU_OP1_MOV; 2122 alu.src[0].sel = reg; 2123 alu.dst.sel = reg; 2124 alu.dst.write = 1; 2125 alu.dst.clamp = 1; 2126 alu.last = 1; 2127 r600_bytecode_add_alu(ctx->bc, &alu); 2128 2129 memset(&alu, 0, sizeof(alu)); 2130 alu.op = ALU_OP1_FLT_TO_INT; 2131 alu.src[0].sel = reg; 2132 alu.dst.sel = reg; 2133 alu.dst.write = 1; 2134 alu.last = 1; 2135 r600_bytecode_add_alu(ctx->bc, &alu); 2136} 2137 2138static int generate_gs_copy_shader(struct r600_context *rctx, 2139 struct r600_pipe_shader *gs, 2140 struct pipe_stream_output_info *so) 2141{ 2142 struct r600_shader_ctx ctx = {}; 2143 struct r600_shader *gs_shader = &gs->shader; 2144 struct r600_pipe_shader *cshader; 2145 int ocnt = gs_shader->noutput; 2146 struct r600_bytecode_alu alu; 2147 struct r600_bytecode_vtx vtx; 2148 struct r600_bytecode_output output; 2149 struct r600_bytecode_cf *cf_jump, *cf_pop, 2150 *last_exp_pos = NULL, *last_exp_param = NULL; 2151 int i, j, next_clip_pos = 61, next_param = 0; 2152 int ring; 2153 bool only_ring_0 
/* Build the VS-stage "GS copy shader": a small vertex shader that reads
 * one vertex of GS output back from the GSVS ring buffer and exports it
 * (positions, params, clip distances), plus per-stream streamout stores.
 * R0.x on entry holds the ring offset with the stream id packed in its
 * top two bits. Returns the result of r600_bytecode_build().
 * NOTE(review): returns 0 (success) when calloc fails — callers cannot
 * distinguish OOM from success here; verify intent. */
static int generate_gs_copy_shader(struct r600_context *rctx,
				   struct r600_pipe_shader *gs,
				   struct pipe_stream_output_info *so)
{
	struct r600_shader_ctx ctx = {};
	struct r600_shader *gs_shader = &gs->shader;
	struct r600_pipe_shader *cshader;
	int ocnt = gs_shader->noutput;
	struct r600_bytecode_alu alu;
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_output output;
	struct r600_bytecode_cf *cf_jump, *cf_pop,
		*last_exp_pos = NULL, *last_exp_param = NULL;
	int i, j, next_clip_pos = 61, next_param = 0;
	int ring;
	bool only_ring_0 = true;
	cshader = calloc(1, sizeof(struct r600_pipe_shader));
	if (!cshader)
		return 0;

	memcpy(cshader->shader.output, gs_shader->output, ocnt *
	       sizeof(struct r600_shader_io));

	cshader->shader.noutput = ocnt;

	ctx.shader = &cshader->shader;
	ctx.bc = &ctx.shader->bc;
	ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX;

	r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family,
			   rctx->screen->has_compressed_msaa_texturing);

	ctx.bc->isa = rctx->isa;

	cf_jump = NULL;
	memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));

	/* R0.x = R0.x & 0x3fffffff  (strip the stream id bits -> ring offset) */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_AND_INT;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 0x3fffffff;
	alu.dst.write = 1;
	r600_bytecode_add_alu(ctx.bc, &alu);

	/* R0.y = R0.x >> 30  (extract the stream id) */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_LSHR_INT;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 0x1e;
	alu.dst.chan = 1;
	alu.dst.write = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx.bc, &alu);

	/* fetch vertex data from GSVS ring */
	for (i = 0; i < ocnt; ++i) {
		struct r600_shader_io *out = &ctx.shader->output[i];

		out->gpr = i + 1;
		out->ring_offset = i * 16;

		memset(&vtx, 0, sizeof(vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.mega_fetch_count = 16;
		vtx.offset = out->ring_offset;
		vtx.dst_gpr = out->gpr;
		vtx.src_gpr = 0;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		if (rctx->b.chip_class >= EVERGREEN) {
			vtx.use_const_fields = 1;
		} else {
			vtx.data_format = FMT_32_32_32_32_FLOAT;
		}

		r600_bytecode_add_vtx(ctx.bc, &vtx);
	}
	ctx.temp_reg = i + 1;

	/* one predicated section per stream that has streamout outputs */
	for (ring = 3; ring >= 0; --ring) {
		bool enabled = false;
		for (i = 0; i < so->num_outputs; i++) {
			if (so->output[i].stream == ring) {
				enabled = true;
				if (ring > 0)
					only_ring_0 = false;
				break;
			}
		}
		if (ring != 0 && !enabled) {
			cshader->shader.ring_item_sizes[ring] = 0;
			continue;
		}

		if (cf_jump) {
			// Patch up jump label
			r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
			cf_pop = ctx.bc->cf_last;

			cf_jump->cf_addr = cf_pop->id + 2;
			cf_jump->pop_count = 1;
			cf_pop->cf_addr = cf_pop->id + 2;
			cf_pop->pop_count = 1;
		}

		/* PRED_SETE_INT __, R0.y, ring */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP2_PRED_SETE_INT;
		alu.src[0].chan = 1;
		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = ring;
		alu.execute_mask = 1;
		alu.update_pred = 1;
		alu.last = 1;
		r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);

		r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
		cf_jump = ctx.bc->cf_last;

		if (enabled)
			emit_streamout(&ctx, so, only_ring_0 ? -1 : ring, &cshader->shader.ring_item_sizes[ring]);
		cshader->shader.ring_item_sizes[ring] = ocnt * 16;
	}

	/* bc adds nops - copy it */
	if (ctx.bc->chip_class == R600) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP0_NOP;
		alu.last = 1;
		r600_bytecode_add_alu(ctx.bc, &alu);

		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
	}

	/* export vertex data */
	/* XXX factor out common code with r600_shader_from_tgsi ? */
	for (i = 0; i < ocnt; ++i) {
		struct r600_shader_io *out = &ctx.shader->output[i];
		bool instream0 = true;
		if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
			continue;

		/* only export outputs that belong to stream 0 */
		for (j = 0; j < so->num_outputs; j++) {
			if (so->output[j].register_index == i) {
				if (so->output[j].stream == 0)
					break;
				if (so->output[j].stream > 0)
					instream0 = false;
			}
		}
		if (!instream0)
			continue;
		memset(&output, 0, sizeof(output));
		output.gpr = out->gpr;
		output.elem_size = 3;
		output.swizzle_x = 0;
		output.swizzle_y = 1;
		output.swizzle_z = 2;
		output.swizzle_w = 3;
		output.burst_count = 1;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
		output.op = CF_OP_EXPORT;
		switch (out->name) {
		case TGSI_SEMANTIC_POSITION:
			output.array_base = 60;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			break;

		case TGSI_SEMANTIC_PSIZE:
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output.swizzle_y = 7;
			output.swizzle_z = 7;
			output.swizzle_w = 7;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_point_size = 1;
			break;
		case TGSI_SEMANTIC_LAYER:
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output.swizzle_x = 7;
			output.swizzle_y = 7;
			output.swizzle_z = 0;
			output.swizzle_w = 7;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_layer = 1;
			break;
		case TGSI_SEMANTIC_VIEWPORT_INDEX:
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_viewport = 1;
			output.swizzle_x = 7;
			output.swizzle_y = 7;
			output.swizzle_z = 7;
			output.swizzle_w = 0;
			break;
		case TGSI_SEMANTIC_CLIPDIST:
			/* spi_sid is 0 for clipdistance outputs that were generated
			 * for clipvertex - we don't need to pass them to PS */
			ctx.shader->clip_dist_write = gs->shader.clip_dist_write;
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = next_clip_pos++;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			break;
		case TGSI_SEMANTIC_FOG:
			output.swizzle_y = 4; /* 0 */
			output.swizzle_z = 4; /* 0 */
			output.swizzle_w = 5; /* 1 */
			break;
		default:
			output.array_base = next_param++;
			break;
		}
		r600_bytecode_add_output(ctx.bc, &output);
		if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
			last_exp_param = ctx.bc->cf_last;
		else
			last_exp_pos = ctx.bc->cf_last;
	}

	/* the hardware requires at least one position export */
	if (!last_exp_pos) {
		memset(&output, 0, sizeof(output));
		output.gpr = 0;
		output.elem_size = 3;
		output.swizzle_x = 7;
		output.swizzle_y = 7;
		output.swizzle_z = 7;
		output.swizzle_w = 7;
		output.burst_count = 1;
		output.type = 2;
		output.op = CF_OP_EXPORT;
		output.array_base = 60;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
		r600_bytecode_add_output(ctx.bc, &output);
		last_exp_pos = ctx.bc->cf_last;
	}

	/* ... and at least one param export */
	if (!last_exp_param) {
		memset(&output, 0, sizeof(output));
		output.gpr = 0;
		output.elem_size = 3;
		output.swizzle_x = 7;
		output.swizzle_y = 7;
		output.swizzle_z = 7;
		output.swizzle_w = 7;
		output.burst_count = 1;
		output.type = 2;
		output.op = CF_OP_EXPORT;
		output.array_base = next_param++;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
		r600_bytecode_add_output(ctx.bc, &output);
		last_exp_param = ctx.bc->cf_last;
	}

	last_exp_pos->op = CF_OP_EXPORT_DONE;
	last_exp_param->op = CF_OP_EXPORT_DONE;

	/* patch up the final predicate jump */
	r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
	cf_pop = ctx.bc->cf_last;

	cf_jump->cf_addr = cf_pop->id + 2;
	cf_jump->pop_count = 1;
	cf_pop->cf_addr = cf_pop->id + 2;
	cf_pop->pop_count = 1;

	if (ctx.bc->chip_class == CAYMAN)
		cm_bytecode_add_cf_end(ctx.bc);
	else {
		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
		ctx.bc->cf_last->end_of_program = 1;
	}

	gs->gs_copy_shader = cshader;
	cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;

	ctx.bc->nstack = 1;

	return r600_bytecode_build(ctx.bc);
}

/* For indirect GS ring writes, advance the per-stream export address
 * register by one vertex worth of ring space (gs_out_ring_offset is in
 * bytes, the index register counts 16-byte slots). No-op when !ind. */
static int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind)
{
	if (ind) {
		struct r600_bytecode_alu alu;
		int r;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD_INT;
		alu.src[0].sel = ctx->gs_export_gpr_tregs[idx];
		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = ctx->gs_out_ring_offset >> 4;
		alu.dst.sel = ctx->gs_export_gpr_tregs[idx];
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
/* Emit MEM_RING writes storing one vertex worth of shader outputs to the
 * ESGS/GSVS ring for the given stream (-1 means stream 0, ring 0).
 * When 'ind' is set the ring address comes from the per-stream export
 * address register; otherwise a static offset based on gs_next_vertex
 * is used. Increments gs_next_vertex on completion. */
static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind)
{
	struct r600_bytecode_output output;
	int i, k, ring_offset;
	int effective_stream = stream == -1 ? 0 : stream;
	int idx = 0;

	for (i = 0; i < ctx->shader->noutput; i++) {
		if (ctx->gs_for_vs) {
			/* for ES we need to lookup corresponding ring offset expected by GS
			 * (map this output to GS input by name and sid) */
			/* FIXME precompute offsets */
			ring_offset = -1;
			for(k = 0; k < ctx->gs_for_vs->ninput; ++k) {
				struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
				struct r600_shader_io *out = &ctx->shader->output[i];
				if (in->name == out->name && in->sid == out->sid)
					ring_offset = in->ring_offset;
			}

			/* GS never reads this output - no need to store it */
			if (ring_offset == -1)
				continue;
		} else {
			ring_offset = idx * 16;
			idx++;
		}

		if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
			continue;
		/* next_ring_offset after parsing input decls contains total size of
		 * single vertex data, gs_next_vertex - current vertex index */
		if (!ind)
			ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;

		memset(&output, 0, sizeof(struct r600_bytecode_output));
		output.gpr = ctx->shader->output[i].gpr;
		output.elem_size = 3;
		output.comp_mask = 0xF;
		output.burst_count = 1;

		if (ind)
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
		else
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;

		switch (stream) {
		default:
		case 0:
			output.op = CF_OP_MEM_RING; break;
		case 1:
			output.op = CF_OP_MEM_RING1; break;
		case 2:
			output.op = CF_OP_MEM_RING2; break;
		case 3:
			output.op = CF_OP_MEM_RING3; break;
		}

		if (ind) {
			output.array_base = ring_offset >> 2; /* in dwords */
			output.array_size = 0xfff;
			output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
		} else
			output.array_base = ring_offset >> 2; /* in dwords */
		r600_bytecode_add_output(ctx->bc, &output);
	}

	++ctx->gs_next_vertex;
	return 0;
}


/* Fetch the tessellation LDS layout info (strides/offsets) from the
 * R600_LDS_INFO_CONST_BUFFER into the reserved tess_input_info and
 * tess_output_info GPRs via VFETCH. Either destination may be 0 to skip
 * that fetch (VS/TCS use input info, TCS/TES use output info). */
static int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx)
{
	int r;
	struct r600_bytecode_vtx vtx;
	int temp_val = ctx->temp_reg;
	/* need to store the TCS output somewhere */
	r = single_alu_op2(ctx, ALU_OP1_MOV,
			   temp_val, 0,
			   V_SQ_ALU_SRC_LITERAL, 0,
			   0, 0);
	if (r)
		return r;

	/* used by VS/TCS */
	if (ctx->tess_input_info) {
		/* fetch tcs input values into resv space */
		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.mega_fetch_count = 16;
		vtx.data_format = FMT_32_32_32_32;
		vtx.num_format_all = 2;
		vtx.format_comp_all = 1;
		vtx.use_const_fields = 0;
		vtx.endian = r600_endian_swap(32);
		vtx.srf_mode_all = 1;
		vtx.offset = 0;
		vtx.dst_gpr = ctx->tess_input_info;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		vtx.src_gpr = temp_val;
		vtx.src_sel_x = 0;

		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
		if (r)
			return r;
	}

	/* used by TCS/TES */
	if (ctx->tess_output_info) {
		/* fetch tcs output values into resv space */
		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.mega_fetch_count = 16;
		vtx.data_format = FMT_32_32_32_32;
		vtx.num_format_all = 2;
		vtx.format_comp_all = 1;
		vtx.use_const_fields = 0;
		vtx.endian = r600_endian_swap(32);
		vtx.srf_mode_all = 1;
		vtx.offset = 16;
		vtx.dst_gpr = ctx->tess_output_info;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		vtx.src_gpr = temp_val;
		vtx.src_sel_x = 0;

		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
		if (r)
			return r;
	}
	return 0;
}
/* VS-as-LS epilogue: write all VS outputs into LDS, two dwords at a time
 * with LDS_WRITE_REL, at addresses derived from the vertex id and the
 * per-vertex dword stride fetched via r600_fetch_tess_io_info(). */
static int emit_lds_vs_writes(struct r600_shader_ctx *ctx)
{
	int i, j, r;
	int temp_reg;

	/* fetch tcs input values into input_vals */
	ctx->tess_input_info = r600_get_temp(ctx);
	ctx->tess_output_info = 0;
	r = r600_fetch_tess_io_info(ctx);
	if (r)
		return r;

	temp_reg = r600_get_temp(ctx);
	/* dst reg contains LDS address stride * idx */
	/* MUL vertexID, vertex_dw_stride */
	r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24,
			   temp_reg, 0,
			   ctx->tess_input_info, 1,
			   0, 1); /* rel id in r0.y? */
	if (r)
		return r;

	for (i = 0; i < ctx->shader->noutput; i++) {
		struct r600_bytecode_alu alu;
		int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid);

		/* temp.y = base + byte offset of this output within the vertex */
		if (param) {
			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
					   temp_reg, 1,
					   temp_reg, 0,
					   V_SQ_ALU_SRC_LITERAL, param * 16);
			if (r)
				return r;
		}

		/* temp.z = address of the second dword pair (+8 bytes) */
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, 2,
				   temp_reg, param ? 1 : 0,
				   V_SQ_ALU_SRC_LITERAL, 8);
		if (r)
			return r;


		/* write xy then zw, two dwords per LDS_WRITE_REL */
		for (j = 0; j < 2; j++) {
			int chan = (j == 1) ? 2 : (param ? 1 : 0);
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = LDS_OP3_LDS_WRITE_REL;
			alu.src[0].sel = temp_reg;
			alu.src[0].chan = chan;
			alu.src[1].sel = ctx->shader->output[i].gpr;
			alu.src[1].chan = j * 2;
			alu.src[2].sel = ctx->shader->output[i].gpr;
			alu.src[2].chan = (j * 2) + 1;
			alu.last = 1;
			alu.dst.chan = 0;
			alu.lds_idx = 1;
			alu.is_lds_idx_op = true;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}

/* Store the current TGSI instruction's TCS output destination into LDS.
 * Per-channel byte addresses are first built in temp.[0..lasti], then
 * adjacent channel pairs (xy / zw, when both are written) are stored with
 * one LDS_WRITE_REL each, remaining channels with plain LDS_WRITE. */
static int r600_store_tcs_output(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const struct tgsi_full_dst_register *dst = &inst->Dst[0];
	int i, r, lasti;
	int temp_reg = r600_get_temp(ctx);
	struct r600_bytecode_alu alu;
	unsigned write_mask = dst->Register.WriteMask;

	if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
		return 0;

	r = get_lds_offset0(ctx, 1, temp_reg, dst->Register.Dimension ? false : true);
	if (r)
		return r;

	/* the base address is now in temp.x */
	r = r600_get_byte_address(ctx, temp_reg,
				  &inst->Dst[0], NULL, ctx->tess_output_info, 1);
	if (r)
		return r;

	/* LDS write */
	lasti = tgsi_last_instruction(write_mask);
	/* channel 0 already holds the base address; compute the others */
	for (i = 1; i <= lasti; i++) {

		if (!(write_mask & (1 << i)))
			continue;
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, i,
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 4 * i);
		if (r)
			return r;
	}

	for (i = 0; i <= lasti; i++) {
		if (!(write_mask & (1 << i)))
			continue;

		/* pair xy or zw into a single two-dword LDS_WRITE_REL */
		if ((i == 0 && ((write_mask & 3) == 3)) ||
		    (i == 2 && ((write_mask & 0xc) == 0xc))) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = LDS_OP3_LDS_WRITE_REL;
			alu.src[0].sel = temp_reg;
			alu.src[0].chan = i;

			alu.src[1].sel = dst->Register.Index;
			alu.src[1].sel += ctx->file_offset[dst->Register.File];
			alu.src[1].chan = i;

			alu.src[2].sel = dst->Register.Index;
			alu.src[2].sel += ctx->file_offset[dst->Register.File];
			alu.src[2].chan = i + 1;
			alu.lds_idx = 1;
			alu.dst.chan = 0;
			alu.last = 1;
			alu.is_lds_idx_op = true;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
			i += 1;
			continue;
		}
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = LDS_OP2_LDS_WRITE;
		alu.src[0].sel = temp_reg;
		alu.src[0].chan = i;

		alu.src[1].sel = dst->Register.Index;
		alu.src[1].sel += ctx->file_offset[dst->Register.File];
		alu.src[1].chan = i;

		alu.src[2].sel = V_SQ_ALU_SRC_0;
		alu.dst.chan = 0;
		alu.last = 1;
		alu.is_lds_idx_op = true;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
/* Read the tess factor output (TESSINNER/TESSOUTER, sid 0) for the given
 * output index back from LDS into that output's GPR.
 * NOTE(review): the do_lds_fetch_values() return value is ignored here —
 * consider propagating it like the other call sites. */
static int r600_tess_factor_read(struct r600_shader_ctx *ctx,
				 int output_idx)
{
	int param;
	unsigned temp_reg = r600_get_temp(ctx);
	unsigned name = ctx->shader->output[output_idx].name;
	int dreg = ctx->shader->output[output_idx].gpr;
	int r;

	param = r600_get_lds_unique_index(name, 0);
	r = get_lds_offset0(ctx, 1, temp_reg, true);
	if (r)
		return r;

	r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
			   temp_reg, 0,
			   temp_reg, 0,
			   V_SQ_ALU_SRC_LITERAL, param * 16);
	if (r)
		return r;

	do_lds_fetch_values(ctx, temp_reg, dreg);
	return 0;
}

/* TCS epilogue: write the tessellation factors for this patch to the
 * tess-factor ring via GDS TF_WRITE, executed only by invocation 0
 * (guarded by a predicated JUMP/POP pair). Returns -1 if the shader is
 * missing required TESSOUTER/TESSINNER outputs for its primitive mode. */
static int r600_emit_tess_factor(struct r600_shader_ctx *ctx)
{
	int i;
	int stride, outer_comps, inner_comps;
	int tessinner_idx = -1, tessouter_idx = -1;
	int r;
	int temp_reg = r600_get_temp(ctx);
	int treg[3] = {-1, -1, -1};
	struct r600_bytecode_alu alu;
	struct r600_bytecode_cf *cf_jump, *cf_pop;

	/* only execute factor emission for invocation 0 */
	/* PRED_SETE_INT __, R0.x, 0 */
	/* NOTE(review): the comment above says R0.x but chan 2 (R0.z) is
	 * compared against literal 0 — verify which channel actually holds
	 * the invocation id. */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_PRED_SETE_INT;
	alu.src[0].chan = 2;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.execute_mask = 1;
	alu.update_pred = 1;
	alu.last = 1;
	r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
	cf_jump = ctx->bc->cf_last;

	treg[0] = r600_get_temp(ctx);
	switch (ctx->shader->tcs_prim_mode) {
	case PIPE_PRIM_LINES:
		stride = 8; /* 2 dwords, 1 vec2 store */
		outer_comps = 2;
		inner_comps = 0;
		break;
	case PIPE_PRIM_TRIANGLES:
		stride = 16; /* 4 dwords, 1 vec4 store */
		outer_comps = 3;
		inner_comps = 1;
		treg[1] = r600_get_temp(ctx);
		break;
	case PIPE_PRIM_QUADS:
		stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */
		outer_comps = 4;
		inner_comps = 2;
		treg[1] = r600_get_temp(ctx);
		treg[2] = r600_get_temp(ctx);
		break;
	default:
		assert(0);
		return -1;
	}

	/* R0 is InvocationID, RelPatchID, PatchID, tf_base */
	/* TF_WRITE takes index in R.x, value in R.y */
	for (i = 0; i < ctx->shader->noutput; i++) {
		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSINNER)
			tessinner_idx = i;
		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSOUTER)
			tessouter_idx = i;
	}

	if (tessouter_idx == -1)
		return -1;

	if (tessinner_idx == -1 && inner_comps)
		return -1;

	if (tessouter_idx != -1) {
		r = r600_tess_factor_read(ctx, tessouter_idx);
		if (r)
			return r;
	}

	if (tessinner_idx != -1) {
		r = r600_tess_factor_read(ctx, tessinner_idx);
		if (r)
			return r;
	}

	/* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */
	/* r.x = relpatchid(r0.y) * tf_stride */

	/* multiply incoming r0.y * stride - t.x = r0.y * stride */
	/* add incoming r0.w to it: t.x = t.x + r0.w */
	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
			   temp_reg, 0,
			   0, 1,
			   V_SQ_ALU_SRC_LITERAL, stride,
			   0, 3);
	if (r)
		return r;

	/* build (index, value) pairs: treg[i/2].{x,z} = address, .{y,w} = factor */
	for (i = 0; i < outer_comps + inner_comps; i++) {
		int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx;
		int out_comp = i >= outer_comps ? i - outer_comps : i;

		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   treg[i / 2], (2 * (i % 2)),
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 4 * i);
		if (r)
			return r;
		r = single_alu_op2(ctx, ALU_OP1_MOV,
				   treg[i / 2], 1 + (2 * (i%2)),
				   ctx->shader->output[out_idx].gpr, out_comp,
				   0, 0);
		if (r)
			return r;
	}
	for (i = 0; i < outer_comps + inner_comps; i++) {
		struct r600_bytecode_gds gds;

		memset(&gds, 0, sizeof(struct r600_bytecode_gds));
		gds.src_gpr = treg[i / 2];
		gds.src_sel_x = 2 * (i % 2);
		gds.src_sel_y = 1 + (2 * (i % 2));
		gds.src_sel_z = 4;
		gds.dst_sel_x = 7;
		gds.dst_sel_y = 7;
		gds.dst_sel_z = 7;
		gds.dst_sel_w = 7;
		gds.op = FETCH_OP_TF_WRITE;
		r = r600_bytecode_add_gds(ctx->bc, &gds);
		if (r)
			return r;
	}

	// Patch up jump label
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
	cf_pop = ctx->bc->cf_last;

	cf_jump->cf_addr = cf_pop->id + 2;
	cf_jump->pop_count = 1;
	cf_pop->cf_addr = cf_pop->id + 2;
	cf_pop->pop_count = 1;

	return 0;
}

/* Main TGSI -> r600 bytecode translation entry point.
 * (Definition continues past this chunk; only the head is visible here.) */
static int r600_shader_from_tgsi(struct r600_context *rctx,
				 struct r600_pipe_shader *pipeshader,
				 union r600_shader_key key)
{
	struct r600_screen *rscreen = rctx->screen;
	struct r600_shader *shader = &pipeshader->shader;
	struct tgsi_token *tokens = pipeshader->selector->tokens;
	struct pipe_stream_output_info so = pipeshader->selector->so;
	struct tgsi_full_immediate *immediate;
	struct r600_shader_ctx ctx;
	struct r600_bytecode_output output[32];
	unsigned output_done, noutput;
	unsigned opcode;
	int i, j, k, r = 0;
	int next_param_base = 0, next_clip_base;
	int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
	/* Declarations used by llvm code */
	bool use_llvm = false;
	bool indirect_gprs;
	bool ring_outputs = false;
	bool lds_outputs = false;
	bool lds_inputs
= false; 2942 bool pos_emitted = false; 2943 2944#ifdef R600_USE_LLVM 2945 use_llvm = rscreen->b.debug_flags & DBG_LLVM; 2946#endif 2947 ctx.bc = &shader->bc; 2948 ctx.shader = shader; 2949 ctx.native_integers = true; 2950 2951 2952 r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, 2953 rscreen->has_compressed_msaa_texturing); 2954 ctx.tokens = tokens; 2955 tgsi_scan_shader(tokens, &ctx.info); 2956 shader->indirect_files = ctx.info.indirect_files; 2957 2958 shader->uses_doubles = ctx.info.uses_doubles; 2959 2960 indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER)); 2961 tgsi_parse_init(&ctx.parse, tokens); 2962 ctx.type = ctx.info.processor; 2963 shader->processor_type = ctx.type; 2964 ctx.bc->type = shader->processor_type; 2965 2966 switch (ctx.type) { 2967 case TGSI_PROCESSOR_VERTEX: 2968 shader->vs_as_gs_a = key.vs.as_gs_a; 2969 shader->vs_as_es = key.vs.as_es; 2970 shader->vs_as_ls = key.vs.as_ls; 2971 if (shader->vs_as_es) 2972 ring_outputs = true; 2973 if (shader->vs_as_ls) 2974 lds_outputs = true; 2975 break; 2976 case TGSI_PROCESSOR_GEOMETRY: 2977 ring_outputs = true; 2978 break; 2979 case TGSI_PROCESSOR_TESS_CTRL: 2980 shader->tcs_prim_mode = key.tcs.prim_mode; 2981 lds_outputs = true; 2982 lds_inputs = true; 2983 break; 2984 case TGSI_PROCESSOR_TESS_EVAL: 2985 shader->tes_as_es = key.tes.as_es; 2986 lds_inputs = true; 2987 if (shader->tes_as_es) 2988 ring_outputs = true; 2989 break; 2990 case TGSI_PROCESSOR_FRAGMENT: 2991 shader->two_side = key.ps.color_two_side; 2992 break; 2993 default: 2994 break; 2995 } 2996 2997 if (shader->vs_as_es || shader->tes_as_es) { 2998 ctx.gs_for_vs = &rctx->gs_shader->current->shader; 2999 } else { 3000 ctx.gs_for_vs = NULL; 3001 } 3002 3003 ctx.next_ring_offset = 0; 3004 ctx.gs_out_ring_offset = 0; 3005 ctx.gs_next_vertex = 0; 3006 ctx.gs_stream_output_info = &so; 3007 3008 ctx.face_gpr = -1; 3009 ctx.fixed_pt_position_gpr = -1; 3010 ctx.fragcoord_input = -1; 
3011 ctx.colors_used = 0; 3012 ctx.clip_vertex_write = 0; 3013 3014 shader->nr_ps_color_exports = 0; 3015 shader->nr_ps_max_color_exports = 0; 3016 3017 3018 /* register allocations */ 3019 /* Values [0,127] correspond to GPR[0..127]. 3020 * Values [128,159] correspond to constant buffer bank 0 3021 * Values [160,191] correspond to constant buffer bank 1 3022 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 3023 * Values [256,287] correspond to constant buffer bank 2 (EG) 3024 * Values [288,319] correspond to constant buffer bank 3 (EG) 3025 * Other special values are shown in the list below. 3026 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 3027 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 3028 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 3029 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 3030 * 248 SQ_ALU_SRC_0: special constant 0.0. 3031 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 3032 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 3033 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 3034 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 3035 * 253 SQ_ALU_SRC_LITERAL: literal constant. 3036 * 254 SQ_ALU_SRC_PV: previous vector result. 3037 * 255 SQ_ALU_SRC_PS: previous scalar result. 3038 */ 3039 for (i = 0; i < TGSI_FILE_COUNT; i++) { 3040 ctx.file_offset[i] = 0; 3041 } 3042 3043#ifdef R600_USE_LLVM 3044 if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) { 3045 fprintf(stderr, "Warning: R600 LLVM backend does not support " 3046 "indirect adressing. 
Falling back to TGSI " 3047 "backend.\n"); 3048 use_llvm = 0; 3049 } 3050#endif 3051 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 3052 ctx.file_offset[TGSI_FILE_INPUT] = 1; 3053 if (!use_llvm) { 3054 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); 3055 } 3056 } 3057 if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { 3058 if (ctx.bc->chip_class >= EVERGREEN) 3059 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 3060 else 3061 ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); 3062 } 3063 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3064 /* FIXME 1 would be enough in some cases (3 or less input vertices) */ 3065 ctx.file_offset[TGSI_FILE_INPUT] = 2; 3066 } 3067 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) 3068 ctx.file_offset[TGSI_FILE_INPUT] = 1; 3069 if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) { 3070 bool add_tesscoord = false, add_tess_inout = false; 3071 ctx.file_offset[TGSI_FILE_INPUT] = 1; 3072 for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { 3073 /* if we have tesscoord save one reg */ 3074 if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSCOORD) 3075 add_tesscoord = true; 3076 if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSINNER || 3077 ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSOUTER) 3078 add_tess_inout = true; 3079 } 3080 if (add_tesscoord || add_tess_inout) 3081 ctx.file_offset[TGSI_FILE_INPUT]++; 3082 if (add_tess_inout) 3083 ctx.file_offset[TGSI_FILE_INPUT]+=2; 3084 } 3085 ctx.use_llvm = use_llvm; 3086 3087 if (use_llvm) { 3088 ctx.file_offset[TGSI_FILE_OUTPUT] = 3089 ctx.file_offset[TGSI_FILE_INPUT]; 3090 } else { 3091 ctx.file_offset[TGSI_FILE_OUTPUT] = 3092 ctx.file_offset[TGSI_FILE_INPUT] + 3093 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3094 } 3095 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 3096 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 3097 3098 /* Outside the GPR range. 
This will be translated to one of the 3099 * kcache banks later. */ 3100 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 3101 3102 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 3103 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 3104 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 3105 ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1; 3106 ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2; 3107 3108 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) { 3109 ctx.tess_input_info = ctx.bc->ar_reg + 3; 3110 ctx.tess_output_info = ctx.bc->ar_reg + 4; 3111 ctx.temp_reg = ctx.bc->ar_reg + 5; 3112 } else if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) { 3113 ctx.tess_input_info = 0; 3114 ctx.tess_output_info = ctx.bc->ar_reg + 3; 3115 ctx.temp_reg = ctx.bc->ar_reg + 4; 3116 } else if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3117 ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3; 3118 ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4; 3119 ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5; 3120 ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6; 3121 ctx.temp_reg = ctx.bc->ar_reg + 7; 3122 } else { 3123 ctx.temp_reg = ctx.bc->ar_reg + 3; 3124 } 3125 3126 shader->max_arrays = 0; 3127 shader->num_arrays = 0; 3128 if (indirect_gprs) { 3129 3130 if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { 3131 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], 3132 ctx.file_offset[TGSI_FILE_OUTPUT] - 3133 ctx.file_offset[TGSI_FILE_INPUT], 3134 0x0F); 3135 } 3136 if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 3137 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], 3138 ctx.file_offset[TGSI_FILE_TEMPORARY] - 3139 ctx.file_offset[TGSI_FILE_OUTPUT], 3140 0x0F); 3141 } 3142 } 3143 3144 ctx.nliterals = 0; 3145 ctx.literals = NULL; 3146 3147 shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]; 3148 shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 3149 shader->ps_conservative_z = 
(uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]; 3150 3151 if (shader->vs_as_gs_a) 3152 vs_add_primid_output(&ctx, key.vs.prim_id_out); 3153 3154 if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) 3155 r600_fetch_tess_io_info(&ctx); 3156 3157 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3158 tgsi_parse_token(&ctx.parse); 3159 switch (ctx.parse.FullToken.Token.Type) { 3160 case TGSI_TOKEN_TYPE_IMMEDIATE: 3161 immediate = &ctx.parse.FullToken.FullImmediate; 3162 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 3163 if(ctx.literals == NULL) { 3164 r = -ENOMEM; 3165 goto out_err; 3166 } 3167 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 3168 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 3169 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 3170 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 3171 ctx.nliterals++; 3172 break; 3173 case TGSI_TOKEN_TYPE_DECLARATION: 3174 r = tgsi_declaration(&ctx); 3175 if (r) 3176 goto out_err; 3177 break; 3178 case TGSI_TOKEN_TYPE_INSTRUCTION: 3179 case TGSI_TOKEN_TYPE_PROPERTY: 3180 break; 3181 default: 3182 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 3183 r = -EINVAL; 3184 goto out_err; 3185 } 3186 } 3187 3188 shader->ring_item_sizes[0] = ctx.next_ring_offset; 3189 shader->ring_item_sizes[1] = 0; 3190 shader->ring_item_sizes[2] = 0; 3191 shader->ring_item_sizes[3] = 0; 3192 3193 /* Process two side if needed */ 3194 if (shader->two_side && ctx.colors_used) { 3195 int i, count = ctx.shader->ninput; 3196 unsigned next_lds_loc = ctx.shader->nlds; 3197 3198 /* additional inputs will be allocated right after the existing inputs, 3199 * we won't need them after the color selection, so we don't need to 3200 * reserve these gprs for the rest of the shader code and to adjust 3201 * output offsets etc. 
*/ 3202 int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 3203 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3204 3205 /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */ 3206 if (ctx.face_gpr == -1) { 3207 i = ctx.shader->ninput++; 3208 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 3209 ctx.shader->input[i].spi_sid = 0; 3210 ctx.shader->input[i].gpr = gpr++; 3211 ctx.face_gpr = ctx.shader->input[i].gpr; 3212 } 3213 3214 for (i = 0; i < count; i++) { 3215 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 3216 int ni = ctx.shader->ninput++; 3217 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 3218 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 3219 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 3220 ctx.shader->input[ni].gpr = gpr++; 3221 // TGSI to LLVM needs to know the lds position of inputs. 3222 // Non LLVM path computes it later (in process_twoside_color) 3223 ctx.shader->input[ni].lds_pos = next_lds_loc++; 3224 ctx.shader->input[i].back_color_input = ni; 3225 if (ctx.bc->chip_class >= EVERGREEN) { 3226 if ((r = evergreen_interp_input(&ctx, ni))) 3227 return r; 3228 } 3229 } 3230 } 3231 } 3232 3233/* LLVM backend setup */ 3234#ifdef R600_USE_LLVM 3235 if (use_llvm) { 3236 struct radeon_llvm_context radeon_llvm_ctx; 3237 LLVMModuleRef mod; 3238 bool dump = r600_can_dump_shader(&rscreen->b, tokens); 3239 boolean use_kill = false; 3240 3241 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); 3242 radeon_llvm_ctx.type = ctx.type; 3243 radeon_llvm_ctx.two_side = shader->two_side; 3244 radeon_llvm_ctx.face_gpr = ctx.face_gpr; 3245 radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1; 3246 radeon_llvm_ctx.r600_inputs = ctx.shader->input; 3247 radeon_llvm_ctx.r600_outputs = ctx.shader->output; 3248 radeon_llvm_ctx.color_buffer_count = max_color_exports; 3249 radeon_llvm_ctx.chip_class = ctx.bc->chip_class; 3250 radeon_llvm_ctx.fs_color_all = 
shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN); 3251 radeon_llvm_ctx.stream_outputs = &so; 3252 radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one; 3253 radeon_llvm_ctx.has_compressed_msaa_texturing = 3254 ctx.bc->has_compressed_msaa_texturing; 3255 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); 3256 ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp; 3257 ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers; 3258 3259 if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) { 3260 radeon_llvm_dispose(&radeon_llvm_ctx); 3261 use_llvm = 0; 3262 fprintf(stderr, "R600 LLVM backend failed to compile " 3263 "shader. Falling back to TGSI\n"); 3264 } else { 3265 ctx.file_offset[TGSI_FILE_OUTPUT] = 3266 ctx.file_offset[TGSI_FILE_INPUT]; 3267 } 3268 if (use_kill) 3269 ctx.shader->uses_kill = use_kill; 3270 radeon_llvm_dispose(&radeon_llvm_ctx); 3271 } 3272#endif 3273/* End of LLVM backend setup */ 3274 3275 if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) 3276 shader->nr_ps_max_color_exports = 8; 3277 3278 if (!use_llvm) { 3279 if (ctx.fragcoord_input >= 0) { 3280 if (ctx.bc->chip_class == CAYMAN) { 3281 for (j = 0 ; j < 4; j++) { 3282 struct r600_bytecode_alu alu; 3283 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3284 alu.op = ALU_OP1_RECIP_IEEE; 3285 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3286 alu.src[0].chan = 3; 3287 3288 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3289 alu.dst.chan = j; 3290 alu.dst.write = (j == 3); 3291 alu.last = 1; 3292 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3293 return r; 3294 } 3295 } else { 3296 struct r600_bytecode_alu alu; 3297 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3298 alu.op = ALU_OP1_RECIP_IEEE; 3299 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3300 alu.src[0].chan = 3; 3301 3302 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3303 alu.dst.chan = 3; 3304 alu.dst.write = 1; 3305 
alu.last = 1; 3306 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3307 return r; 3308 } 3309 } 3310 3311 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3312 struct r600_bytecode_alu alu; 3313 int r; 3314 3315 /* GS thread with no output workaround - emit a cut at start of GS */ 3316 if (ctx.bc->chip_class == R600) 3317 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); 3318 3319 for (j = 0; j < 4; j++) { 3320 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3321 alu.op = ALU_OP1_MOV; 3322 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3323 alu.src[0].value = 0; 3324 alu.dst.sel = ctx.gs_export_gpr_tregs[j]; 3325 alu.dst.write = 1; 3326 alu.last = 1; 3327 r = r600_bytecode_add_alu(ctx.bc, &alu); 3328 if (r) 3329 return r; 3330 } 3331 } 3332 3333 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) 3334 r600_fetch_tess_io_info(&ctx); 3335 3336 if (shader->two_side && ctx.colors_used) { 3337 if ((r = process_twoside_color_inputs(&ctx))) 3338 return r; 3339 } 3340 3341 tgsi_parse_init(&ctx.parse, tokens); 3342 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3343 tgsi_parse_token(&ctx.parse); 3344 switch (ctx.parse.FullToken.Token.Type) { 3345 case TGSI_TOKEN_TYPE_INSTRUCTION: 3346 r = tgsi_is_supported(&ctx); 3347 if (r) 3348 goto out_err; 3349 ctx.max_driver_temp_used = 0; 3350 /* reserve first tmp for everyone */ 3351 r600_get_temp(&ctx); 3352 3353 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 3354 if ((r = tgsi_split_constant(&ctx))) 3355 goto out_err; 3356 if ((r = tgsi_split_literal_constant(&ctx))) 3357 goto out_err; 3358 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 3359 if ((r = tgsi_split_gs_inputs(&ctx))) 3360 goto out_err; 3361 } else if (lds_inputs) { 3362 if ((r = tgsi_split_lds_inputs(&ctx))) 3363 goto out_err; 3364 } 3365 if (ctx.bc->chip_class == CAYMAN) 3366 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 3367 else if (ctx.bc->chip_class >= EVERGREEN) 3368 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 3369 else 3370 ctx.inst_info = 
&r600_shader_tgsi_instruction[opcode]; 3371 r = ctx.inst_info->process(&ctx); 3372 if (r) 3373 goto out_err; 3374 3375 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) { 3376 r = r600_store_tcs_output(&ctx); 3377 if (r) 3378 goto out_err; 3379 } 3380 break; 3381 default: 3382 break; 3383 } 3384 } 3385 } 3386 3387 /* Reset the temporary register counter. */ 3388 ctx.max_driver_temp_used = 0; 3389 3390 noutput = shader->noutput; 3391 3392 if (!ring_outputs && ctx.clip_vertex_write) { 3393 unsigned clipdist_temp[2]; 3394 3395 clipdist_temp[0] = r600_get_temp(&ctx); 3396 clipdist_temp[1] = r600_get_temp(&ctx); 3397 3398 /* need to convert a clipvertex write into clipdistance writes and not export 3399 the clip vertex anymore */ 3400 3401 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 3402 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3403 shader->output[noutput].gpr = clipdist_temp[0]; 3404 noutput++; 3405 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3406 shader->output[noutput].gpr = clipdist_temp[1]; 3407 noutput++; 3408 3409 /* reset spi_sid for clipvertex output to avoid confusing spi */ 3410 shader->output[ctx.cv_output].spi_sid = 0; 3411 3412 shader->clip_dist_write = 0xFF; 3413 3414 for (i = 0; i < 8; i++) { 3415 int oreg = i >> 2; 3416 int ochan = i & 3; 3417 3418 for (j = 0; j < 4; j++) { 3419 struct r600_bytecode_alu alu; 3420 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3421 alu.op = ALU_OP2_DOT4; 3422 alu.src[0].sel = shader->output[ctx.cv_output].gpr; 3423 alu.src[0].chan = j; 3424 3425 alu.src[1].sel = 512 + i; 3426 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 3427 alu.src[1].chan = j; 3428 3429 alu.dst.sel = clipdist_temp[oreg]; 3430 alu.dst.chan = j; 3431 alu.dst.write = (j == ochan); 3432 if (j == 3) 3433 alu.last = 1; 3434 if (!use_llvm) 3435 r = r600_bytecode_add_alu(ctx.bc, &alu); 3436 if (r) 3437 return r; 3438 } 3439 } 3440 } 3441 3442 /* Add stream outputs. 
*/ 3443 if (!use_llvm && so.num_outputs) { 3444 bool emit = false; 3445 if (!lds_outputs && !ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX) 3446 emit = true; 3447 if (!ring_outputs && ctx.type == TGSI_PROCESSOR_TESS_EVAL) 3448 emit = true; 3449 if (emit) 3450 emit_streamout(&ctx, &so, -1, NULL); 3451 } 3452 pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 3453 convert_edgeflag_to_int(&ctx); 3454 3455 if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) 3456 r600_emit_tess_factor(&ctx); 3457 3458 if (lds_outputs) { 3459 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 3460 if (ctx.shader->noutput) 3461 emit_lds_vs_writes(&ctx); 3462 } 3463 } else if (ring_outputs) { 3464 if (shader->vs_as_es || shader->tes_as_es) { 3465 ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx); 3466 ctx.gs_export_gpr_tregs[1] = -1; 3467 ctx.gs_export_gpr_tregs[2] = -1; 3468 ctx.gs_export_gpr_tregs[3] = -1; 3469 3470 emit_gs_ring_writes(&ctx, &so, -1, FALSE); 3471 } 3472 } else { 3473 /* Export output */ 3474 next_clip_base = shader->vs_out_misc_write ? 
62 : 61; 3475 3476 for (i = 0, j = 0; i < noutput; i++, j++) { 3477 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3478 output[j].gpr = shader->output[i].gpr; 3479 output[j].elem_size = 3; 3480 output[j].swizzle_x = 0; 3481 output[j].swizzle_y = 1; 3482 output[j].swizzle_z = 2; 3483 output[j].swizzle_w = 3; 3484 output[j].burst_count = 1; 3485 output[j].type = -1; 3486 output[j].op = CF_OP_EXPORT; 3487 switch (ctx.type) { 3488 case TGSI_PROCESSOR_VERTEX: 3489 case TGSI_PROCESSOR_TESS_EVAL: 3490 switch (shader->output[i].name) { 3491 case TGSI_SEMANTIC_POSITION: 3492 output[j].array_base = 60; 3493 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3494 pos_emitted = true; 3495 break; 3496 3497 case TGSI_SEMANTIC_PSIZE: 3498 output[j].array_base = 61; 3499 output[j].swizzle_y = 7; 3500 output[j].swizzle_z = 7; 3501 output[j].swizzle_w = 7; 3502 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3503 pos_emitted = true; 3504 break; 3505 case TGSI_SEMANTIC_EDGEFLAG: 3506 output[j].array_base = 61; 3507 output[j].swizzle_x = 7; 3508 output[j].swizzle_y = 0; 3509 output[j].swizzle_z = 7; 3510 output[j].swizzle_w = 7; 3511 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3512 pos_emitted = true; 3513 break; 3514 case TGSI_SEMANTIC_LAYER: 3515 /* spi_sid is 0 for outputs that are 3516 * not consumed by PS */ 3517 if (shader->output[i].spi_sid) { 3518 output[j].array_base = next_param_base++; 3519 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3520 j++; 3521 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3522 } 3523 output[j].array_base = 61; 3524 output[j].swizzle_x = 7; 3525 output[j].swizzle_y = 7; 3526 output[j].swizzle_z = 0; 3527 output[j].swizzle_w = 7; 3528 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3529 pos_emitted = true; 3530 break; 3531 case TGSI_SEMANTIC_VIEWPORT_INDEX: 3532 /* spi_sid is 0 for outputs that are 3533 * not consumed by PS */ 3534 if 
(shader->output[i].spi_sid) { 3535 output[j].array_base = next_param_base++; 3536 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3537 j++; 3538 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3539 } 3540 output[j].array_base = 61; 3541 output[j].swizzle_x = 7; 3542 output[j].swizzle_y = 7; 3543 output[j].swizzle_z = 7; 3544 output[j].swizzle_w = 0; 3545 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3546 pos_emitted = true; 3547 break; 3548 case TGSI_SEMANTIC_CLIPVERTEX: 3549 j--; 3550 break; 3551 case TGSI_SEMANTIC_CLIPDIST: 3552 output[j].array_base = next_clip_base++; 3553 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3554 pos_emitted = true; 3555 /* spi_sid is 0 for clipdistance outputs that were generated 3556 * for clipvertex - we don't need to pass them to PS */ 3557 if (shader->output[i].spi_sid) { 3558 j++; 3559 /* duplicate it as PARAM to pass to the pixel shader */ 3560 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3561 output[j].array_base = next_param_base++; 3562 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3563 } 3564 break; 3565 case TGSI_SEMANTIC_FOG: 3566 output[j].swizzle_y = 4; /* 0 */ 3567 output[j].swizzle_z = 4; /* 0 */ 3568 output[j].swizzle_w = 5; /* 1 */ 3569 break; 3570 case TGSI_SEMANTIC_PRIMID: 3571 output[j].swizzle_x = 2; 3572 output[j].swizzle_y = 4; /* 0 */ 3573 output[j].swizzle_z = 4; /* 0 */ 3574 output[j].swizzle_w = 4; /* 0 */ 3575 break; 3576 } 3577 3578 break; 3579 case TGSI_PROCESSOR_FRAGMENT: 3580 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 3581 /* never export more colors than the number of CBs */ 3582 if (shader->output[i].sid >= max_color_exports) { 3583 /* skip export */ 3584 j--; 3585 continue; 3586 } 3587 output[j].swizzle_w = key.ps.alpha_to_one ? 
5 : 3; 3588 output[j].array_base = shader->output[i].sid; 3589 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3590 shader->nr_ps_color_exports++; 3591 if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { 3592 for (k = 1; k < max_color_exports; k++) { 3593 j++; 3594 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3595 output[j].gpr = shader->output[i].gpr; 3596 output[j].elem_size = 3; 3597 output[j].swizzle_x = 0; 3598 output[j].swizzle_y = 1; 3599 output[j].swizzle_z = 2; 3600 output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 3601 output[j].burst_count = 1; 3602 output[j].array_base = k; 3603 output[j].op = CF_OP_EXPORT; 3604 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3605 shader->nr_ps_color_exports++; 3606 } 3607 } 3608 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 3609 output[j].array_base = 61; 3610 output[j].swizzle_x = 2; 3611 output[j].swizzle_y = 7; 3612 output[j].swizzle_z = output[j].swizzle_w = 7; 3613 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3614 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 3615 output[j].array_base = 61; 3616 output[j].swizzle_x = 7; 3617 output[j].swizzle_y = 1; 3618 output[j].swizzle_z = output[j].swizzle_w = 7; 3619 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3620 } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 3621 output[j].array_base = 61; 3622 output[j].swizzle_x = 7; 3623 output[j].swizzle_y = 7; 3624 output[j].swizzle_z = 0; 3625 output[j].swizzle_w = 7; 3626 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3627 } else { 3628 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 3629 r = -EINVAL; 3630 goto out_err; 3631 } 3632 break; 3633 case TGSI_PROCESSOR_TESS_CTRL: 3634 break; 3635 default: 3636 R600_ERR("unsupported processor type %d\n", ctx.type); 3637 r = -EINVAL; 3638 goto out_err; 3639 } 3640 3641 if (output[j].type==-1) { 3642 
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3643 output[j].array_base = next_param_base++; 3644 } 3645 } 3646 3647 /* add fake position export */ 3648 if ((ctx.type == TGSI_PROCESSOR_VERTEX || ctx.type == TGSI_PROCESSOR_TESS_EVAL) && pos_emitted == false) { 3649 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3650 output[j].gpr = 0; 3651 output[j].elem_size = 3; 3652 output[j].swizzle_x = 7; 3653 output[j].swizzle_y = 7; 3654 output[j].swizzle_z = 7; 3655 output[j].swizzle_w = 7; 3656 output[j].burst_count = 1; 3657 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3658 output[j].array_base = 60; 3659 output[j].op = CF_OP_EXPORT; 3660 j++; 3661 } 3662 3663 /* add fake param output for vertex shader if no param is exported */ 3664 if ((ctx.type == TGSI_PROCESSOR_VERTEX || ctx.type == TGSI_PROCESSOR_TESS_EVAL) && next_param_base == 0) { 3665 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3666 output[j].gpr = 0; 3667 output[j].elem_size = 3; 3668 output[j].swizzle_x = 7; 3669 output[j].swizzle_y = 7; 3670 output[j].swizzle_z = 7; 3671 output[j].swizzle_w = 7; 3672 output[j].burst_count = 1; 3673 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3674 output[j].array_base = 0; 3675 output[j].op = CF_OP_EXPORT; 3676 j++; 3677 } 3678 3679 /* add fake pixel export */ 3680 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { 3681 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3682 output[j].gpr = 0; 3683 output[j].elem_size = 3; 3684 output[j].swizzle_x = 7; 3685 output[j].swizzle_y = 7; 3686 output[j].swizzle_z = 7; 3687 output[j].swizzle_w = 7; 3688 output[j].burst_count = 1; 3689 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3690 output[j].array_base = 0; 3691 output[j].op = CF_OP_EXPORT; 3692 j++; 3693 shader->nr_ps_color_exports++; 3694 } 3695 3696 noutput = j; 3697 3698 /* set export done on last export of each type */ 3699 for (i = noutput - 1, 
output_done = 0; i >= 0; i--) {
			if (!(output_done & (1 << output[i].type))) {
				output_done |= (1 << output[i].type);
				output[i].op = CF_OP_EXPORT_DONE;
			}
		}
		/* add output to bytecode */
		if (!use_llvm) {
			for (i = 0; i < noutput; i++) {
				r = r600_bytecode_add_output(ctx.bc, &output[i]);
				if (r)
					goto out_err;
			}
		}
	}

	/* add program end */
	if (!use_llvm) {
		if (ctx.bc->chip_class == CAYMAN)
			cm_bytecode_add_cf_end(ctx.bc);
		else {
			const struct cf_op_info *last = NULL;

			if (ctx.bc->cf_last)
				last = r600_isa_cf(ctx.bc->cf_last->op);

			/* alu clause instructions don't have EOP bit, so add NOP */
			if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS)
				r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);

			/* mark the final CF instruction as end-of-program */
			ctx.bc->cf_last->end_of_program = 1;
		}
	}

	/* check GPR limit - we have 124 = 128 - 4
	 * (4 are reserved as alu clause temporary registers) */
	if (ctx.bc->ngpr > 124) {
		R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
		r = -ENOMEM;
		goto out_err;
	}

	if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
		if ((r = generate_gs_copy_shader(rctx, pipeshader, &so)))
			return r;
	}

	free(ctx.literals);
	tgsi_parse_free(&ctx.parse);
	return 0;
out_err:
	free(ctx.literals);
	tgsi_parse_free(&ctx.parse);
	return r;
}

/* Handler for TGSI opcodes this backend does not implement:
 * report the opcode by name and fail the compile. */
static int tgsi_unsupported(struct r600_shader_ctx *ctx)
{
	const unsigned tgsi_opcode =
		ctx->parse.FullToken.FullInstruction.Instruction.Opcode;
	R600_ERR("%s tgsi opcode unsupported\n",
		 tgsi_get_opcode_name(tgsi_opcode));
	return -EINVAL;
}

/* TGSI END needs no bytecode here; the end-of-program marker is
 * emitted by the main compile loop above. */
static int tgsi_end(struct r600_shader_ctx *ctx)
{
	return 0;
}

/* Copy one channel of a shader-level source operand into a raw
 * bytecode ALU source (selector, swizzled channel, modifiers,
 * literal value and kcache addressing). */
static void
r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
			const struct r600_shader_src *shader_src,
			unsigned chan)
{
	/* chan selects the TGSI channel; the actual register channel read
	 * is taken through the source's swizzle. */
	bc_src->sel = shader_src->sel;
	bc_src->chan = shader_src->swizzle[chan];
	bc_src->neg = shader_src->neg;
	bc_src->abs = shader_src->abs;
	bc_src->rel = shader_src->rel;
	/* literal value is indexed by the post-swizzle channel */
	bc_src->value = shader_src->value[bc_src->chan];
	bc_src->kc_bank = shader_src->kc_bank;
	bc_src->kc_rel = shader_src->kc_rel;
}

/* Take |src|: set the absolute-value flag and drop any pending negate. */
static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src)
{
	bc_src->abs = 1;
	bc_src->neg = 0;
}

/* Flip the negate modifier on a bytecode ALU source. */
static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src)
{
	bc_src->neg = !bc_src->neg;
}

/* Translate a TGSI destination register (for one channel) into a
 * bytecode ALU destination, applying the per-file GPR offset and the
 * instruction's saturate flag. */
static void tgsi_dst(struct r600_shader_ctx *ctx,
		     const struct tgsi_full_dst_register *tgsi_dst,
		     unsigned swizzle,
		     struct r600_bytecode_alu_dst *r600_dst)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;

	r600_dst->sel = tgsi_dst->Register.Index;
	r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File];
	r600_dst->chan = swizzle;
	r600_dst->write = 1;
	if (inst->Instruction.Saturate) {
		r600_dst->clamp = 1;
	}
	if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) {
		if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) {
			/* TCS outputs are not written directly; the compile loop
			 * stores them afterwards (see r600_store_tcs_output). */
			return;
		}
	}
	if (tgsi_dst->Register.Indirect)
		r600_dst->rel = V_SQ_REL_RELATIVE;

}

/* Common body for two-operand double (fp64) ops.  Each logical double
 * occupies a channel pair (xy and/or zw), so a single-component TGSI
 * writemask is widened to the pair (0x1/0x2 -> 0x3, 0x4/0x8 -> 0xc).
 * When singledest wants only one 32-bit word (mask 0x2 or 0x8) the
 * result goes through temp_reg and use_tmp-1 names the temp channel
 * that is copied back out.  swap exchanges src0/src1. */
static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	struct r600_bytecode_alu alu;
	int i, j, r, lasti = tgsi_last_instruction(write_mask);
	int use_tmp = 0;

	if (singledest) {
		switch (write_mask) {
		case 0x1:
			write_mask = 0x3;
			break;
		case 0x2:
			use_tmp = 1;
			write_mask = 0x3;
			break;
		case 0x4:
			write_mask = 0xc;
			break;
		case 0x8:
			write_mask = 0xc;
			use_tmp = 3;
			break;
		}
	}

	lasti = tgsi_last_instruction(write_mask);
	for (i = 0; i <= lasti; i++) {

		if (!(write_mask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		if (singledest) {
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			if (use_tmp) {
				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				alu.dst.write = 1;
			}
			/* single dest: only the low word of each pair is written */
			if (i == 1 || i == 3)
				alu.dst.write = 0;
		} else
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ctx->inst_info->op;
		if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		} else if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				/* fp64_switch() pairs up the 32-bit halves for source reads */
				r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
			}
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i));
			r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i));
		}

		/* handle some special cases: modifiers apply to the high
		 * (sign-carrying) word of each double, i.e. channels 1 and 3 */
		if (i == 1 || i == 3) {
			switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
			case TGSI_OPCODE_SUB:
				r600_bytecode_src_toggle_neg(&alu.src[1]);
				break;
			case TGSI_OPCODE_DABS:
				r600_bytecode_src_set_abs(&alu.src[0]);
				break;
			default:
				break;
			}
		}
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	if (use_tmp) {
		write_mask = inst->Dst[0].Register.WriteMask;

		/* move result from temp to dst */
		for (i = 0; i <= lasti; i++) {
			if (!(write_mask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = use_tmp - 1;
			alu.last = (i == lasti);

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}

/* Two-operand fp64 op with a double destination: the writemask must
 * cover whole channel pairs. */
static int tgsi_op2_64(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	/* confirm writemasking */
	if ((write_mask & 0x3) != 0x3 &&
	    (write_mask & 0xc) != 0xc) {
		fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask);
		return -1;
	}
	return tgsi_op2_64_params(ctx, false, false);
}

/* fp64 op producing a single (32-bit) destination component. */
static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_64_params(ctx, true, false);
}

/* Same as above but with src0/src1 swapped. */
static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_64_params(ctx, true, true);
}

/* Three-operand fp64 op: always iterates all four channels; components
 * masked out of the destination are redirected to a scratch temp. */
static int tgsi_op3_64(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;
	int lasti = 3;
	int tmp = r600_get_temp(ctx);

	for (i = 0; i < lasti + 1; i++) {

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			/* sources read channel 1, except the last slot which
			 * reads channel 0 (fp64 word pairing) */
			r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1);
		}

		if (inst->Dst[0].Register.WriteMask & (1 << i))
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		else
			alu.dst.sel = tmp;

		alu.dst.chan = i;
		alu.is_op3 = 1;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Generic per-channel translator for simple TGSI ops.
 * swap: exchange src0 and src1 (for reversed-operand hw opcodes).
 * trans_only: the hw op runs only in the trans unit, so each channel's
 * instruction must terminate its ALU group (alu.last on every one) and
 * multi-channel results are staged through temp_reg. */
static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int i, j, r, lasti = tgsi_last_instruction(write_mask);
	/* use temp register if trans_only and more than one dst component */
	int use_tmp = trans_only && (write_mask ^ (1 << lasti));

	for (i = 0; i <= lasti; i++) {
		if (!(write_mask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		if (use_tmp) {
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.dst.write = 1;
		} else
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ctx->inst_info->op;
		if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
			}
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		}
		/* handle some special cases: SUB/ABS map to ADD/MOV with
		 * source modifiers */
		switch (inst->Instruction.Opcode) {
		case TGSI_OPCODE_SUB:
			r600_bytecode_src_toggle_neg(&alu.src[1]);
			break;
		case TGSI_OPCODE_ABS:
			r600_bytecode_src_set_abs(&alu.src[0]);
			break;
		default:
			break;
		}
		if (i == lasti || trans_only) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	if (use_tmp) {
		/* move result from temp to dst */
		for (i = 0; i <= lasti; i++) {
			if (!(write_mask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = i;
			alu.last = (i == lasti);

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}

/* Plain two-operand op. */
static int tgsi_op2(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0, 0);
}

/* Two-operand op with operands swapped. */
static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 1, 0);
}

/* Two-operand op restricted to the trans unit. */
static int tgsi_op2_trans(struct r600_shader_ctx *ctx)
{
	return tgsi_op2_s(ctx, 0, 1);
}

/* Integer negate: emits inst_info->op with constant zero as src0 and
 * the TGSI operand as src1 (i.e. 0 - x for a subtract opcode). */
static int tgsi_ineg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	for (i = 0; i < lasti + 1; i++) {

		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		alu.src[0].sel = V_SQ_ALU_SRC_0;

		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;

}

/* Double negate: per-channel MOV, toggling the sign on the high word
 * of each double (channels 1 and 3). */
static int tgsi_dneg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	for (i = 0; i < lasti + 1; i++) {

		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;

		r600_bytecode_src(&alu.src[0],
&ctx->src[0], i); 4108 4109 if (i == 1 || i == 3) 4110 r600_bytecode_src_toggle_neg(&alu.src[0]); 4111 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4112 4113 if (i == lasti) { 4114 alu.last = 1; 4115 } 4116 r = r600_bytecode_add_alu(ctx->bc, &alu); 4117 if (r) 4118 return r; 4119 } 4120 return 0; 4121 4122} 4123 4124static int tgsi_dfracexp(struct r600_shader_ctx *ctx) 4125{ 4126 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4127 struct r600_bytecode_alu alu; 4128 unsigned write_mask = inst->Dst[0].Register.WriteMask; 4129 int i, j, r; 4130 int firsti = write_mask == 0xc ? 2 : 0; 4131 4132 for (i = 0; i <= 3; i++) { 4133 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4134 alu.op = ctx->inst_info->op; 4135 4136 alu.dst.sel = ctx->temp_reg; 4137 alu.dst.chan = i; 4138 alu.dst.write = 1; 4139 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4140 r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 4141 } 4142 4143 if (i == 3) 4144 alu.last = 1; 4145 4146 r = r600_bytecode_add_alu(ctx->bc, &alu); 4147 if (r) 4148 return r; 4149 } 4150 4151 /* MOV first two channels to writemask dst0 */ 4152 for (i = 0; i <= 1; i++) { 4153 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4154 alu.op = ALU_OP1_MOV; 4155 alu.src[0].chan = i + 2; 4156 alu.src[0].sel = ctx->temp_reg; 4157 4158 tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst); 4159 alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1; 4160 alu.last = 1; 4161 r = r600_bytecode_add_alu(ctx->bc, &alu); 4162 if (r) 4163 return r; 4164 } 4165 4166 for (i = 0; i <= 3; i++) { 4167 if (inst->Dst[1].Register.WriteMask & (1 << i)) { 4168 /* MOV third channels to writemask dst1 */ 4169 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4170 alu.op = ALU_OP1_MOV; 4171 alu.src[0].chan = 1; 4172 alu.src[0].sel = ctx->temp_reg; 4173 4174 tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst); 4175 alu.last = 1; 4176 r = r600_bytecode_add_alu(ctx->bc, &alu); 4177 if (r) 4178 return r; 
4179 break; 4180 } 4181 } 4182 return 0; 4183} 4184 4185 4186static int egcm_int_to_double(struct r600_shader_ctx *ctx) 4187{ 4188 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4189 struct r600_bytecode_alu alu; 4190 int i, r; 4191 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4192 4193 assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D || 4194 inst->Instruction.Opcode == TGSI_OPCODE_U2D); 4195 4196 for (i = 0; i <= (lasti+1)/2; i++) { 4197 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4198 alu.op = ctx->inst_info->op; 4199 4200 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4201 alu.dst.sel = ctx->temp_reg; 4202 alu.dst.chan = i; 4203 alu.dst.write = 1; 4204 alu.last = 1; 4205 4206 r = r600_bytecode_add_alu(ctx->bc, &alu); 4207 if (r) 4208 return r; 4209 } 4210 4211 for (i = 0; i <= lasti; i++) { 4212 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4213 alu.op = ALU_OP1_FLT32_TO_FLT64; 4214 4215 alu.src[0].chan = i/2; 4216 if (i%2 == 0) 4217 alu.src[0].sel = ctx->temp_reg; 4218 else { 4219 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 4220 alu.src[0].value = 0x0; 4221 } 4222 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4223 alu.last = i == lasti; 4224 4225 r = r600_bytecode_add_alu(ctx->bc, &alu); 4226 if (r) 4227 return r; 4228 } 4229 4230 return 0; 4231} 4232 4233static int egcm_double_to_int(struct r600_shader_ctx *ctx) 4234{ 4235 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4236 struct r600_bytecode_alu alu; 4237 int i, r; 4238 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4239 4240 assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I || 4241 inst->Instruction.Opcode == TGSI_OPCODE_D2U); 4242 4243 for (i = 0; i <= lasti; i++) { 4244 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4245 alu.op = ALU_OP1_FLT64_TO_FLT32; 4246 4247 r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i)); 4248 alu.dst.chan = i; 4249 alu.dst.sel = ctx->temp_reg; 4250 
alu.dst.write = i%2 == 0; 4251 alu.last = i == lasti; 4252 4253 r = r600_bytecode_add_alu(ctx->bc, &alu); 4254 if (r) 4255 return r; 4256 } 4257 4258 for (i = 0; i <= (lasti+1)/2; i++) { 4259 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4260 alu.op = ctx->inst_info->op; 4261 4262 alu.src[0].chan = i*2; 4263 alu.src[0].sel = ctx->temp_reg; 4264 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4265 alu.last = 1; 4266 4267 r = r600_bytecode_add_alu(ctx->bc, &alu); 4268 if (r) 4269 return r; 4270 } 4271 4272 return 0; 4273} 4274 4275static int cayman_emit_double_instr(struct r600_shader_ctx *ctx) 4276{ 4277 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4278 int i, r; 4279 struct r600_bytecode_alu alu; 4280 int last_slot = 3; 4281 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4282 int t1 = ctx->temp_reg; 4283 4284 /* these have to write the result to X/Y by the looks of it */ 4285 for (i = 0 ; i < last_slot; i++) { 4286 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4287 alu.op = ctx->inst_info->op; 4288 4289 /* should only be one src regs */ 4290 assert (inst->Instruction.NumSrcRegs == 1); 4291 4292 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 4293 r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); 4294 4295 /* RSQ should take the absolute value of src */ 4296 if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ || 4297 ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) { 4298 r600_bytecode_src_set_abs(&alu.src[1]); 4299 } 4300 alu.dst.sel = t1; 4301 alu.dst.chan = i; 4302 alu.dst.write = (i == 0 || i == 1); 4303 4304 if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1) 4305 alu.last = 1; 4306 r = r600_bytecode_add_alu(ctx->bc, &alu); 4307 if (r) 4308 return r; 4309 } 4310 4311 for (i = 0 ; i <= lasti; i++) { 4312 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4313 continue; 4314 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4315 alu.op = 
ALU_OP1_MOV; 4316 alu.src[0].sel = t1; 4317 alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1; 4318 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4319 alu.dst.write = 1; 4320 if (i == lasti) 4321 alu.last = 1; 4322 r = r600_bytecode_add_alu(ctx->bc, &alu); 4323 if (r) 4324 return r; 4325 } 4326 return 0; 4327} 4328 4329static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 4330{ 4331 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4332 int i, j, r; 4333 struct r600_bytecode_alu alu; 4334 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4335 4336 for (i = 0 ; i < last_slot; i++) { 4337 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4338 alu.op = ctx->inst_info->op; 4339 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4340 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 4341 4342 /* RSQ should take the absolute value of src */ 4343 if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) { 4344 r600_bytecode_src_set_abs(&alu.src[j]); 4345 } 4346 } 4347 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4348 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4349 4350 if (i == last_slot - 1) 4351 alu.last = 1; 4352 r = r600_bytecode_add_alu(ctx->bc, &alu); 4353 if (r) 4354 return r; 4355 } 4356 return 0; 4357} 4358 4359static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 4360{ 4361 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4362 int i, j, k, r; 4363 struct r600_bytecode_alu alu; 4364 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4365 int t1 = ctx->temp_reg; 4366 4367 for (k = 0; k <= lasti; k++) { 4368 if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 4369 continue; 4370 4371 for (i = 0 ; i < 4; i++) { 4372 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4373 alu.op = ctx->inst_info->op; 4374 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4375 r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 4376 } 4377 alu.dst.sel = t1; 4378 alu.dst.chan = i; 4379 
alu.dst.write = (i == k); 4380 if (i == 3) 4381 alu.last = 1; 4382 r = r600_bytecode_add_alu(ctx->bc, &alu); 4383 if (r) 4384 return r; 4385 } 4386 } 4387 4388 for (i = 0 ; i <= lasti; i++) { 4389 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4390 continue; 4391 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4392 alu.op = ALU_OP1_MOV; 4393 alu.src[0].sel = t1; 4394 alu.src[0].chan = i; 4395 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4396 alu.dst.write = 1; 4397 if (i == lasti) 4398 alu.last = 1; 4399 r = r600_bytecode_add_alu(ctx->bc, &alu); 4400 if (r) 4401 return r; 4402 } 4403 4404 return 0; 4405} 4406 4407 4408static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) 4409{ 4410 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4411 int i, j, k, r; 4412 struct r600_bytecode_alu alu; 4413 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4414 int t1 = ctx->temp_reg; 4415 4416 for (k = 0; k < 2; k++) { 4417 if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2)))) 4418 continue; 4419 4420 for (i = 0; i < 4; i++) { 4421 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4422 alu.op = ctx->inst_info->op; 4423 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4424 r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 
0 : 1));; 4425 } 4426 alu.dst.sel = t1; 4427 alu.dst.chan = i; 4428 alu.dst.write = 1; 4429 if (i == 3) 4430 alu.last = 1; 4431 r = r600_bytecode_add_alu(ctx->bc, &alu); 4432 if (r) 4433 return r; 4434 } 4435 } 4436 4437 for (i = 0; i <= lasti; i++) { 4438 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4439 continue; 4440 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4441 alu.op = ALU_OP1_MOV; 4442 alu.src[0].sel = t1; 4443 alu.src[0].chan = i; 4444 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4445 alu.dst.write = 1; 4446 if (i == lasti) 4447 alu.last = 1; 4448 r = r600_bytecode_add_alu(ctx->bc, &alu); 4449 if (r) 4450 return r; 4451 } 4452 4453 return 0; 4454} 4455 4456/* 4457 * r600 - trunc to -PI..PI range 4458 * r700 - normalize by dividing by 2PI 4459 * see fdo bug 27901 4460 */ 4461static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 4462{ 4463 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 4464 static float double_pi = 3.1415926535 * 2; 4465 static float neg_pi = -3.1415926535; 4466 4467 int r; 4468 struct r600_bytecode_alu alu; 4469 4470 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4471 alu.op = ALU_OP3_MULADD; 4472 alu.is_op3 = 1; 4473 4474 alu.dst.chan = 0; 4475 alu.dst.sel = ctx->temp_reg; 4476 alu.dst.write = 1; 4477 4478 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4479 4480 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4481 alu.src[1].chan = 0; 4482 alu.src[1].value = *(uint32_t *)&half_inv_pi; 4483 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 4484 alu.src[2].chan = 0; 4485 alu.last = 1; 4486 r = r600_bytecode_add_alu(ctx->bc, &alu); 4487 if (r) 4488 return r; 4489 4490 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4491 alu.op = ALU_OP1_FRACT; 4492 4493 alu.dst.chan = 0; 4494 alu.dst.sel = ctx->temp_reg; 4495 alu.dst.write = 1; 4496 4497 alu.src[0].sel = ctx->temp_reg; 4498 alu.src[0].chan = 0; 4499 alu.last = 1; 4500 r = r600_bytecode_add_alu(ctx->bc, &alu); 4501 if (r) 4502 return r; 4503 4504 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 4505 alu.op = ALU_OP3_MULADD; 4506 alu.is_op3 = 1; 4507 4508 alu.dst.chan = 0; 4509 alu.dst.sel = ctx->temp_reg; 4510 alu.dst.write = 1; 4511 4512 alu.src[0].sel = ctx->temp_reg; 4513 alu.src[0].chan = 0; 4514 4515 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4516 alu.src[1].chan = 0; 4517 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 4518 alu.src[2].chan = 0; 4519 4520 if (ctx->bc->chip_class == R600) { 4521 alu.src[1].value = *(uint32_t *)&double_pi; 4522 alu.src[2].value = *(uint32_t *)&neg_pi; 4523 } else { 4524 alu.src[1].sel = V_SQ_ALU_SRC_1; 4525 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 4526 alu.src[2].neg = 1; 4527 } 4528 4529 alu.last = 1; 4530 r = r600_bytecode_add_alu(ctx->bc, &alu); 4531 if (r) 4532 return r; 4533 return 0; 4534} 4535 4536static int cayman_trig(struct r600_shader_ctx *ctx) 4537{ 4538 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4539 struct r600_bytecode_alu alu; 4540 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4541 int i, r; 4542 4543 r = tgsi_setup_trig(ctx); 4544 if (r) 4545 return r; 4546 4547 4548 for (i = 0; i < last_slot; i++) { 4549 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4550 alu.op = ctx->inst_info->op; 4551 alu.dst.chan = i; 4552 4553 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4554 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4555 4556 alu.src[0].sel = ctx->temp_reg; 4557 alu.src[0].chan = 0; 4558 if (i == last_slot - 1) 4559 alu.last = 1; 4560 r = r600_bytecode_add_alu(ctx->bc, &alu); 4561 if (r) 4562 return r; 4563 } 4564 return 0; 4565} 4566 4567static int tgsi_trig(struct r600_shader_ctx *ctx) 4568{ 4569 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4570 struct r600_bytecode_alu alu; 4571 int i, r; 4572 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4573 4574 r = tgsi_setup_trig(ctx); 4575 if (r) 4576 return r; 4577 4578 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4579 alu.op = 
ctx->inst_info->op; 4580 alu.dst.chan = 0; 4581 alu.dst.sel = ctx->temp_reg; 4582 alu.dst.write = 1; 4583 4584 alu.src[0].sel = ctx->temp_reg; 4585 alu.src[0].chan = 0; 4586 alu.last = 1; 4587 r = r600_bytecode_add_alu(ctx->bc, &alu); 4588 if (r) 4589 return r; 4590 4591 /* replicate result */ 4592 for (i = 0; i < lasti + 1; i++) { 4593 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4594 continue; 4595 4596 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4597 alu.op = ALU_OP1_MOV; 4598 4599 alu.src[0].sel = ctx->temp_reg; 4600 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4601 if (i == lasti) 4602 alu.last = 1; 4603 r = r600_bytecode_add_alu(ctx->bc, &alu); 4604 if (r) 4605 return r; 4606 } 4607 return 0; 4608} 4609 4610static int tgsi_scs(struct r600_shader_ctx *ctx) 4611{ 4612 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4613 struct r600_bytecode_alu alu; 4614 int i, r; 4615 4616 /* We'll only need the trig stuff if we are going to write to the 4617 * X or Y components of the destination vector. 
4618 */ 4619 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 4620 r = tgsi_setup_trig(ctx); 4621 if (r) 4622 return r; 4623 } 4624 4625 /* dst.x = COS */ 4626 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4627 if (ctx->bc->chip_class == CAYMAN) { 4628 for (i = 0 ; i < 3; i++) { 4629 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4630 alu.op = ALU_OP1_COS; 4631 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4632 4633 if (i == 0) 4634 alu.dst.write = 1; 4635 else 4636 alu.dst.write = 0; 4637 alu.src[0].sel = ctx->temp_reg; 4638 alu.src[0].chan = 0; 4639 if (i == 2) 4640 alu.last = 1; 4641 r = r600_bytecode_add_alu(ctx->bc, &alu); 4642 if (r) 4643 return r; 4644 } 4645 } else { 4646 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4647 alu.op = ALU_OP1_COS; 4648 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4649 4650 alu.src[0].sel = ctx->temp_reg; 4651 alu.src[0].chan = 0; 4652 alu.last = 1; 4653 r = r600_bytecode_add_alu(ctx->bc, &alu); 4654 if (r) 4655 return r; 4656 } 4657 } 4658 4659 /* dst.y = SIN */ 4660 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4661 if (ctx->bc->chip_class == CAYMAN) { 4662 for (i = 0 ; i < 3; i++) { 4663 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4664 alu.op = ALU_OP1_SIN; 4665 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4666 if (i == 1) 4667 alu.dst.write = 1; 4668 else 4669 alu.dst.write = 0; 4670 alu.src[0].sel = ctx->temp_reg; 4671 alu.src[0].chan = 0; 4672 if (i == 2) 4673 alu.last = 1; 4674 r = r600_bytecode_add_alu(ctx->bc, &alu); 4675 if (r) 4676 return r; 4677 } 4678 } else { 4679 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4680 alu.op = ALU_OP1_SIN; 4681 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4682 4683 alu.src[0].sel = ctx->temp_reg; 4684 alu.src[0].chan = 0; 4685 alu.last = 1; 4686 r = r600_bytecode_add_alu(ctx->bc, &alu); 4687 if (r) 4688 return r; 4689 } 4690 } 4691 4692 /* dst.z = 0.0; */ 4693 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4694 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4695 4696 alu.op = ALU_OP1_MOV; 4697 4698 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4699 4700 alu.src[0].sel = V_SQ_ALU_SRC_0; 4701 alu.src[0].chan = 0; 4702 4703 alu.last = 1; 4704 4705 r = r600_bytecode_add_alu(ctx->bc, &alu); 4706 if (r) 4707 return r; 4708 } 4709 4710 /* dst.w = 1.0; */ 4711 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4712 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4713 4714 alu.op = ALU_OP1_MOV; 4715 4716 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4717 4718 alu.src[0].sel = V_SQ_ALU_SRC_1; 4719 alu.src[0].chan = 0; 4720 4721 alu.last = 1; 4722 4723 r = r600_bytecode_add_alu(ctx->bc, &alu); 4724 if (r) 4725 return r; 4726 } 4727 4728 return 0; 4729} 4730 4731static int tgsi_kill(struct r600_shader_ctx *ctx) 4732{ 4733 const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4734 struct r600_bytecode_alu alu; 4735 int i, r; 4736 4737 for (i = 0; i < 4; i++) { 4738 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4739 alu.op = ctx->inst_info->op; 4740 4741 alu.dst.chan = i; 4742 4743 alu.src[0].sel = V_SQ_ALU_SRC_0; 4744 4745 if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) { 4746 alu.src[1].sel = V_SQ_ALU_SRC_1; 4747 alu.src[1].neg = 1; 4748 } else { 4749 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4750 } 4751 if (i == 3) { 4752 alu.last = 1; 4753 } 4754 r = r600_bytecode_add_alu(ctx->bc, &alu); 4755 if (r) 4756 return r; 4757 } 4758 4759 /* kill must be last in ALU */ 4760 ctx->bc->force_add_cf = 1; 4761 ctx->shader->uses_kill = TRUE; 4762 return 0; 4763} 4764 4765static int tgsi_lit(struct r600_shader_ctx *ctx) 4766{ 4767 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4768 struct r600_bytecode_alu alu; 4769 int r; 4770 4771 /* tmp.x = max(src.y, 0.0) */ 4772 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4773 alu.op = ALU_OP2_MAX; 4774 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 4775 alu.src[1].sel = 
V_SQ_ALU_SRC_0; /*0.0*/ 4776 alu.src[1].chan = 1; 4777 4778 alu.dst.sel = ctx->temp_reg; 4779 alu.dst.chan = 0; 4780 alu.dst.write = 1; 4781 4782 alu.last = 1; 4783 r = r600_bytecode_add_alu(ctx->bc, &alu); 4784 if (r) 4785 return r; 4786 4787 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 4788 { 4789 int chan; 4790 int sel; 4791 int i; 4792 4793 if (ctx->bc->chip_class == CAYMAN) { 4794 for (i = 0; i < 3; i++) { 4795 /* tmp.z = log(tmp.x) */ 4796 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4797 alu.op = ALU_OP1_LOG_CLAMPED; 4798 alu.src[0].sel = ctx->temp_reg; 4799 alu.src[0].chan = 0; 4800 alu.dst.sel = ctx->temp_reg; 4801 alu.dst.chan = i; 4802 if (i == 2) { 4803 alu.dst.write = 1; 4804 alu.last = 1; 4805 } else 4806 alu.dst.write = 0; 4807 4808 r = r600_bytecode_add_alu(ctx->bc, &alu); 4809 if (r) 4810 return r; 4811 } 4812 } else { 4813 /* tmp.z = log(tmp.x) */ 4814 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4815 alu.op = ALU_OP1_LOG_CLAMPED; 4816 alu.src[0].sel = ctx->temp_reg; 4817 alu.src[0].chan = 0; 4818 alu.dst.sel = ctx->temp_reg; 4819 alu.dst.chan = 2; 4820 alu.dst.write = 1; 4821 alu.last = 1; 4822 r = r600_bytecode_add_alu(ctx->bc, &alu); 4823 if (r) 4824 return r; 4825 } 4826 4827 chan = alu.dst.chan; 4828 sel = alu.dst.sel; 4829 4830 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 4831 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4832 alu.op = ALU_OP3_MUL_LIT; 4833 alu.src[0].sel = sel; 4834 alu.src[0].chan = chan; 4835 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 4836 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 4837 alu.dst.sel = ctx->temp_reg; 4838 alu.dst.chan = 0; 4839 alu.dst.write = 1; 4840 alu.is_op3 = 1; 4841 alu.last = 1; 4842 r = r600_bytecode_add_alu(ctx->bc, &alu); 4843 if (r) 4844 return r; 4845 4846 if (ctx->bc->chip_class == CAYMAN) { 4847 for (i = 0; i < 3; i++) { 4848 /* dst.z = exp(tmp.x) */ 4849 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4850 alu.op = ALU_OP1_EXP_IEEE; 4851 alu.src[0].sel = 
ctx->temp_reg; 4852 alu.src[0].chan = 0; 4853 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4854 if (i == 2) { 4855 alu.dst.write = 1; 4856 alu.last = 1; 4857 } else 4858 alu.dst.write = 0; 4859 r = r600_bytecode_add_alu(ctx->bc, &alu); 4860 if (r) 4861 return r; 4862 } 4863 } else { 4864 /* dst.z = exp(tmp.x) */ 4865 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4866 alu.op = ALU_OP1_EXP_IEEE; 4867 alu.src[0].sel = ctx->temp_reg; 4868 alu.src[0].chan = 0; 4869 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4870 alu.last = 1; 4871 r = r600_bytecode_add_alu(ctx->bc, &alu); 4872 if (r) 4873 return r; 4874 } 4875 } 4876 4877 /* dst.x, <- 1.0 */ 4878 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4879 alu.op = ALU_OP1_MOV; 4880 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 4881 alu.src[0].chan = 0; 4882 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4883 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 4884 r = r600_bytecode_add_alu(ctx->bc, &alu); 4885 if (r) 4886 return r; 4887 4888 /* dst.y = max(src.x, 0.0) */ 4889 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4890 alu.op = ALU_OP2_MAX; 4891 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4892 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 4893 alu.src[1].chan = 0; 4894 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4895 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 4896 r = r600_bytecode_add_alu(ctx->bc, &alu); 4897 if (r) 4898 return r; 4899 4900 /* dst.w, <- 1.0 */ 4901 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4902 alu.op = ALU_OP1_MOV; 4903 alu.src[0].sel = V_SQ_ALU_SRC_1; 4904 alu.src[0].chan = 0; 4905 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4906 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 4907 alu.last = 1; 4908 r = r600_bytecode_add_alu(ctx->bc, &alu); 4909 if (r) 4910 return r; 4911 4912 return 0; 4913} 4914 4915static int tgsi_rsq(struct r600_shader_ctx *ctx) 4916{ 4917 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4918 struct 
r600_bytecode_alu alu; 4919 int i, r; 4920 4921 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4922 4923 /* XXX: 4924 * For state trackers other than OpenGL, we'll want to use 4925 * _RECIPSQRT_IEEE instead. 4926 */ 4927 alu.op = ALU_OP1_RECIPSQRT_CLAMPED; 4928 4929 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4930 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4931 r600_bytecode_src_set_abs(&alu.src[i]); 4932 } 4933 alu.dst.sel = ctx->temp_reg; 4934 alu.dst.write = 1; 4935 alu.last = 1; 4936 r = r600_bytecode_add_alu(ctx->bc, &alu); 4937 if (r) 4938 return r; 4939 /* replicate result */ 4940 return tgsi_helper_tempx_replicate(ctx); 4941} 4942 4943static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 4944{ 4945 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4946 struct r600_bytecode_alu alu; 4947 int i, r; 4948 4949 for (i = 0; i < 4; i++) { 4950 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4951 alu.src[0].sel = ctx->temp_reg; 4952 alu.op = ALU_OP1_MOV; 4953 alu.dst.chan = i; 4954 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4955 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4956 if (i == 3) 4957 alu.last = 1; 4958 r = r600_bytecode_add_alu(ctx->bc, &alu); 4959 if (r) 4960 return r; 4961 } 4962 return 0; 4963} 4964 4965static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 4966{ 4967 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4968 struct r600_bytecode_alu alu; 4969 int i, r; 4970 4971 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4972 alu.op = ctx->inst_info->op; 4973 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4974 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4975 } 4976 alu.dst.sel = ctx->temp_reg; 4977 alu.dst.write = 1; 4978 alu.last = 1; 4979 r = r600_bytecode_add_alu(ctx->bc, &alu); 4980 if (r) 4981 return r; 4982 /* replicate result */ 4983 return tgsi_helper_tempx_replicate(ctx); 4984} 4985 4986static int cayman_pow(struct 
r600_shader_ctx *ctx) 4987{ 4988 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4989 int i, r; 4990 struct r600_bytecode_alu alu; 4991 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4992 4993 for (i = 0; i < 3; i++) { 4994 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4995 alu.op = ALU_OP1_LOG_IEEE; 4996 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4997 alu.dst.sel = ctx->temp_reg; 4998 alu.dst.chan = i; 4999 alu.dst.write = 1; 5000 if (i == 2) 5001 alu.last = 1; 5002 r = r600_bytecode_add_alu(ctx->bc, &alu); 5003 if (r) 5004 return r; 5005 } 5006 5007 /* b * LOG2(a) */ 5008 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5009 alu.op = ALU_OP2_MUL; 5010 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 5011 alu.src[1].sel = ctx->temp_reg; 5012 alu.dst.sel = ctx->temp_reg; 5013 alu.dst.write = 1; 5014 alu.last = 1; 5015 r = r600_bytecode_add_alu(ctx->bc, &alu); 5016 if (r) 5017 return r; 5018 5019 for (i = 0; i < last_slot; i++) { 5020 /* POW(a,b) = EXP2(b * LOG2(a))*/ 5021 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5022 alu.op = ALU_OP1_EXP_IEEE; 5023 alu.src[0].sel = ctx->temp_reg; 5024 5025 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5026 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 5027 if (i == last_slot - 1) 5028 alu.last = 1; 5029 r = r600_bytecode_add_alu(ctx->bc, &alu); 5030 if (r) 5031 return r; 5032 } 5033 return 0; 5034} 5035 5036static int tgsi_pow(struct r600_shader_ctx *ctx) 5037{ 5038 struct r600_bytecode_alu alu; 5039 int r; 5040 5041 /* LOG2(a) */ 5042 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5043 alu.op = ALU_OP1_LOG_IEEE; 5044 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5045 alu.dst.sel = ctx->temp_reg; 5046 alu.dst.write = 1; 5047 alu.last = 1; 5048 r = r600_bytecode_add_alu(ctx->bc, &alu); 5049 if (r) 5050 return r; 5051 /* b * LOG2(a) */ 5052 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5053 alu.op = ALU_OP2_MUL; 5054 
r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
	alu.src[1].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	/* POW(a,b) = EXP2(b * LOG2(a))*/
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_EXP_IEEE;
	alu.src[0].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return tgsi_helper_tempx_replicate(ctx);
}

/* Emit bytecode for integer division/modulo per enabled write-mask channel.
 * mod != 0 selects remainder output (UMOD/IMOD), otherwise quotient (UDIV/IDIV);
 * signed_op != 0 runs the unsigned algorithm on |src0|,|src1| and patches the
 * result sign at the end.  Uses ctx->temp_reg plus three freshly allocated
 * temporaries; the algorithm itself is documented step by step below. */
static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, j;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int tmp0 = ctx->temp_reg;
	int tmp1 = r600_get_temp(ctx);
	int tmp2 = r600_get_temp(ctx);
	int tmp3 = r600_get_temp(ctx);
	/* Unsigned path:
	 *
	 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
	 *
	 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
	 * 2. tmp0.z = lo (tmp0.x * src2)
	 * 3. tmp0.w = -tmp0.z
	 * 4. tmp0.y = hi (tmp0.x * src2)
	 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2))
	 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error
	 * 7. tmp1.x = tmp0.x - tmp0.w
	 * 8. tmp1.y = tmp0.x + tmp0.w
	 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
	 * 10. tmp0.z = hi(tmp0.x * src1) = q
	 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r
	 *
	 * 12. tmp0.w = src1 - tmp0.y = r
	 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison)
	 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison)
	 *
	 * if DIV
	 *
	 * 15. tmp1.z = tmp0.z + 1 = q + 1
	 * 16. tmp1.w = tmp0.z - 1 = q - 1
	 *
	 * else MOD
	 *
	 * 15. tmp1.z = tmp0.w - src2 = r - src2
	 * 16. tmp1.w = tmp0.w + src2 = r + src2
	 *
	 * endif
	 *
	 * 17. tmp1.x = tmp1.x & tmp1.y
	 *
	 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
	 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
	 *
	 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
	 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
	 *
	 * Signed path:
	 *
	 * Same as unsigned, using abs values of the operands,
	 * and fixing the sign of the result in the end.
	 */

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		if (signed_op) {

			/* tmp2.x = -src0 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp2;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			alu.src[0].sel = V_SQ_ALU_SRC_0;

			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.y = -src1 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp2;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			alu.src[0].sel = V_SQ_ALU_SRC_0;

			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.z sign bit is set if src0 and src2 signs are different */
			/* it will be a sign of the quotient */
			if (!mod) {

				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_XOR_INT;

				alu.dst.sel = tmp2;
				alu.dst.chan = 2;
				alu.dst.write = 1;

				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}

			/* tmp2.x = |src0| */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_CNDGE_INT;
			alu.is_op3 = 1;

			alu.dst.sel = tmp2;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
			alu.src[2].sel = tmp2;
			alu.src[2].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.y = |src1| */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_CNDGE_INT;
			alu.is_op3 = 1;

			alu.dst.sel = tmp2;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			alu.src[2].sel = tmp2;
			alu.src[2].chan = 1;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		}

		/* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
		if (ctx->bc->chip_class == CAYMAN) {
			/* Cayman has no RECIP_UINT; emulate via float recip:
			 * tmp3.x = u2f(src2) */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_UINT_TO_FLT;

			alu.dst.sel = tmp3;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp0.x = recip(tmp3.x) */
			for (j = 0 ; j < 3; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_RECIP_IEEE;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 0);

				alu.src[0].sel = tmp3;
				alu.src[0].chan = 0;

				if (j == 2)
					alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}

			/* scale by 2^32 (0x4f800000 = 4294967296.0f) before
			 * converting back to an integer reciprocal */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MUL;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
			alu.src[1].value = 0x4f800000;

			alu.dst.sel = tmp3;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_FLT_TO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			alu.src[0].sel = tmp3;
			alu.src[0].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_RECIP_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 2. tmp0.z = lo (tmp0.x * src2) */
		if (ctx->bc->chip_class == CAYMAN) {
			/* on Cayman the t-slot-only mul ops run in all 4 vector
			 * slots; only the slot matching the wanted channel writes */
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULLO_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 2);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;
				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
				}

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULLO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;
			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 3. tmp0.w = -tmp0.z */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp0;
		alu.dst.chan = 3;
		alu.dst.write = 1;

		alu.src[0].sel = V_SQ_ALU_SRC_0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 4. tmp0.y = hi (tmp0.x * src2) */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 1);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;

				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
				}
				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 2;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;
		alu.src[2].sel = tmp0;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 3);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 2;

				alu.src[1].sel = tmp0;
				alu.src[1].chan = 0;

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;

			alu.src[1].sel = tmp0;
			alu.src[1].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 7. tmp1.x = tmp0.x - tmp0.w */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 8. tmp1.y = tmp0.x + tmp0.w */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 1;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 1;
		alu.src[2].sel = tmp1;
		alu.src[2].chan = 0;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 10. tmp0.z = hi(tmp0.x * src1) = q */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 2);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;

				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 0;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
				}

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 0;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULLO_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 1);

				if (signed_op) {
					alu.src[0].sel = tmp2;
					alu.src[0].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
				}

				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULLO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.src[1].sel = tmp0;
			alu.src[1].chan = 2;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 12. tmp0.w = src1 - tmp0.y = r */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp0;
		alu.dst.chan = 3;
		alu.dst.write = 1;

		if (signed_op) {
			alu.src[0].sel = tmp2;
			alu.src[0].chan = 0;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		alu.src[1].sel = tmp0;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SETGE_UINT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 3;
		if (signed_op) {
			alu.src[1].sel = tmp2;
			alu.src[1].chan = 1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
		}

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SETGE_UINT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 1;
		alu.dst.write = 1;

		if (signed_op) {
			alu.src[0].sel = tmp2;
			alu.src[0].chan = 0;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		alu.src[1].sel = tmp0;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		if (mod) { /* UMOD */

			/* 15. tmp1.z = tmp0.w - src2 = r - src2 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 3;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* 16. tmp1.w = tmp0.w + src2 = r + src2 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 3;
			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		} else { /* UDIV */

			/* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;
			alu.src[1].sel = V_SQ_ALU_SRC_1_INT;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* 16. tmp1.w = tmp0.z - 1 = q - 1 (ADD of the -1 inline constant) */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;
			alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		}

		/* 17. tmp1.x = tmp1.x & tmp1.y */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_AND_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
		/* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 2;
		alu.dst.write = 1;

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = mod ? 3 : 2;
		alu.src[2].sel = tmp1;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		if (signed_op) {
			/* signed path still needs the sign fixup below, so keep
			 * the value in the temp instead of the final dst */
			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;
		} else {
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		}

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 3;
		alu.src[2].sel = tmp0;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		if (signed_op) {

			/* fix the sign of the result */

			if (mod) {

				/* tmp0.x = -tmp0.z */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_SUB_INT;

				alu.dst.sel = tmp0;
				alu.dst.chan = 0;
				alu.dst.write = 1;

				alu.src[0].sel = V_SQ_ALU_SRC_0;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

				/* sign of the remainder is the same as the sign of src0 */
				/* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

			} else {

				/* tmp0.x = -tmp0.z */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_SUB_INT;

				alu.dst.sel = tmp0;
				alu.dst.chan = 0;
				alu.dst.write = 1;

				alu.src[0].sel = V_SQ_ALU_SRC_0;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

				/* fix the quotient sign (same as the sign of src0*src1) */
				/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				alu.src[0].sel = tmp2;
				alu.src[0].chan = 2;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
	}
	return 0;
}

/* TGSI UDIV: unsigned quotient */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 0);
}

/* TGSI UMOD: unsigned remainder */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 0);
}

/* TGSI IDIV: signed quotient */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 1);
}

/* TGSI IMOD: signed remainder */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 1);
}


/* Float -> integer conversion: TRUNC into a temp, then the
 * opcode-specific conversion op (from ctx->inst_info->op). */
static int tgsi_f2i(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst =
&ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* pass 1: truncate each enabled channel into temp_reg */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_TRUNC;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* pass 2: convert the truncated values into the real destination */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		/* FLT_TO_UINT is always marked last — presumably because it is a
		 * trans-slot-only op on these chips; TODO confirm against the ISA */
		if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* TGSI IABS: integer absolute value via negate + conditional select. */
static int tgsi_iabs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* tmp = -src */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[0].sel = V_SQ_ALU_SRC_0;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (src >= 0 ? src : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		alu.dst.write = 1;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI ISSG: integer sign (-1, 0 or 1) built from two conditional selects. */
static int tgsi_issg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* tmp = (src >= 0 ? src : -1) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (tmp > 0 ? 1 : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT_INT;
		alu.is_op3 = 1;
		alu.dst.write = 1;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}



/* TGSI SSG: float sign (-1.0, 0.0 or 1.0) via two CNDGT selects. */
static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	/* tmp = (src > 0 ? 1 : src) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.src[1].sel = V_SQ_ALU_SRC_1;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (-tmp > 0 ?
-1 : tmp) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		alu.src[0].neg = 1;

		alu.src[1].sel = V_SQ_ALU_SRC_1;
		alu.src[1].neg = 1;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI BFI (bitfield insert): dst = (src1 << src2) inserted into src0
 * under a mask of src3 bits starting at bit src2 (BFM builds the mask). */
static int tgsi_bfi(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	t1 = ctx->temp_reg;

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* create mask tmp */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_BFM_INT;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* shift insert left */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_LSHL_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* actual bitfield insert */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_BFI_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* TGSI IMSB/UMSB: index of the most significant set (or sign-differing)
 * bit, counted from the lsb as TGSI requires. */
static int tgsi_msb(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
	       ctx->inst_info->op == ALU_OP1_FFBH_UINT);

	t1 = ctx->temp_reg;

	/* bit position is indexed from lsb by TGSI, and from msb by the hardware */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t1 = FFBH_INT / FFBH_UINT */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t2 = 31 - t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[0].value = 31;
		alu.src[1].sel = t1;
		alu.src[1].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* result = t1 >= 0 ? t2 : t1
		 * (FFBH returns a negative "not found" value, which is
		 * passed through unchanged) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		alu.src[2].sel = t1;
		alu.src[2].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* Evergreen/Cayman implementation of the TGSI INTERP_* opcodes:
 * re-interpolates an input attribute at the centroid, at an explicit
 * offset, or at a given sample position. */
static int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
	unsigned location;
	int input;

	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);

	input = inst->Src[0].Register.Index;

	/* Interpolators have been marked for use already by allocate_system_value_inputs */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */
	}
	else {
		location = TGSI_INTERPOLATE_LOC_CENTROID;
	}

	k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location);
	if (k < 0)
		k = 0;
	interp_gpr = ctx->eg_interpolators[k].ij_index / 2;
	interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2);

	/* NOTE: currently offset is not perspective correct */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
	    inst->Instruction.Opcode ==
TGSI_OPCODE_INTERP_SAMPLE) { 6356 int sample_gpr = -1; 6357 int gradientsH, gradientsV; 6358 struct r600_bytecode_tex tex; 6359 6360 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6361 sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]); 6362 } 6363 6364 gradientsH = r600_get_temp(ctx); 6365 gradientsV = r600_get_temp(ctx); 6366 for (i = 0; i < 2; i++) { 6367 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6368 tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V; 6369 tex.src_gpr = interp_gpr; 6370 tex.src_sel_x = interp_base_chan + 0; 6371 tex.src_sel_y = interp_base_chan + 1; 6372 tex.src_sel_z = 0; 6373 tex.src_sel_w = 0; 6374 tex.dst_gpr = i == 0 ? gradientsH : gradientsV; 6375 tex.dst_sel_x = 0; 6376 tex.dst_sel_y = 1; 6377 tex.dst_sel_z = 7; 6378 tex.dst_sel_w = 7; 6379 tex.inst_mod = 1; // Use per pixel gradient calculation 6380 tex.sampler_id = 0; 6381 tex.resource_id = tex.sampler_id; 6382 r = r600_bytecode_add_tex(ctx->bc, &tex); 6383 if (r) 6384 return r; 6385 } 6386 6387 for (i = 0; i < 2; i++) { 6388 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6389 alu.op = ALU_OP3_MULADD; 6390 alu.is_op3 = 1; 6391 alu.src[0].sel = gradientsH; 6392 alu.src[0].chan = i; 6393 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6394 alu.src[1].sel = sample_gpr; 6395 alu.src[1].chan = 2; 6396 } 6397 else { 6398 r600_bytecode_src(&alu.src[1], &ctx->src[1], 0); 6399 } 6400 alu.src[2].sel = interp_gpr; 6401 alu.src[2].chan = interp_base_chan + i; 6402 alu.dst.sel = ctx->temp_reg; 6403 alu.dst.chan = i; 6404 alu.last = i == 1; 6405 6406 r = r600_bytecode_add_alu(ctx->bc, &alu); 6407 if (r) 6408 return r; 6409 } 6410 6411 for (i = 0; i < 2; i++) { 6412 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6413 alu.op = ALU_OP3_MULADD; 6414 alu.is_op3 = 1; 6415 alu.src[0].sel = gradientsV; 6416 alu.src[0].chan = i; 6417 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6418 alu.src[1].sel = 
sample_gpr;
				alu.src[1].chan = 3;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
			}
			/* MULADD accumulates the V-gradient term on top of the
			 * H-gradient result already written to temp_reg by the
			 * previous loop. */
			alu.src[2].sel = ctx->temp_reg;
			alu.src[2].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	tmp = r600_get_temp(ctx);
	/* Emit the interpolation proper: INTERP_ZW for the first four ALU
	 * slots, INTERP_XY for the last four.  dst.write is set only for
	 * i = 2..5; the other slots participate in the group but their
	 * results are discarded. */
	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;

		alu.dst.sel = tmp;
		if ((i > 1 && i < 6)) {
			alu.dst.write = 1;
		}
		else {
			alu.dst.write = 0;
		}
		alu.dst.chan = i % 4;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			/* read the adjusted values computed into temp_reg above */
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1 - (i % 2);
		} else {
			alu.src[0].sel = interp_gpr;
			alu.src[0].chan = interp_base_chan + 1 - (i % 2);
		}
		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[1].chan = 0;

		alu.last = i % 4 == 3;
		alu.bank_swizzle_force = SQ_ALU_VEC_210;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	// INTERP can't swizzle dst
	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	for (i = 0; i <= lasti; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = tmp;
		alu.src[0].chan = ctx->src[0].swizzle[i];
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		alu.last = i == lasti;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}


/* Copy ctx->temp_reg into the instruction's destination, one MOV per
 * channel selected by the write mask.  Masked-out channels emit a NOP so
 * that all four slots of the ALU group stay populated, with alu.last set
 * on the fourth slot. */
static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
{
	struct r600_bytecode_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
			alu.op = ALU_OP0_NOP;
			alu.dst.chan = i;
		} else {
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = i;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Build an ALU source operand for a 3-operand instruction.  op3 operands
 * cannot carry the abs modifier, so when the TGSI source requests abs the
 * value is first copied (abs still attached to the MOV source) into the
 * caller-provided temp register, and the operand is rewritten to read
 * that temp instead. */
static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx,
                                 unsigned temp, int chan,
                                 struct r600_bytecode_alu_src *bc_src,
                                 const struct r600_shader_src *shader_src)
{
	struct r600_bytecode_alu alu;
	int r;

	r600_bytecode_src(bc_src, shader_src, chan);

	/* op3 operands don't support abs modifier */
	if (bc_src->abs) {
		assert(temp!=0); /* we actually need the extra register, make sure it is allocated. */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = temp;
		alu.dst.chan = chan;
		alu.dst.write = 1;

		alu.src[0] = *bc_src;
		alu.last = true; // sufficient?
r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* point the op3 operand at the temp we just wrote */
		memset(bc_src, 0, sizeof(*bc_src));
		bc_src->sel = temp;
		bc_src->chan = chan;
	}
	return 0;
}

/* Generic translation of a 3-source TGSI ALU op: emit one op3 ALU
 * instruction per channel enabled in the destination write mask, with
 * alu.last set on the final written channel.  Sources carrying the abs
 * modifier are lowered through per-source temps by
 * tgsi_make_src_for_op3(). */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int temp_regs[4];

	/* reserve a temp for every source that will need abs lowering */
	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
		temp_regs[j] = 0;
		if (ctx->src[j].abs)
			temp_regs[j] = r600_get_temp(ctx);
	}
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
			if (r)
				return r;
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Dot-product style opcodes: the hardware reduce op is emitted on all
 * four channels, and the channels a shorter dot product does not consume
 * are neutralized by substituting the constant-0 (DP2/DP3) or constant-1
 * (DPH .w of src0) inline sources below. */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		/* handle some special cases */
		switch (inst->Instruction.Opcode) {
		case TGSI_OPCODE_DP2:
			if
(i > 1) {
				/* DP2 ignores z/w: feed zero into both operands */
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DP3:
			if (i > 2) {
				/* DP3 ignores w: feed zero into both operands */
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DPH:
			if (i == 3) {
				/* DPH uses 1.0 in place of src0.w */
				alu.src[0].sel = V_SQ_ALU_SRC_1;
				alu.src[0].chan = 0;
				alu.src[0].neg = 0;
			}
			break;
		default:
			break;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Texture fetch instructions can only read plain GPRs: any register file
 * other than TEMP/INPUT/OUTPUT, or a source carrying a neg/abs modifier,
 * must be copied into a temp first.  Geometry-shader inputs always
 * require loading. */
static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
						    unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
		inst->Src[index].Register.File != TGSI_FILE_INPUT &&
		inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
		ctx->src[index].neg || ctx->src[index].abs ||
		(inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == TGSI_PROCESSOR_GEOMETRY);
}

/* Map a TGSI source register to its hardware GPR index by adding the
 * per-file base offset. */
static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
					    unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
}

/* Translate TXF on a buffer resource into a vertex-style fetch (VFETCH).
 * On pre-Evergreen chips the fetched value additionally gets masked and
 * patched with per-buffer constants from the buffer-info constant buffer
 * (the AND/OR fixups emitted after the fetch below). */
static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
{
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_alu alu;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int src_gpr, r, i;
	int id = tgsi_tex_get_src_gpr(ctx, 1); /* resource index comes from src 1 */

	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
	if (src_requires_loading) {
		/* copy the coordinate into temp_reg so the fetch reads a plain GPR */
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		src_gpr = ctx->temp_reg;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.op = FETCH_OP_VFETCH;
	vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = src_gpr;
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	/* route each fetched component to its channel, or 7 (masked) when
	 * the write mask excludes it */
	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */
	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */
	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */
	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */
	vtx.use_const_fields = 1;

	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
		return r;

	/* Evergreen and later need no post-fetch fixups */
	if (ctx->bc->chip_class >= EVERGREEN)
		return 0;

	/* pre-Evergreen: AND each written channel with the per-buffer mask
	 * constant from the buffer-info constant buffer */
	for (i = 0; i < 4; i++) {
		int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_AND_INT;

		alu.dst.chan = i;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = i;

		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
		alu.src[1].sel += (id * 2);
		alu.src[1].chan = i % 4;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* if x or y was written, OR the second buffer-info dword into w */
	if (inst->Dst[0].Register.WriteMask & 3) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_OR_INT;

		alu.dst.chan = 3;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = 3;

		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
		alu.src[1].chan = 0;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TXQ on a buffer texture is answered from the buffer-info constant
 * buffer rather than the texture unit.  The constant layout differs:
 * Evergreen+ packs two buffers per constant word, r600 stores the value
 * in the second dword of each buffer's pair. */
static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int id = tgsi_tex_get_src_gpr(ctx, 1);

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_MOV;
	alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
	if (ctx->bc->chip_class >= EVERGREEN) {
		/* channel 0 or 2 of each word */
		alu.src[0].sel += (id / 2);
		alu.src[0].chan = (id % 2) * 2;
	} else {
		/* r600 we have them at channel 2 of the second dword */
		alu.src[0].sel += (id * 2) + 1;
		alu.src[0].chan = 1;
	}
	alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* Translate a TGSI texture opcode (TEX/TXB/TXL/TXD/TXF/TXQ/TG4/...) into
 * r600 texture-fetch bytecode, lowering coordinates, offsets, gradients
 * and compare values as the target and chip class require. */
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
	static float one_point_five = 1.5f;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_tex tex;
	struct r600_bytecode_alu alu;
	unsigned src_gpr;
	int r, i, j;
	int opcode;
	/* compressed-MSAA reads need the FMASK indirection emitted later */
	bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing &&
				    inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
				    (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
				     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);

	/* TXF cannot use the hardware offset fields; offsets are added to
	 * the coordinates instead (except for buffer textures) */
	bool txf_add_offsets = inst->Texture.NumOffsets &&
			       inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
			       inst->Texture.Texture !=
TGSI_TEXTURE_BUFFER; 6796 6797 /* Texture fetch instructions can only use gprs as source. 6798 * Also they cannot negate the source or take the absolute value */ 6799 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 6800 inst->Instruction.Opcode != TGSI_OPCODE_TXQS && 6801 tgsi_tex_src_requires_loading(ctx, 0)) || 6802 read_compressed_msaa || txf_add_offsets; 6803 6804 boolean src_loaded = FALSE; 6805 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; 6806 int8_t offset_x = 0, offset_y = 0, offset_z = 0; 6807 boolean has_txq_cube_array_z = false; 6808 unsigned sampler_index_mode; 6809 6810 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 6811 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6812 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 6813 if (inst->Dst[0].Register.WriteMask & 4) { 6814 ctx->shader->has_txq_cube_array_z_comp = true; 6815 has_txq_cube_array_z = true; 6816 } 6817 6818 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 6819 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6820 inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 6821 inst->Instruction.Opcode == TGSI_OPCODE_TG4) 6822 sampler_src_reg = 2; 6823 6824 /* TGSI moves the sampler to src reg 3 for TXD */ 6825 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 6826 sampler_src_reg = 3; 6827 6828 sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 6829 6830 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 6831 6832 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 6833 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 6834 ctx->shader->uses_tex_buffers = true; 6835 return r600_do_buffer_txq(ctx); 6836 } 6837 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 6838 if (ctx->bc->chip_class < EVERGREEN) 6839 ctx->shader->uses_tex_buffers = true; 6840 return do_vtx_fetch_inst(ctx, src_requires_loading); 6841 } 6842 } 6843 6844 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 6845 int out_chan; 6846 /* Add perspective divide */ 6847 if (ctx->bc->chip_class == CAYMAN) { 6848 out_chan = 2; 6849 for (i = 0; i < 3; i++) { 6850 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6851 alu.op = ALU_OP1_RECIP_IEEE; 6852 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6853 6854 alu.dst.sel = ctx->temp_reg; 6855 alu.dst.chan = i; 6856 if (i == 2) 6857 alu.last = 1; 6858 if (out_chan == i) 6859 alu.dst.write = 1; 6860 r = r600_bytecode_add_alu(ctx->bc, &alu); 6861 if (r) 6862 return r; 6863 } 6864 6865 } else { 6866 out_chan = 3; 6867 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6868 alu.op = ALU_OP1_RECIP_IEEE; 6869 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6870 6871 alu.dst.sel = ctx->temp_reg; 6872 alu.dst.chan = out_chan; 6873 alu.last = 1; 6874 alu.dst.write = 1; 6875 r = r600_bytecode_add_alu(ctx->bc, &alu); 6876 if (r) 6877 return r; 6878 } 6879 6880 for (i = 0; i < 3; i++) { 6881 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6882 alu.op = ALU_OP2_MUL; 6883 alu.src[0].sel = ctx->temp_reg; 6884 alu.src[0].chan = out_chan; 6885 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6886 alu.dst.sel = ctx->temp_reg; 6887 alu.dst.chan = i; 6888 alu.dst.write = 1; 6889 r = r600_bytecode_add_alu(ctx->bc, &alu); 6890 if (r) 6891 return r; 6892 } 6893 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6894 alu.op = ALU_OP1_MOV; 6895 alu.src[0].sel = V_SQ_ALU_SRC_1; 6896 
alu.src[0].chan = 0; 6897 alu.dst.sel = ctx->temp_reg; 6898 alu.dst.chan = 3; 6899 alu.last = 1; 6900 alu.dst.write = 1; 6901 r = r600_bytecode_add_alu(ctx->bc, &alu); 6902 if (r) 6903 return r; 6904 src_loaded = TRUE; 6905 src_gpr = ctx->temp_reg; 6906 } 6907 6908 6909 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 6910 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6911 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6912 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 6913 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 6914 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 6915 6916 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 6917 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 6918 6919 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 6920 for (i = 0; i < 4; i++) { 6921 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6922 alu.op = ALU_OP2_CUBE; 6923 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 6924 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 6925 alu.dst.sel = ctx->temp_reg; 6926 alu.dst.chan = i; 6927 if (i == 3) 6928 alu.last = 1; 6929 alu.dst.write = 1; 6930 r = r600_bytecode_add_alu(ctx->bc, &alu); 6931 if (r) 6932 return r; 6933 } 6934 6935 /* tmp1.z = RCP_e(|tmp1.z|) */ 6936 if (ctx->bc->chip_class == CAYMAN) { 6937 for (i = 0; i < 3; i++) { 6938 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6939 alu.op = ALU_OP1_RECIP_IEEE; 6940 alu.src[0].sel = ctx->temp_reg; 6941 alu.src[0].chan = 2; 6942 alu.src[0].abs = 1; 6943 alu.dst.sel = ctx->temp_reg; 6944 alu.dst.chan = i; 6945 if (i == 2) 6946 alu.dst.write = 1; 6947 if (i == 2) 6948 alu.last = 1; 6949 r = r600_bytecode_add_alu(ctx->bc, &alu); 6950 if (r) 6951 return r; 6952 } 6953 } else { 6954 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6955 alu.op = ALU_OP1_RECIP_IEEE; 6956 alu.src[0].sel = ctx->temp_reg; 6957 alu.src[0].chan = 2; 6958 alu.src[0].abs = 1; 6959 alu.dst.sel = ctx->temp_reg; 6960 alu.dst.chan = 2; 6961 
alu.dst.write = 1; 6962 alu.last = 1; 6963 r = r600_bytecode_add_alu(ctx->bc, &alu); 6964 if (r) 6965 return r; 6966 } 6967 6968 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 6969 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 6970 * muladd has no writemask, have to use another temp 6971 */ 6972 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6973 alu.op = ALU_OP3_MULADD; 6974 alu.is_op3 = 1; 6975 6976 alu.src[0].sel = ctx->temp_reg; 6977 alu.src[0].chan = 0; 6978 alu.src[1].sel = ctx->temp_reg; 6979 alu.src[1].chan = 2; 6980 6981 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6982 alu.src[2].chan = 0; 6983 alu.src[2].value = *(uint32_t *)&one_point_five; 6984 6985 alu.dst.sel = ctx->temp_reg; 6986 alu.dst.chan = 0; 6987 alu.dst.write = 1; 6988 6989 r = r600_bytecode_add_alu(ctx->bc, &alu); 6990 if (r) 6991 return r; 6992 6993 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6994 alu.op = ALU_OP3_MULADD; 6995 alu.is_op3 = 1; 6996 6997 alu.src[0].sel = ctx->temp_reg; 6998 alu.src[0].chan = 1; 6999 alu.src[1].sel = ctx->temp_reg; 7000 alu.src[1].chan = 2; 7001 7002 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 7003 alu.src[2].chan = 0; 7004 alu.src[2].value = *(uint32_t *)&one_point_five; 7005 7006 alu.dst.sel = ctx->temp_reg; 7007 alu.dst.chan = 1; 7008 alu.dst.write = 1; 7009 7010 alu.last = 1; 7011 r = r600_bytecode_add_alu(ctx->bc, &alu); 7012 if (r) 7013 return r; 7014 /* write initial compare value into Z component 7015 - W src 0 for shadow cube 7016 - X src 1 for shadow cube array */ 7017 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7018 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7019 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7020 alu.op = ALU_OP1_MOV; 7021 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 7022 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 7023 else 7024 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7025 alu.dst.sel = ctx->temp_reg; 7026 alu.dst.chan = 2; 7027 alu.dst.write = 1; 7028 alu.last = 1; 
7029 r = r600_bytecode_add_alu(ctx->bc, &alu); 7030 if (r) 7031 return r; 7032 } 7033 7034 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7035 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7036 if (ctx->bc->chip_class >= EVERGREEN) { 7037 int mytmp = r600_get_temp(ctx); 7038 static const float eight = 8.0f; 7039 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7040 alu.op = ALU_OP1_MOV; 7041 alu.src[0].sel = ctx->temp_reg; 7042 alu.src[0].chan = 3; 7043 alu.dst.sel = mytmp; 7044 alu.dst.chan = 0; 7045 alu.dst.write = 1; 7046 alu.last = 1; 7047 r = r600_bytecode_add_alu(ctx->bc, &alu); 7048 if (r) 7049 return r; 7050 7051 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 7052 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7053 alu.op = ALU_OP3_MULADD; 7054 alu.is_op3 = 1; 7055 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7056 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7057 alu.src[1].chan = 0; 7058 alu.src[1].value = *(uint32_t *)&eight; 7059 alu.src[2].sel = mytmp; 7060 alu.src[2].chan = 0; 7061 alu.dst.sel = ctx->temp_reg; 7062 alu.dst.chan = 3; 7063 alu.dst.write = 1; 7064 alu.last = 1; 7065 r = r600_bytecode_add_alu(ctx->bc, &alu); 7066 if (r) 7067 return r; 7068 } else if (ctx->bc->chip_class < EVERGREEN) { 7069 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7070 tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 7071 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7072 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7073 tex.src_gpr = r600_get_temp(ctx); 7074 tex.src_sel_x = 0; 7075 tex.src_sel_y = 0; 7076 tex.src_sel_z = 0; 7077 tex.src_sel_w = 0; 7078 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 7079 tex.coord_type_x = 1; 7080 tex.coord_type_y = 1; 7081 tex.coord_type_z = 1; 7082 tex.coord_type_w = 1; 7083 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7084 alu.op = ALU_OP1_MOV; 7085 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7086 alu.dst.sel = tex.src_gpr; 
7087 alu.dst.chan = 0; 7088 alu.last = 1; 7089 alu.dst.write = 1; 7090 r = r600_bytecode_add_alu(ctx->bc, &alu); 7091 if (r) 7092 return r; 7093 7094 r = r600_bytecode_add_tex(ctx->bc, &tex); 7095 if (r) 7096 return r; 7097 } 7098 7099 } 7100 7101 /* for cube forms of lod and bias we need to route things */ 7102 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 7103 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 7104 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7105 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 7106 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7107 alu.op = ALU_OP1_MOV; 7108 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7109 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 7110 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 7111 else 7112 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7113 alu.dst.sel = ctx->temp_reg; 7114 alu.dst.chan = 2; 7115 alu.last = 1; 7116 alu.dst.write = 1; 7117 r = r600_bytecode_add_alu(ctx->bc, &alu); 7118 if (r) 7119 return r; 7120 } 7121 7122 src_loaded = TRUE; 7123 src_gpr = ctx->temp_reg; 7124 } 7125 7126 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 7127 int temp_h = 0, temp_v = 0; 7128 int start_val = 0; 7129 7130 /* if we've already loaded the src (i.e. CUBE don't reload it). 
*/ 7131 if (src_loaded == TRUE) 7132 start_val = 1; 7133 else 7134 src_loaded = TRUE; 7135 for (i = start_val; i < 3; i++) { 7136 int treg = r600_get_temp(ctx); 7137 7138 if (i == 0) 7139 src_gpr = treg; 7140 else if (i == 1) 7141 temp_h = treg; 7142 else 7143 temp_v = treg; 7144 7145 for (j = 0; j < 4; j++) { 7146 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7147 alu.op = ALU_OP1_MOV; 7148 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 7149 alu.dst.sel = treg; 7150 alu.dst.chan = j; 7151 if (j == 3) 7152 alu.last = 1; 7153 alu.dst.write = 1; 7154 r = r600_bytecode_add_alu(ctx->bc, &alu); 7155 if (r) 7156 return r; 7157 } 7158 } 7159 for (i = 1; i < 3; i++) { 7160 /* set gradients h/v */ 7161 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7162 tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : 7163 FETCH_OP_SET_GRADIENTS_V; 7164 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7165 tex.sampler_index_mode = sampler_index_mode; 7166 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7167 tex.resource_index_mode = sampler_index_mode; 7168 7169 tex.src_gpr = (i == 1) ? 
temp_h : temp_v; 7170 tex.src_sel_x = 0; 7171 tex.src_sel_y = 1; 7172 tex.src_sel_z = 2; 7173 tex.src_sel_w = 3; 7174 7175 tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */ 7176 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 7177 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 7178 tex.coord_type_x = 1; 7179 tex.coord_type_y = 1; 7180 tex.coord_type_z = 1; 7181 tex.coord_type_w = 1; 7182 } 7183 r = r600_bytecode_add_tex(ctx->bc, &tex); 7184 if (r) 7185 return r; 7186 } 7187 } 7188 7189 if (src_requires_loading && !src_loaded) { 7190 for (i = 0; i < 4; i++) { 7191 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7192 alu.op = ALU_OP1_MOV; 7193 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7194 alu.dst.sel = ctx->temp_reg; 7195 alu.dst.chan = i; 7196 if (i == 3) 7197 alu.last = 1; 7198 alu.dst.write = 1; 7199 r = r600_bytecode_add_alu(ctx->bc, &alu); 7200 if (r) 7201 return r; 7202 } 7203 src_loaded = TRUE; 7204 src_gpr = ctx->temp_reg; 7205 } 7206 7207 /* get offset values */ 7208 if (inst->Texture.NumOffsets) { 7209 assert(inst->Texture.NumOffsets == 1); 7210 7211 /* The texture offset feature doesn't work with the TXF instruction 7212 * and must be emulated by adding the offset to the texture coordinates. 
*/ 7213 if (txf_add_offsets) { 7214 const struct tgsi_texture_offset *off = inst->TexOffsets; 7215 7216 switch (inst->Texture.Texture) { 7217 case TGSI_TEXTURE_3D: 7218 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7219 alu.op = ALU_OP2_ADD_INT; 7220 alu.src[0].sel = src_gpr; 7221 alu.src[0].chan = 2; 7222 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7223 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 7224 alu.dst.sel = src_gpr; 7225 alu.dst.chan = 2; 7226 alu.dst.write = 1; 7227 alu.last = 1; 7228 r = r600_bytecode_add_alu(ctx->bc, &alu); 7229 if (r) 7230 return r; 7231 /* fall through */ 7232 7233 case TGSI_TEXTURE_2D: 7234 case TGSI_TEXTURE_SHADOW2D: 7235 case TGSI_TEXTURE_RECT: 7236 case TGSI_TEXTURE_SHADOWRECT: 7237 case TGSI_TEXTURE_2D_ARRAY: 7238 case TGSI_TEXTURE_SHADOW2D_ARRAY: 7239 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7240 alu.op = ALU_OP2_ADD_INT; 7241 alu.src[0].sel = src_gpr; 7242 alu.src[0].chan = 1; 7243 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7244 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 7245 alu.dst.sel = src_gpr; 7246 alu.dst.chan = 1; 7247 alu.dst.write = 1; 7248 alu.last = 1; 7249 r = r600_bytecode_add_alu(ctx->bc, &alu); 7250 if (r) 7251 return r; 7252 /* fall through */ 7253 7254 case TGSI_TEXTURE_1D: 7255 case TGSI_TEXTURE_SHADOW1D: 7256 case TGSI_TEXTURE_1D_ARRAY: 7257 case TGSI_TEXTURE_SHADOW1D_ARRAY: 7258 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7259 alu.op = ALU_OP2_ADD_INT; 7260 alu.src[0].sel = src_gpr; 7261 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7262 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 7263 alu.dst.sel = src_gpr; 7264 alu.dst.write = 1; 7265 alu.last = 1; 7266 r = r600_bytecode_add_alu(ctx->bc, &alu); 7267 if (r) 7268 return r; 7269 break; 7270 /* texture offsets do not apply to other texture targets */ 7271 } 7272 } else { 7273 switch (inst->Texture.Texture) { 7274 case TGSI_TEXTURE_3D: 7275 offset_z = ctx->literals[4 * 
inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 7276 /* fallthrough */ 7277 case TGSI_TEXTURE_2D: 7278 case TGSI_TEXTURE_SHADOW2D: 7279 case TGSI_TEXTURE_RECT: 7280 case TGSI_TEXTURE_SHADOWRECT: 7281 case TGSI_TEXTURE_2D_ARRAY: 7282 case TGSI_TEXTURE_SHADOW2D_ARRAY: 7283 offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 7284 /* fallthrough */ 7285 case TGSI_TEXTURE_1D: 7286 case TGSI_TEXTURE_SHADOW1D: 7287 case TGSI_TEXTURE_1D_ARRAY: 7288 case TGSI_TEXTURE_SHADOW1D_ARRAY: 7289 offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 7290 } 7291 } 7292 } 7293 7294 /* Obtain the sample index for reading a compressed MSAA color texture. 7295 * To read the FMASK, we use the ldfptr instruction, which tells us 7296 * where the samples are stored. 7297 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 7298 * which is the identity mapping. Each nibble says which physical sample 7299 * should be fetched to get that sample. 7300 * 7301 * Assume src.z contains the sample index. It should be modified like this: 7302 * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 7303 * Then fetch the texel with src. 
7304 */ 7305 if (read_compressed_msaa) { 7306 unsigned sample_chan = 3; 7307 unsigned temp = r600_get_temp(ctx); 7308 assert(src_loaded); 7309 7310 /* temp.w = ldfptr() */ 7311 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7312 tex.op = FETCH_OP_LD; 7313 tex.inst_mod = 1; /* to indicate this is ldfptr */ 7314 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7315 tex.sampler_index_mode = sampler_index_mode; 7316 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7317 tex.resource_index_mode = sampler_index_mode; 7318 tex.src_gpr = src_gpr; 7319 tex.dst_gpr = temp; 7320 tex.dst_sel_x = 7; /* mask out these components */ 7321 tex.dst_sel_y = 7; 7322 tex.dst_sel_z = 7; 7323 tex.dst_sel_w = 0; /* store X */ 7324 tex.src_sel_x = 0; 7325 tex.src_sel_y = 1; 7326 tex.src_sel_z = 2; 7327 tex.src_sel_w = 3; 7328 tex.offset_x = offset_x; 7329 tex.offset_y = offset_y; 7330 tex.offset_z = offset_z; 7331 r = r600_bytecode_add_tex(ctx->bc, &tex); 7332 if (r) 7333 return r; 7334 7335 /* temp.x = sample_index*4 */ 7336 if (ctx->bc->chip_class == CAYMAN) { 7337 for (i = 0 ; i < 4; i++) { 7338 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7339 alu.op = ALU_OP2_MULLO_INT; 7340 alu.src[0].sel = src_gpr; 7341 alu.src[0].chan = sample_chan; 7342 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7343 alu.src[1].value = 4; 7344 alu.dst.sel = temp; 7345 alu.dst.chan = i; 7346 alu.dst.write = i == 0; 7347 if (i == 3) 7348 alu.last = 1; 7349 r = r600_bytecode_add_alu(ctx->bc, &alu); 7350 if (r) 7351 return r; 7352 } 7353 } else { 7354 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7355 alu.op = ALU_OP2_MULLO_INT; 7356 alu.src[0].sel = src_gpr; 7357 alu.src[0].chan = sample_chan; 7358 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7359 alu.src[1].value = 4; 7360 alu.dst.sel = temp; 7361 alu.dst.chan = 0; 7362 alu.dst.write = 1; 7363 alu.last = 1; 7364 r = r600_bytecode_add_alu(ctx->bc, &alu); 7365 if (r) 7366 return r; 7367 } 7368 7369 /* sample_index = temp.w >> temp.x */ 7370 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7371 alu.op = ALU_OP2_LSHR_INT; 7372 alu.src[0].sel = temp; 7373 alu.src[0].chan = 3; 7374 alu.src[1].sel = temp; 7375 alu.src[1].chan = 0; 7376 alu.dst.sel = src_gpr; 7377 alu.dst.chan = sample_chan; 7378 alu.dst.write = 1; 7379 alu.last = 1; 7380 r = r600_bytecode_add_alu(ctx->bc, &alu); 7381 if (r) 7382 return r; 7383 7384 /* sample_index & 0xF */ 7385 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7386 alu.op = ALU_OP2_AND_INT; 7387 alu.src[0].sel = src_gpr; 7388 alu.src[0].chan = sample_chan; 7389 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7390 alu.src[1].value = 0xF; 7391 alu.dst.sel = src_gpr; 7392 alu.dst.chan = sample_chan; 7393 alu.dst.write = 1; 7394 alu.last = 1; 7395 r = r600_bytecode_add_alu(ctx->bc, &alu); 7396 if (r) 7397 return r; 7398#if 0 7399 /* visualize the FMASK */ 7400 for (i = 0; i < 4; i++) { 7401 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7402 alu.op = ALU_OP1_INT_TO_FLT; 7403 alu.src[0].sel = src_gpr; 7404 alu.src[0].chan = sample_chan; 7405 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7406 alu.dst.chan = i; 7407 alu.dst.write = 1; 7408 alu.last = 1; 7409 r = r600_bytecode_add_alu(ctx->bc, &alu); 7410 if (r) 7411 return r; 7412 } 7413 return 0; 7414#endif 7415 } 7416 7417 /* does this shader want a num layers from TXQ for a cube array? 
*/ 7418 if (has_txq_cube_array_z) { 7419 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7420 7421 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7422 alu.op = ALU_OP1_MOV; 7423 7424 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 7425 if (ctx->bc->chip_class >= EVERGREEN) { 7426 /* channel 1 or 3 of each word */ 7427 alu.src[0].sel += (id / 2); 7428 alu.src[0].chan = ((id % 2) * 2) + 1; 7429 } else { 7430 /* r600 we have them at channel 2 of the second dword */ 7431 alu.src[0].sel += (id * 2) + 1; 7432 alu.src[0].chan = 2; 7433 } 7434 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 7435 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 7436 alu.last = 1; 7437 r = r600_bytecode_add_alu(ctx->bc, &alu); 7438 if (r) 7439 return r; 7440 /* disable writemask from texture instruction */ 7441 inst->Dst[0].Register.WriteMask &= ~4; 7442 } 7443 7444 opcode = ctx->inst_info->op; 7445 if (opcode == FETCH_OP_GATHER4 && 7446 inst->TexOffsets[0].File != TGSI_FILE_NULL && 7447 inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { 7448 opcode = FETCH_OP_GATHER4_O; 7449 7450 /* GATHER4_O/GATHER4_C_O use offset values loaded by 7451 SET_TEXTURE_OFFSETS instruction. The immediate offset values 7452 encoded in the instruction are ignored. 
*/ 7453 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7454 tex.op = FETCH_OP_SET_TEXTURE_OFFSETS; 7455 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7456 tex.sampler_index_mode = sampler_index_mode; 7457 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7458 tex.resource_index_mode = sampler_index_mode; 7459 7460 tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; 7461 tex.src_sel_x = inst->TexOffsets[0].SwizzleX; 7462 tex.src_sel_y = inst->TexOffsets[0].SwizzleY; 7463 tex.src_sel_z = inst->TexOffsets[0].SwizzleZ; 7464 tex.src_sel_w = 4; 7465 7466 tex.dst_sel_x = 7; 7467 tex.dst_sel_y = 7; 7468 tex.dst_sel_z = 7; 7469 tex.dst_sel_w = 7; 7470 7471 r = r600_bytecode_add_tex(ctx->bc, &tex); 7472 if (r) 7473 return r; 7474 } 7475 7476 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 7477 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 7478 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 7479 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7480 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 7481 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 7482 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7483 switch (opcode) { 7484 case FETCH_OP_SAMPLE: 7485 opcode = FETCH_OP_SAMPLE_C; 7486 break; 7487 case FETCH_OP_SAMPLE_L: 7488 opcode = FETCH_OP_SAMPLE_C_L; 7489 break; 7490 case FETCH_OP_SAMPLE_LB: 7491 opcode = FETCH_OP_SAMPLE_C_LB; 7492 break; 7493 case FETCH_OP_SAMPLE_G: 7494 opcode = FETCH_OP_SAMPLE_C_G; 7495 break; 7496 /* Texture gather variants */ 7497 case FETCH_OP_GATHER4: 7498 opcode = FETCH_OP_GATHER4_C; 7499 break; 7500 case FETCH_OP_GATHER4_O: 7501 opcode = FETCH_OP_GATHER4_C_O; 7502 break; 7503 } 7504 } 7505 7506 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7507 tex.op = opcode; 7508 7509 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7510 tex.sampler_index_mode = sampler_index_mode; 7511 tex.resource_id = tex.sampler_id + 
R600_MAX_CONST_BUFFERS; 7512 tex.resource_index_mode = sampler_index_mode; 7513 tex.src_gpr = src_gpr; 7514 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7515 7516 if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE || 7517 inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) { 7518 tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */ 7519 } 7520 7521 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 7522 int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]; 7523 tex.inst_mod = texture_component_select; 7524 7525 if (ctx->bc->chip_class == CAYMAN) { 7526 /* GATHER4 result order is different from TGSI TG4 */ 7527 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7; 7528 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7; 7529 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7; 7530 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7531 } else { 7532 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7533 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 7534 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7535 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7536 } 7537 } 7538 else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) { 7539 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7540 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7541 tex.dst_sel_z = 7; 7542 tex.dst_sel_w = 7; 7543 } 7544 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 7545 tex.dst_sel_x = 3; 7546 tex.dst_sel_y = 7; 7547 tex.dst_sel_z = 7; 7548 tex.dst_sel_w = 7; 7549 } 7550 else { 7551 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7552 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7553 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 
2 : 7; 7554 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7555 } 7556 7557 7558 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ || 7559 inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 7560 tex.src_sel_x = 4; 7561 tex.src_sel_y = 4; 7562 tex.src_sel_z = 4; 7563 tex.src_sel_w = 4; 7564 } else if (src_loaded) { 7565 tex.src_sel_x = 0; 7566 tex.src_sel_y = 1; 7567 tex.src_sel_z = 2; 7568 tex.src_sel_w = 3; 7569 } else { 7570 tex.src_sel_x = ctx->src[0].swizzle[0]; 7571 tex.src_sel_y = ctx->src[0].swizzle[1]; 7572 tex.src_sel_z = ctx->src[0].swizzle[2]; 7573 tex.src_sel_w = ctx->src[0].swizzle[3]; 7574 tex.src_rel = ctx->src[0].rel; 7575 } 7576 7577 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 7578 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7579 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7580 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7581 tex.src_sel_x = 1; 7582 tex.src_sel_y = 0; 7583 tex.src_sel_z = 3; 7584 tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 7585 } 7586 7587 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 7588 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 7589 tex.coord_type_x = 1; 7590 tex.coord_type_y = 1; 7591 } 7592 tex.coord_type_z = 1; 7593 tex.coord_type_w = 1; 7594 7595 tex.offset_x = offset_x; 7596 tex.offset_y = offset_y; 7597 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && 7598 (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 7599 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { 7600 tex.offset_z = 0; 7601 } 7602 else { 7603 tex.offset_z = offset_z; 7604 } 7605 7606 /* Put the depth for comparison in W. 7607 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 7608 * Some instructions expect the depth in Z. 
 */
	/* Plain shadow samplers carry the compare value in W (copied from Z
	 * here); SAMPLE_C_L / SAMPLE_C_LB already expect it in W's slot via
	 * their own layout, so they are excluded. */
	if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
	    opcode != FETCH_OP_SAMPLE_C_L &&
	    opcode != FETCH_OP_SAMPLE_C_LB) {
		tex.src_sel_w = tex.src_sel_z;
	}

	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
		if (opcode == FETCH_OP_SAMPLE_C_L ||
		    opcode == FETCH_OP_SAMPLE_C_LB) {
			/* the array index is read from Y */
			tex.coord_type_y = 0;
		} else {
			/* the array index is read from Z */
			tex.coord_type_z = 0;
			tex.src_sel_z = tex.src_sel_y;
		}
	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
		   inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
		   ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
		     inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
		    (ctx->bc->chip_class >= EVERGREEN)))
		/* the array index is read from Z */
		tex.coord_type_z = 0;

	/* mask unused source components (sel 7 = component disabled) */
	if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) {
		switch (inst->Texture.Texture) {
		case TGSI_TEXTURE_2D:
		case TGSI_TEXTURE_RECT:
			tex.src_sel_z = 7;
			tex.src_sel_w = 7;
			break;
		case TGSI_TEXTURE_1D_ARRAY:
			tex.src_sel_y = 7;
			tex.src_sel_w = 7;
			break;
		case TGSI_TEXTURE_1D:
			tex.src_sel_y = 7;
			tex.src_sel_z = 7;
			tex.src_sel_w = 7;
			break;
		}
	}

	r = r600_bytecode_add_tex(ctx->bc, &tex);
	if (r)
		return r;

	/* add shadow ambient support - gallium doesn't do it yet */
	return 0;
}

/* Emit TGSI LRP (dst = src0 * src1 + (1 - src0) * src2) as r600 ALU ops.
 * Fast path: when src0 is the inline 0.5 constant, LRP reduces to
 * (src1 + src2) * 0.5, done with one ADD using output modifier /2 (omod=3).
 * Returns 0 on success or the error from r600_bytecode_add_alu. */
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	unsigned i, temp_regs[2];
	int r;

	/* optimize if it's just an equal balance */
	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
		for (i = 0; i < lasti + 1; i++) {
			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD;
			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
			alu.omod = 3; /* output modifier: divide result by 2 */
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.dst.chan = i;
			if (i == lasti) {
				alu.last = 1;
			}
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		return 0;
	}

	/* 1 - src0 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		r600_bytecode_src_toggle_neg(&alu.src[1]);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* (1 - src0) * src2 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_MUL;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* src0 * src1 + (1 - src0) * src2 */
	/* MULADD cannot encode an abs modifier on op3 sources, so sources with
	 * abs are first copied through a temp by tgsi_make_src_for_op3. */
	if (ctx->src[0].abs)
		temp_regs[0] = r600_get_temp(ctx);
	else
		temp_regs[0] = 0;
	if (ctx->src[1].abs)
		temp_regs[1] = r600_get_temp(ctx);
	else
		temp_regs[1] = 0;

	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_MULADD;
		alu.is_op3 = 1;
		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
		if (r)
			return r;
		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]);
		if (r)
			return r;
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit TGSI CMP: per channel, dst = (src0 >= 0) ? src2 : src1, via CNDGE.
 * Note the operand order: CNDGE picks src[1] when src[0] >= 0. */
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, j;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int temp_regs[3];

	/* op3 sources cannot carry abs; route abs sources through temps */
	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
		temp_regs[j] = 0;
		if (ctx->src[j].abs)
			temp_regs[j] = r600_get_temp(ctx);
	}

	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE;
		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
		if (r)
			return r;
		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
		if (r)
			return r;
		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
		if (r)
			return r;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
7810 if (i == lasti) 7811 alu.last = 1; 7812 r = r600_bytecode_add_alu(ctx->bc, &alu); 7813 if (r) 7814 return r; 7815 } 7816 return 0; 7817} 7818 7819static int tgsi_ucmp(struct r600_shader_ctx *ctx) 7820{ 7821 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7822 struct r600_bytecode_alu alu; 7823 int i, r; 7824 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7825 7826 for (i = 0; i < lasti + 1; i++) { 7827 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7828 continue; 7829 7830 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7831 alu.op = ALU_OP3_CNDE_INT; 7832 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7833 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 7834 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 7835 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7836 alu.dst.chan = i; 7837 alu.dst.write = 1; 7838 alu.is_op3 = 1; 7839 if (i == lasti) 7840 alu.last = 1; 7841 r = r600_bytecode_add_alu(ctx->bc, &alu); 7842 if (r) 7843 return r; 7844 } 7845 return 0; 7846} 7847 7848static int tgsi_xpd(struct r600_shader_ctx *ctx) 7849{ 7850 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7851 static const unsigned int src0_swizzle[] = {2, 0, 1}; 7852 static const unsigned int src1_swizzle[] = {1, 2, 0}; 7853 struct r600_bytecode_alu alu; 7854 uint32_t use_temp = 0; 7855 int i, r; 7856 7857 if (inst->Dst[0].Register.WriteMask != 0xf) 7858 use_temp = 1; 7859 7860 for (i = 0; i < 4; i++) { 7861 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7862 alu.op = ALU_OP2_MUL; 7863 if (i < 3) { 7864 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 7865 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 7866 } else { 7867 alu.src[0].sel = V_SQ_ALU_SRC_0; 7868 alu.src[0].chan = i; 7869 alu.src[1].sel = V_SQ_ALU_SRC_0; 7870 alu.src[1].chan = i; 7871 } 7872 7873 alu.dst.sel = ctx->temp_reg; 7874 alu.dst.chan = i; 7875 alu.dst.write = 1; 7876 7877 if (i == 3) 7878 alu.last = 
1; 7879 r = r600_bytecode_add_alu(ctx->bc, &alu); 7880 if (r) 7881 return r; 7882 } 7883 7884 for (i = 0; i < 4; i++) { 7885 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7886 alu.op = ALU_OP3_MULADD; 7887 7888 if (i < 3) { 7889 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 7890 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 7891 } else { 7892 alu.src[0].sel = V_SQ_ALU_SRC_0; 7893 alu.src[0].chan = i; 7894 alu.src[1].sel = V_SQ_ALU_SRC_0; 7895 alu.src[1].chan = i; 7896 } 7897 7898 alu.src[2].sel = ctx->temp_reg; 7899 alu.src[2].neg = 1; 7900 alu.src[2].chan = i; 7901 7902 if (use_temp) 7903 alu.dst.sel = ctx->temp_reg; 7904 else 7905 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7906 alu.dst.chan = i; 7907 alu.dst.write = 1; 7908 alu.is_op3 = 1; 7909 if (i == 3) 7910 alu.last = 1; 7911 r = r600_bytecode_add_alu(ctx->bc, &alu); 7912 if (r) 7913 return r; 7914 } 7915 if (use_temp) 7916 return tgsi_helper_copy(ctx, inst); 7917 return 0; 7918} 7919 7920static int tgsi_exp(struct r600_shader_ctx *ctx) 7921{ 7922 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7923 struct r600_bytecode_alu alu; 7924 int r; 7925 int i; 7926 7927 /* result.x = 2^floor(src); */ 7928 if (inst->Dst[0].Register.WriteMask & 1) { 7929 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7930 7931 alu.op = ALU_OP1_FLOOR; 7932 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7933 7934 alu.dst.sel = ctx->temp_reg; 7935 alu.dst.chan = 0; 7936 alu.dst.write = 1; 7937 alu.last = 1; 7938 r = r600_bytecode_add_alu(ctx->bc, &alu); 7939 if (r) 7940 return r; 7941 7942 if (ctx->bc->chip_class == CAYMAN) { 7943 for (i = 0; i < 3; i++) { 7944 alu.op = ALU_OP1_EXP_IEEE; 7945 alu.src[0].sel = ctx->temp_reg; 7946 alu.src[0].chan = 0; 7947 7948 alu.dst.sel = ctx->temp_reg; 7949 alu.dst.chan = i; 7950 alu.dst.write = i == 0; 7951 alu.last = i == 2; 7952 r = r600_bytecode_add_alu(ctx->bc, &alu); 7953 if (r) 7954 return r; 7955 } 7956 } else { 7957 alu.op 
= ALU_OP1_EXP_IEEE; 7958 alu.src[0].sel = ctx->temp_reg; 7959 alu.src[0].chan = 0; 7960 7961 alu.dst.sel = ctx->temp_reg; 7962 alu.dst.chan = 0; 7963 alu.dst.write = 1; 7964 alu.last = 1; 7965 r = r600_bytecode_add_alu(ctx->bc, &alu); 7966 if (r) 7967 return r; 7968 } 7969 } 7970 7971 /* result.y = tmp - floor(tmp); */ 7972 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 7973 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7974 7975 alu.op = ALU_OP1_FRACT; 7976 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7977 7978 alu.dst.sel = ctx->temp_reg; 7979#if 0 7980 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7981 if (r) 7982 return r; 7983#endif 7984 alu.dst.write = 1; 7985 alu.dst.chan = 1; 7986 7987 alu.last = 1; 7988 7989 r = r600_bytecode_add_alu(ctx->bc, &alu); 7990 if (r) 7991 return r; 7992 } 7993 7994 /* result.z = RoughApprox2ToX(tmp);*/ 7995 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 7996 if (ctx->bc->chip_class == CAYMAN) { 7997 for (i = 0; i < 3; i++) { 7998 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7999 alu.op = ALU_OP1_EXP_IEEE; 8000 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8001 8002 alu.dst.sel = ctx->temp_reg; 8003 alu.dst.chan = i; 8004 if (i == 2) { 8005 alu.dst.write = 1; 8006 alu.last = 1; 8007 } 8008 8009 r = r600_bytecode_add_alu(ctx->bc, &alu); 8010 if (r) 8011 return r; 8012 } 8013 } else { 8014 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8015 alu.op = ALU_OP1_EXP_IEEE; 8016 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8017 8018 alu.dst.sel = ctx->temp_reg; 8019 alu.dst.write = 1; 8020 alu.dst.chan = 2; 8021 8022 alu.last = 1; 8023 8024 r = r600_bytecode_add_alu(ctx->bc, &alu); 8025 if (r) 8026 return r; 8027 } 8028 } 8029 8030 /* result.w = 1.0;*/ 8031 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 8032 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8033 8034 alu.op = ALU_OP1_MOV; 8035 alu.src[0].sel = V_SQ_ALU_SRC_1; 8036 alu.src[0].chan = 0; 8037 8038 alu.dst.sel = ctx->temp_reg; 8039 
alu.dst.chan = 3; 8040 alu.dst.write = 1; 8041 alu.last = 1; 8042 r = r600_bytecode_add_alu(ctx->bc, &alu); 8043 if (r) 8044 return r; 8045 } 8046 return tgsi_helper_copy(ctx, inst); 8047} 8048 8049static int tgsi_log(struct r600_shader_ctx *ctx) 8050{ 8051 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8052 struct r600_bytecode_alu alu; 8053 int r; 8054 int i; 8055 8056 /* result.x = floor(log2(|src|)); */ 8057 if (inst->Dst[0].Register.WriteMask & 1) { 8058 if (ctx->bc->chip_class == CAYMAN) { 8059 for (i = 0; i < 3; i++) { 8060 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8061 8062 alu.op = ALU_OP1_LOG_IEEE; 8063 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8064 r600_bytecode_src_set_abs(&alu.src[0]); 8065 8066 alu.dst.sel = ctx->temp_reg; 8067 alu.dst.chan = i; 8068 if (i == 0) 8069 alu.dst.write = 1; 8070 if (i == 2) 8071 alu.last = 1; 8072 r = r600_bytecode_add_alu(ctx->bc, &alu); 8073 if (r) 8074 return r; 8075 } 8076 8077 } else { 8078 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8079 8080 alu.op = ALU_OP1_LOG_IEEE; 8081 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 8082 r600_bytecode_src_set_abs(&alu.src[0]); 8083 8084 alu.dst.sel = ctx->temp_reg; 8085 alu.dst.chan = 0; 8086 alu.dst.write = 1; 8087 alu.last = 1; 8088 r = r600_bytecode_add_alu(ctx->bc, &alu); 8089 if (r) 8090 return r; 8091 } 8092 8093 alu.op = ALU_OP1_FLOOR; 8094 alu.src[0].sel = ctx->temp_reg; 8095 alu.src[0].chan = 0; 8096 8097 alu.dst.sel = ctx->temp_reg; 8098 alu.dst.chan = 0; 8099 alu.dst.write = 1; 8100 alu.last = 1; 8101 8102 r = r600_bytecode_add_alu(ctx->bc, &alu); 8103 if (r) 8104 return r; 8105 } 8106 8107 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 8108 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 8109 8110 if (ctx->bc->chip_class == CAYMAN) { 8111 for (i = 0; i < 3; i++) { 8112 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8113 8114 alu.op = ALU_OP1_LOG_IEEE; 8115 r600_bytecode_src(&alu.src[0], &ctx->src[0], 
0);
				r600_bytecode_src_set_abs(&alu.src[0]);

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				/* only chan 1 is kept; chans 0/2 just fill the
				 * Cayman vector slots for the t-only op */
				if (i == 1)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.op = ALU_OP1_LOG_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
			r600_bytecode_src_set_abs(&alu.src[0]);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 1;
			alu.dst.write = 1;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp.y = floor(log2(|src.x|)) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_FLOOR;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 1;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* temp.y = 2^temp.y */
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_EXP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 1;

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 1)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_EXP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 1;
			alu.dst.write = 1;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp.y = 1 / temp.y */
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_RECIP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 1;

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 1)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_RECIP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 1;
			alu.dst.write = 1;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp.y = |src.x| * temp.y  (i.e. |src.x| / 2^floor(log2|src.x|)) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP2_MUL;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
		r600_bytecode_src_set_abs(&alu.src[0]);

		alu.src[1].sel = ctx->temp_reg;
		alu.src[1].chan = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 1;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* result.z = log2(|src|);*/
	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP1_LOG_IEEE;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
				r600_bytecode_src_set_abs(&alu.src[0]);

				alu.dst.sel = ctx->temp_reg;
				if (i == 2)
					alu.dst.write = 1;
				alu.dst.chan = i;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.op = ALU_OP1_LOG_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
			r600_bytecode_src_set_abs(&alu.src[0]);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;
			alu.dst.chan = 2;
			alu.last = 1;

8282 r = r600_bytecode_add_alu(ctx->bc, &alu); 8283 if (r) 8284 return r; 8285 } 8286 } 8287 8288 /* result.w = 1.0; */ 8289 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 8290 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8291 8292 alu.op = ALU_OP1_MOV; 8293 alu.src[0].sel = V_SQ_ALU_SRC_1; 8294 alu.src[0].chan = 0; 8295 8296 alu.dst.sel = ctx->temp_reg; 8297 alu.dst.chan = 3; 8298 alu.dst.write = 1; 8299 alu.last = 1; 8300 8301 r = r600_bytecode_add_alu(ctx->bc, &alu); 8302 if (r) 8303 return r; 8304 } 8305 8306 return tgsi_helper_copy(ctx, inst); 8307} 8308 8309static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 8310{ 8311 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 8312 struct r600_bytecode_alu alu; 8313 int r; 8314 int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 8315 unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index); 8316 8317 assert(inst->Dst[0].Register.Index < 3); 8318 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 8319 8320 switch (inst->Instruction.Opcode) { 8321 case TGSI_OPCODE_ARL: 8322 alu.op = ALU_OP1_FLT_TO_INT_FLOOR; 8323 break; 8324 case TGSI_OPCODE_ARR: 8325 alu.op = ALU_OP1_FLT_TO_INT; 8326 break; 8327 case TGSI_OPCODE_UARL: 8328 alu.op = ALU_OP1_MOV; 8329 break; 8330 default: 8331 assert(0); 8332 return -1; 8333 } 8334 8335 for (i = 0; i <= lasti; ++i) { 8336 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 8337 continue; 8338 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 8339 alu.last = i == lasti; 8340 alu.dst.sel = reg; 8341 alu.dst.chan = i; 8342 alu.dst.write = 1; 8343 r = r600_bytecode_add_alu(ctx->bc, &alu); 8344 if (r) 8345 return r; 8346 } 8347 8348 if (inst->Dst[0].Register.Index > 0) 8349 ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0; 8350 else 8351 ctx->bc->ar_loaded = 0; 8352 8353 return 0; 8354} 8355static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 8356{ 8357 struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		/* ARL = floor then convert, since r600/r700 lack
		 * FLT_TO_INT_FLOOR */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLOOR;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}

		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.src[0].sel = ctx->bc->ar_reg;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			alu.dst.chan = i;
			alu.src[0].chan = i;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}
		break;
	case TGSI_OPCODE_ARR:
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	case TGSI_OPCODE_UARL:
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	default:
		assert(0);
		return -1;
	}

	/* force AR reload before next indexed access */
	ctx->bc->ar_loaded = 0;
	return 0;
}

/* Emit TGSI DST (distance vector):
 *   dst.x = 1, dst.y = src0.y * src1.y, dst.z = src0.z, dst.w = src1.w.
 * Implemented as four MULs, substituting the inline constant 1.0 for the
 * operand that is not used in a given channel. */
static int tgsi_opdst(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r = 0;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP2_MUL;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		/* src0 is unused for x and w */
		if (i == 0 || i == 3) {
			alu.src[0].sel = V_SQ_ALU_SRC_1;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		/* src1 is unused for x and z */
		if (i == 0 || i == 2) {
			alu.src[1].sel = V_SQ_ALU_SRC_1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
		}
		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit a predicate-setting compare of src0 against 0 (opcode is one of the
 * ALU_OP2_PRED_SET* ops) that updates the execute and predicate masks.
 * alu_type selects the CF clause type (e.g. ALU_PUSH_BEFORE vs plain ALU
 * for the Cayman workaround in emit_if). */
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type)
{
	struct r600_bytecode_alu alu;
	int r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = opcode;
	alu.execute_mask = 1;
	alu.update_pred = 1;

	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.dst.chan = 0;

	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	alu.src[1].sel = V_SQ_ALU_SRC_0;
	alu.src[1].chan = 0;

	alu.last = 1;

	r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
	if (r)
		return r;
	return 0;
}

/* Pop `pops` levels off the hardware branch stack. When possible this is
 * folded into the preceding ALU CF instruction (ALU_POP_AFTER /
 * ALU_POP2_AFTER); otherwise an explicit POP CF instruction is emitted. */
static int pops(struct r600_shader_ctx *ctx, int pops)
{
	unsigned force_pop = ctx->bc->force_add_cf;

	if (!force_pop) {
		int alu_pop = 3;
		if (ctx->bc->cf_last) {
			if (ctx->bc->cf_last->op == CF_OP_ALU)
				alu_pop = 0;
			else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER)
				alu_pop = 1;
		}
		alu_pop += pops;
		if (alu_pop == 1) {
			ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER;
			ctx->bc->force_add_cf = 1;
8506 } else if (alu_pop == 2) { 8507 ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER; 8508 ctx->bc->force_add_cf = 1; 8509 } else { 8510 force_pop = 1; 8511 } 8512 } 8513 8514 if (force_pop) { 8515 r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); 8516 ctx->bc->cf_last->pop_count = pops; 8517 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 8518 } 8519 8520 return 0; 8521} 8522 8523static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, 8524 unsigned reason) 8525{ 8526 struct r600_stack_info *stack = &ctx->bc->stack; 8527 unsigned elements, entries; 8528 8529 unsigned entry_size = stack->entry_size; 8530 8531 elements = (stack->loop + stack->push_wqm ) * entry_size; 8532 elements += stack->push; 8533 8534 switch (ctx->bc->chip_class) { 8535 case R600: 8536 case R700: 8537 /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on 8538 * the stack must be reserved to hold the current active/continue 8539 * masks */ 8540 if (reason == FC_PUSH_VPM) { 8541 elements += 2; 8542 } 8543 break; 8544 8545 case CAYMAN: 8546 /* r9xx: any stack operation on empty stack consumes 2 additional 8547 * elements */ 8548 elements += 2; 8549 8550 /* fallthrough */ 8551 /* FIXME: do the two elements added above cover the cases for the 8552 * r8xx+ below? */ 8553 8554 case EVERGREEN: 8555 /* r8xx+: 2 extra elements are not always required, but one extra 8556 * element must be added for each of the following cases: 8557 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest 8558 * stack usage. 8559 * (Currently we don't use ALU_ELSE_AFTER.) 8560 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM 8561 * PUSH instruction executed. 8562 * 8563 * NOTE: it seems we also need to reserve additional element in some 8564 * other cases, e.g. 
when we have 4 levels of PUSH_VPM in the shader, 8565 * then STACK_SIZE should be 2 instead of 1 */ 8566 if (reason == FC_PUSH_VPM) { 8567 elements += 1; 8568 } 8569 break; 8570 8571 default: 8572 assert(0); 8573 break; 8574 } 8575 8576 /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 8577 * for all chips, so we use 4 in the final formula, not the real entry_size 8578 * for the chip */ 8579 entry_size = 4; 8580 8581 entries = (elements + (entry_size - 1)) / entry_size; 8582 8583 if (entries > stack->max_entries) 8584 stack->max_entries = entries; 8585} 8586 8587static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 8588{ 8589 switch(reason) { 8590 case FC_PUSH_VPM: 8591 --ctx->bc->stack.push; 8592 assert(ctx->bc->stack.push >= 0); 8593 break; 8594 case FC_PUSH_WQM: 8595 --ctx->bc->stack.push_wqm; 8596 assert(ctx->bc->stack.push_wqm >= 0); 8597 break; 8598 case FC_LOOP: 8599 --ctx->bc->stack.loop; 8600 assert(ctx->bc->stack.loop >= 0); 8601 break; 8602 default: 8603 assert(0); 8604 break; 8605 } 8606} 8607 8608static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) 8609{ 8610 switch (reason) { 8611 case FC_PUSH_VPM: 8612 ++ctx->bc->stack.push; 8613 break; 8614 case FC_PUSH_WQM: 8615 ++ctx->bc->stack.push_wqm; 8616 case FC_LOOP: 8617 ++ctx->bc->stack.loop; 8618 break; 8619 default: 8620 assert(0); 8621 } 8622 8623 callstack_update_max_depth(ctx, reason); 8624} 8625 8626static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 8627{ 8628 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 8629 8630 sp->mid = realloc((void *)sp->mid, 8631 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 8632 sp->mid[sp->num_mid] = ctx->bc->cf_last; 8633 sp->num_mid++; 8634} 8635 8636static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 8637{ 8638 ctx->bc->fc_sp++; 8639 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 8640 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 8641} 
8642 8643static void fc_poplevel(struct r600_shader_ctx *ctx) 8644{ 8645 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 8646 free(sp->mid); 8647 sp->mid = NULL; 8648 sp->num_mid = 0; 8649 sp->start = NULL; 8650 sp->type = 0; 8651 ctx->bc->fc_sp--; 8652} 8653 8654#if 0 8655static int emit_return(struct r600_shader_ctx *ctx) 8656{ 8657 r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN)); 8658 return 0; 8659} 8660 8661static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 8662{ 8663 8664 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP)); 8665 ctx->bc->cf_last->pop_count = pops; 8666 /* XXX work out offset */ 8667 return 0; 8668} 8669 8670static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 8671{ 8672 return 0; 8673} 8674 8675static void emit_testflag(struct r600_shader_ctx *ctx) 8676{ 8677 8678} 8679 8680static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 8681{ 8682 emit_testflag(ctx); 8683 emit_jump_to_offset(ctx, 1, 4); 8684 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 8685 pops(ctx, ifidx + 1); 8686 emit_return(ctx); 8687} 8688 8689static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 8690{ 8691 emit_testflag(ctx); 8692 8693 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 8694 ctx->bc->cf_last->pop_count = 1; 8695 8696 fc_set_mid(ctx, fc_sp); 8697 8698 pops(ctx, 1); 8699} 8700#endif 8701 8702static int emit_if(struct r600_shader_ctx *ctx, int opcode) 8703{ 8704 int alu_type = CF_OP_ALU_PUSH_BEFORE; 8705 8706 /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by 8707 * LOOP_STARTxxx for nested loops may put the branch stack into a state 8708 * such that ALU_PUSH_BEFORE doesn't work as expected. 
Workaround this
	 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
	if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
		r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
		alu_type = CF_OP_ALU;
	}

	emit_logic_pred(ctx, opcode, alu_type);

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);

	fc_pushlevel(ctx, FC_IF);

	callstack_push(ctx, FC_PUSH_VPM);
	return 0;
}

/* TGSI IF: float condition, compare against 0.0. */
static int tgsi_if(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE);
}

/* TGSI UIF: integer condition, compare against 0. */
static int tgsi_uif(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE_INT);
}

static int tgsi_else(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
	ctx->bc->cf_last->pop_count = 1;

	/* remember the ELSE so tgsi_endif() can patch its jump target, and
	 * retarget the IF's JUMP at this ELSE */
	fc_set_mid(ctx, ctx->bc->fc_sp);
	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
	return 0;
}

static int tgsi_endif(struct r600_shader_ctx *ctx)
{
	pops(ctx, 1);
	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
		R600_ERR("if/endif unbalanced in shader\n");
		return -1;
	}

	/* no ELSE was emitted: patch the IF's JUMP past the ENDIF;
	 * otherwise patch the recorded ELSE instead */
	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
	} else {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
	}
	fc_poplevel(ctx);

	callstack_pop(ctx, FC_PUSH_VPM);
	return 0;
}

static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
{
	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
	 * limited to 4096 iterations, like the other LOOP_* instructions.
 */
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);

	fc_pushlevel(ctx, FC_LOOP);

	/* check stack depth */
	callstack_push(ctx, FC_LOOP);
	return 0;
}

static int tgsi_endloop(struct r600_shader_ctx *ctx)
{
	int i;

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
		R600_ERR("loop/endloop in shader code are not paired.\n");
		return -EINVAL;
	}

	/* fixup loop pointers - from r600isa
	   LOOP END points to CF after LOOP START,
	   LOOP START point to CF after LOOP END
	   BRK/CONT point to LOOP END CF
	 */
	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;

	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;

	/* every BRK/CONT recorded on this frame targets the LOOP_END */
	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
	}
	/* XXX add LOOPRET support */
	fc_poplevel(ctx);
	callstack_pop(ctx, FC_LOOP);
	return 0;
}

/* TGSI BREAKC: conditionally break out of the innermost enclosing loop. */
static int tgsi_loop_breakc(struct r600_shader_ctx *ctx)
{
	int r;
	unsigned int fscp;

	/* locate the innermost FC_LOOP frame on the flow-control stack */
	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}
	if (fscp == 0) {
		R600_ERR("BREAKC not inside loop/endloop pair\n");
		return -EINVAL;
	}

	if (ctx->bc->chip_class == EVERGREEN &&
	    ctx->bc->family != CHIP_CYPRESS &&
	    ctx->bc->family != CHIP_JUNIPER) {
		/* HW bug: ALU_BREAK does not save the active mask correctly */
		r = tgsi_uif(ctx);
		if (r)
			return r;

		r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);

		return tgsi_endif(ctx);
	} else {
		r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);
	}

	return 0;
}

/* TGSI BRK/CONT: emit LOOP_BREAK or LOOP_CONTINUE (taken from inst_info->op)
 * and record it on the innermost loop frame so tgsi_endloop() can patch its
 * target address. */
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
	unsigned int fscp;

	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}

	if (fscp == 0) {
		R600_ERR("Break not inside loop/endloop pair\n");
		return -EINVAL;
	}

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);

	fc_set_mid(ctx, fscp);

	return 0;
}

/* Geometry shader EMIT/ENDPRIM: flush ring writes before EMIT_VERTEX and
 * bump the ring offset afterwards.  Src[0].x (a literal) selects the
 * vertex stream. */
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
	int r;

	if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
		emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);

	r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	if (!r) {
		ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
		if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
			return emit_inc_ring_offset(ctx, stream, TRUE);
	}
	return r;
}

/* TGSI UMAD (dst = src0 * src1 + src2), lowered to MULLO_UINT + ADD_INT. */
static int tgsi_umad(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	/* src0 * src1 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		if (ctx->bc->chip_class == CAYMAN) {
			/* Cayman: MULLO is a vector op across all 4 slots; emit
			 * all four and only write the slot matching channel i.
			 * NOTE(review): NumSrcRegs is 3 for UMAD, so src[2] is
			 * also filled for this 2-source op — presumably ignored
			 * by the bytecode builder; verify. */
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP2_MULLO_UINT;
				for (k = 0; k < inst->Instruction.NumSrcRegs; k++) {
					r600_bytecode_src(&alu.src[k], &ctx->src[k], i);
				}
				alu.dst.chan = j;
				alu.dst.sel = ctx->temp_reg;
				alu.dst.write = (j == i);
				if (j == 3)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.dst.chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;

			alu.op = ALU_OP2_MULLO_UINT;
			for (j = 0; j < 2; j++) {
				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
			}

			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}


	/* (src0 * src1) + src2, written to the real destination */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ALU_OP2_ADD_INT;

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI opcode -> {hw opcode, emit callback} table for R600-class
 * (pre-Evergreen) chips.  Indexed by TGSI_OPCODE_*. */
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	[TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl},
	[TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2},
	[TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit},

	/* XXX:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	[TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	[TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq},
	[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
	[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
	[TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
	[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
	[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
	[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
	[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
	[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
	[TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
	[TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
	[TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
	[22] = { ALU_OP0_NOP, tgsi_unsupported},
	[23] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
	[TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
	[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
	[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
	[TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
	[32] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2},
	[34] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
	[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
	[TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported},
	[44] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2},
	[46] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2},
	[TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig},
	[TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap},
	[TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2},
	[51] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex},
	[TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported},
	[59] = { ALU_OP0_NOP, tgsi_unsupported},
	[60] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl},
	[62] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
	[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
	[TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
	[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[69] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
	[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
	[TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif},
	[76] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else},
	[TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
	[TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
	[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
	[TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
	[TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2},
	[TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans},
	[88] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2},
	[TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
	[TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
	[TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
	[TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
	[TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
	[TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop},
	[TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
	[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
	[105] = { ALU_OP0_NOP, tgsi_unsupported},
	[106] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
	[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
	[112] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
	[114] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc},
	[TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
	[TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
	[118] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans},
	[TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv},
	[TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2},
	[TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2},
	[TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg},
	[TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2},
	[TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans},
	[TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap},
	[TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
	[TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2},
	[TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv},
	[TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad},
	[TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2},
	[TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2},
	[TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod},
	[TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2},
	[TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2},
	[TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans},
	[TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
	[TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap},
	[TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl},
	[TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp},
	[TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
	[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
	[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans},
	[TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported},
	[TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported},
	[TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported},
	[TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported},
	[TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported},
	[TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported},
	[TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported},
	[TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported},
	[TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_unsupported},
	[TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported},
};

/* TGSI opcode -> {hw opcode, emit callback} table for Evergreen chips. */
static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
	[TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl},
	[TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2},
	[TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit},
	[TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
	[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
	[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
	[TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
	[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
	[TGSI_OPCODE_MIN] = {
ALU_OP2_MIN, tgsi_op2},
	[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
	[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
	[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
	[TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
	[TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
	[TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
	[22] = { ALU_OP0_NOP, tgsi_unsupported},
	[23] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
	[TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
	[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
	[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow},
	[TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
	[32] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2},
	[34] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig},
	[TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
	[TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported},
	[44] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2},
	[46] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2},
	[TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig},
	[TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap},
	[TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2},
	[51] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex},
	[TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported},
	[59] = { ALU_OP0_NOP, tgsi_unsupported},
	[60] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl},
	[62] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg},
	[TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp},
	[TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs},
	[TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[69] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
	[TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
	[TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif},
	[76] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else},
	[TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif},
	/* unlike r600, eg has real fine-derivative fetch ops */
	[TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2},
	[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
	[TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2},
	[TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2},
	[TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2},
	[88] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2},
	[TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2},
	[TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod},
	[TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2},
	[TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex},
	[TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
	[TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop},
	[TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop},
	[TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
	[105] = { ALU_OP0_NOP, tgsi_unsupported},
	[106] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
	[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
	[112] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
	[114] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
	[TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
	[118] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i},
	[TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv},
	[TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2},
	[TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2},
	[TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg},
	[TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2},
	[TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2},
	[TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap},
	[TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_f2i},
	[TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
	[TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2},
	[TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv},
	[TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad},
	[TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2},
	[TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2},
	[TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod},
	[TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2},
	[TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2},
	[TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2},
	[TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
	[TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2},
	[TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported},
	[TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl},
	[TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp},
	[TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
	[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
	[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
	[TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans},
	[TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex},
	[TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex},
	[TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3},
	[TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3},
	[TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi},
	[TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2},
	[TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2},
	[TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2},
	[TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb},
	[TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb},
	[TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm},
	/* fp64 support */
	[TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
	[TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64},
	[TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
	[TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
	[TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
	[TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
	[TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
	[TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
	[TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
	[TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
	[TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64},
	[TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64},
	[TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64},
	[TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp},
	[TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
	[TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
	[TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
	[TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
	[TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
	[TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported},
};

/* TGSI opcode -> {hw opcode, emit callback} table for Cayman chips. */
static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
	[TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl},
	[TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2},
	[TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit},
	[TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
	[TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
	[TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
	[TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
	[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
	[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
	[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
	[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
	[TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
	[TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
	[TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
	[22] = { ALU_OP0_NOP, tgsi_unsupported},
	[23] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
	[TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
	[TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
	[TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow},
	[TGSI_OPCODE_XPD] = {
ALU_OP0_NOP, tgsi_xpd}, 9419 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 9420 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 9421 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 9422 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 9423 [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig}, 9424 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9425 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9426 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 9427 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9428 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9429 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9430 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9431 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 9432 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 9433 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 9434 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 9435 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, cayman_trig}, 9436 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 9437 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 9438 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 9439 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 9440 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 9441 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 9442 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 9443 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 9444 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 9445 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9446 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 9447 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 9448 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 9449 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 9450 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 9451 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 9452 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 9453 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 9454 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, 
tgsi_scs}, 9455 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9456 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 9457 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 9458 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 9459 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9460 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 9461 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 9462 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 9463 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 9464 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 9465 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 9466 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9467 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9468 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 9469 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 9470 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 9471 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2}, 9472 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 9473 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 9474 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 9475 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 9476 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 9477 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 9478 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 9479 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 9480 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9481 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 9482 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9483 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 9484 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 9485 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 9486 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 9487 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9488 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 9489 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, 
tgsi_unsupported}, 9490 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9491 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 9492 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 9493 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 9494 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 9495 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 9496 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 9497 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 9498 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 9499 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 9500 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 9501 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 9502 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 9503 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 9504 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 9505 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 9506 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2}, 9507 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 9508 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 9509 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 9510 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 9511 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 9512 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 9513 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 9514 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2}, 9515 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2}, 9516 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 9517 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 9518 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 9519 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 9520 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 9521 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 9522 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr}, 9523 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, 
tgsi_op2}, 9524 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 9525 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 9526 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 9527 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 9528 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9529 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 9530 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 9531 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9532 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 9533 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 9534 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 9535 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 9536 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 9537 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 9538 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 9539 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 9540 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 9541 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 9542 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 9543 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 9544 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 9545 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 9546 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 9547 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 9548 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9549 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 9550 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9551 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9552 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9553 [TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 9554 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 9555 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 9556 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 9557 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 9558 
[TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9559 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9560 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9561 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9562 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9563 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9564 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 9565 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9566 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9567 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr}, 9568 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, 9569 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 9570 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 9571 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 9572 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 9573 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 9574 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 9575 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 9576 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 9577 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 9578 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 9579 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9580 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9581 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9582 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 9583 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 9584 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 9585 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 9586 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 9587 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 9588 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 9589 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 9590 [TGSI_OPCODE_DSLT] = { 
ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 9591 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 9592 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 9593 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 9594 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 9595 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 9596 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 9597 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 9598 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 9599 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 9600 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 9601 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 9602 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 9603 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 9604 [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 9605 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 9606}; 9607