r600_shader.c revision f9caabe8f1bff86d19b53d9ecba5c72b238d9e23
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "r600_sq.h" 24#include "r600_llvm.h" 25#include "r600_formats.h" 26#include "r600_opcodes.h" 27#include "r600_shader.h" 28#include "r600d.h" 29 30#include "sb/sb_public.h" 31 32#include "pipe/p_shader_tokens.h" 33#include "tgsi/tgsi_info.h" 34#include "tgsi/tgsi_parse.h" 35#include "tgsi/tgsi_scan.h" 36#include "tgsi/tgsi_dump.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39#include <stdio.h> 40#include <errno.h> 41 42/* CAYMAN notes 43Why CAYMAN got loops for lots of instructions is explained here. 44 45-These 8xx t-slot only ops are implemented in all vector slots. 46MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 47These 8xx t-slot only opcodes become vector ops, with all four 48slots expecting the arguments on sources a and b. 
Result is
broadcast to all channels.
MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64
These 8xx t-slot only opcodes become vector ops in the z, y, and
x slots.
EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64
RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64
SQRT_IEEE/_64
SIN/COS
The w slot may have an independent co-issued operation, or if the
result is required to be in the w slot, the opcode above may be
issued in the w slot as well.
The compiler must issue the source argument to slots z, y, and x
*/

#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
static int r600_shader_from_tgsi(struct r600_context *rctx,
				 struct r600_pipe_shader *pipeshader,
				 union r600_shader_key key);


/* Record a GPR array (an indirectly-addressed temporary range) on the shader.
 * The backing storage grows in chunks of 64 entries.
 * NOTE(review): the realloc() result is not checked for NULL — on OOM the
 * old pointer is lost and the stores below dereference NULL. */
static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr,
			       int size, unsigned comp_mask) {

	if (!size)
		return;

	if (ps->num_arrays == ps->max_arrays) {
		ps->max_arrays += 64;
		ps->arrays = realloc(ps->arrays, ps->max_arrays *
				     sizeof(struct r600_shader_array));
	}

	int n = ps->num_arrays;
	++ps->num_arrays;

	ps->arrays[n].comp_mask = comp_mask;
	ps->arrays[n].gpr_start = start_gpr;
	ps->arrays[n].gpr_count = size;
}

/* Debug helper: print the stream-output (transform feedback) mapping of a
 * shader to stderr, one line per output slot. */
static void r600_dump_streamout(struct pipe_stream_output_info *so)
{
	unsigned i;

	fprintf(stderr, "STREAMOUT\n");
	for (i = 0; i < so->num_outputs; i++) {
		/* component mask shifted to the output's start component */
		unsigned mask = ((1 << so->output[i].num_components) - 1) <<
				so->output[i].start_component;
		fprintf(stderr, " %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n",
			i,
			so->output[i].stream,
			so->output[i].output_buffer,
			so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
			so->output[i].register_index,
			mask & 1 ? "x" : "",
			mask & 2 ? "y" : "",
			mask & 4 ? "z" : "",
			mask & 8 ? "w" : "",
			so->output[i].dst_offset < so->output[i].start_component ?
				" (will lower)" : "");
	}
}

/* Upload the built bytecode into the shader's buffer object, creating the BO
 * on first use.  Words are byte-swapped to little-endian on big-endian hosts.
 * Returns 0 on success or -ENOMEM if the BO could not be created.
 * NOTE(review): the map result is not checked for NULL before writing —
 * TODO confirm r600_buffer_map_sync_with_rings cannot fail here. */
static int store_shader(struct pipe_context *ctx,
			struct r600_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	uint32_t *ptr, i;

	if (shader->bo == NULL) {
		shader->bo = (struct r600_resource*)
			pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
		if (shader->bo == NULL) {
			return -ENOMEM;
		}
		ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
		if (R600_BIG_ENDIAN) {
			for (i = 0; i < shader->shader.bc.ndw; ++i) {
				ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
			}
		} else {
			memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
		}
		rctx->b.ws->buffer_unmap(shader->bo->cs_buf);
	}

	return 0;
}

/* Translate a TGSI shader into r600 bytecode, optionally run the SB
 * optimizing backend, upload the result, and emit the hardware state for the
 * shader stage.  On any failure the partially-built shader is destroyed and
 * the error code is returned. */
int r600_pipe_shader_create(struct pipe_context *ctx,
			    struct r600_pipe_shader *shader,
			    union r600_shader_key key)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_pipe_shader_selector *sel = shader->selector;
	int r;
	bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
	unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
	unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
	unsigned export_shader;

	shader->shader.bc.isa = rctx->isa;

	if (dump) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		tgsi_dump(sel->tokens, 0);

		if (sel->so.num_outputs) {
			r600_dump_streamout(&sel->so);
		}
	}
	r = r600_shader_from_tgsi(rctx, shader, key);
	if (r) {
		R600_ERR("translation from TGSI failed !\n");
		goto error;
	}

	/* disable SB for geom shaders on R6xx/R7xx due to some mysterious gs piglit regressions with it enabled. */
	if (rctx->b.chip_class <= R700) {
		use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY);
	}
	/* disable SB for shaders using CF_INDEX_0/1 (sampler/ubo array indexing) as it doesn't handle those currently */
	use_sb &= !shader->shader.uses_index_registers;
	/* disable SB for shaders using doubles */
	use_sb &= !shader->shader.uses_doubles;

	/* Check if the bytecode has already been built. When using the llvm
	 * backend, r600_shader_from_tgsi() will take care of building the
	 * bytecode.
	 */
	if (!shader->shader.bc.bytecode) {
		r = r600_bytecode_build(&shader->shader.bc);
		if (r) {
			R600_ERR("building bytecode failed !\n");
			goto error;
		}
	}

	if (dump && !sb_disasm) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		r600_bytecode_disasm(&shader->shader.bc);
		fprintf(stderr, "______________________________________________________________\n");
	} else if ((dump && sb_disasm) || use_sb) {
		/* SB backend: optimizes and/or disassembles the bytecode */
		r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
					     dump, use_sb);
		if (r) {
			R600_ERR("r600_sb_bytecode_process failed !\n");
			goto error;
		}
	}

	if (shader->gs_copy_shader) {
		if (dump) {
			// dump copy shader
			r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
						     &shader->gs_copy_shader->shader, dump, 0);
			if (r)
				goto error;
		}

		if ((r = store_shader(ctx, shader->gs_copy_shader)))
			goto error;
	}

	/* Store the shader in a buffer. */
	if ((r = store_shader(ctx, shader)))
		goto error;

	/* Build state. */
	switch (shader->shader.processor_type) {
	case TGSI_PROCESSOR_GEOMETRY:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_gs_state(ctx, shader);
			evergreen_update_vs_state(ctx, shader->gs_copy_shader);
		} else {
			r600_update_gs_state(ctx, shader);
			r600_update_vs_state(ctx, shader->gs_copy_shader);
		}
		break;
	case TGSI_PROCESSOR_VERTEX:
		/* a VS used as "export shader" feeds the GS ring instead of
		 * the usual vertex path */
		export_shader = key.vs.as_es;
		if (rctx->b.chip_class >= EVERGREEN) {
			if (export_shader)
				evergreen_update_es_state(ctx, shader);
			else
				evergreen_update_vs_state(ctx, shader);
		} else {
			if (export_shader)
				r600_update_es_state(ctx, shader);
			else
				r600_update_vs_state(ctx, shader);
		}
		break;
	case TGSI_PROCESSOR_FRAGMENT:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_ps_state(ctx, shader);
		} else {
			r600_update_ps_state(ctx, shader);
		}
		break;
	default:
		r = -EINVAL;
		goto error;
	}
	return 0;

error:
	r600_pipe_shader_destroy(ctx, shader);
	return r;
}

/* Release all resources owned by a pipe shader: the bytecode BO, the built
 * bytecode, and the preloaded command buffer. */
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
	r600_bytecode_clear(&shader->shader.bc);
	r600_release_command_buffer(&shader->command_buffer);
}

/*
 * tgsi -> r600 shader
 */
struct r600_shader_tgsi_instruction;

/* Decoded form of one TGSI source operand. */
struct r600_shader_src {
	unsigned				sel;
	unsigned				swizzle[4];
	unsigned				neg;
	unsigned				abs;
	unsigned				rel;
	unsigned				kc_bank;
	boolean					kc_rel; /* true if cache bank is indexed */
	uint32_t				value[4];
};

/* Per-interpolator state for Evergreen barycentric setup. */
struct eg_interp {
	boolean					enabled;
	unsigned				ij_index;
};

/* All state carried through the TGSI -> r600 translation of one shader. */
struct r600_shader_ctx {
	struct tgsi_shader_info			info;
	struct tgsi_parse_context		parse;
	const struct tgsi_token			*tokens;
	unsigned				type;
	unsigned				file_offset[TGSI_FILE_COUNT];
	unsigned				temp_reg;
	const struct r600_shader_tgsi_instruction	*inst_info;
	struct r600_bytecode			*bc;
	struct r600_shader			*shader;
	struct r600_shader_src			src[4];
	uint32_t				*literals;
	uint32_t				nliterals;
	uint32_t				max_driver_temp_used;
	boolean use_llvm;
	/* needed for evergreen interpolation */
	struct eg_interp		eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
	/* evergreen/cayman also store sample mask in face register */
	int					face_gpr;
	/* sample id is .w component stored in fixed point position register */
	int					fixed_pt_position_gpr;
	int					colors_used;
	boolean                 clip_vertex_write;
	unsigned                cv_output;
	unsigned		edgeflag_output;
	int					fragcoord_input;
	int					native_integers;
	int					next_ring_offset;
	int					gs_out_ring_offset;
	int					gs_next_vertex;
	struct r600_shader	*gs_for_vs;
	int					gs_export_gpr_tregs[4];
	const struct pipe_stream_output_info	*gs_stream_output_info;
	unsigned				enabled_stream_buffers_mask;
};

/* Maps one TGSI opcode to its hardware opcode and emit callback. */
struct r600_shader_tgsi_instruction {
	unsigned	op;
	int (*process)(struct r600_shader_ctx *ctx);
};

static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
                                unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
                                unsigned int dst_reg);
static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
			const struct r600_shader_src *shader_src,
			unsigned chan);

/* Reject TGSI constructs the translator cannot handle yet (multiple dests,
 * predicates, unsupported 2-D register files).  Returns 0 if the current
 * instruction is supported, -EINVAL otherwise. */
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
	int j;

	if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
		return -EINVAL;
	}
	if (i->Instruction.Predicate) {
		R600_ERR("predicate unsupported\n");
		return -EINVAL;
	}
#if 0
	if (i->Instruction.Label) {
		R600_ERR("label unsupported\n");
		return -EINVAL;
	}
#endif
	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
		if (i->Src[j].Register.Dimension) {
			switch (i->Src[j].Register.File) {
			case TGSI_FILE_CONSTANT:
				break;
			case TGSI_FILE_INPUT:
				if (ctx->type == TGSI_PROCESSOR_GEOMETRY)
					break;
				/* fallthrough: 2-D inputs only allowed in GS */
			default:
				R600_ERR("unsupported src %d (dimension %d)\n", j,
					 i->Src[j].Register.Dimension);
				return -EINVAL;
			}
		}
	}
	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
		if (i->Dst[j].Register.Dimension) {
			R600_ERR("unsupported dst (dimension)\n");
			return -EINVAL;
		}
	}
	return 0;
}

/* Map a TGSI interpolation mode + location to an index into
 * r600_shader_ctx::eg_interpolators (Persp/Linear * 3 + loc), or -1 for
 * modes that need no barycentrics (constant/flat). */
int eg_get_interpolator_index(unsigned interpolate, unsigned location)
{
	if (interpolate == TGSI_INTERPOLATE_COLOR ||
		interpolate == TGSI_INTERPOLATE_LINEAR ||
		interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
	{
		int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
		int loc;

		switch(location) {
		case TGSI_INTERPOLATE_LOC_CENTER:
			loc = 1;
			break;
		case TGSI_INTERPOLATE_LOC_CENTROID:
			loc = 2;
			break;
		case TGSI_INTERPOLATE_LOC_SAMPLE:
		default:
			loc = 0; break;
		}

		return is_linear * 3 + loc;
	}

	return -1;
}

/* Copy the ij (barycentric) index assigned to this input's interpolator
 * during evergreen_gpr_count() into the input itself. */
static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
		int input)
{
	int i = eg_get_interpolator_index(
		ctx->shader->input[input].interpolate,
		ctx->shader->input[input].interpolate_location);
	assert(i >= 0);
	ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
}

/* Emit the Evergreen INTERP_ZW/INTERP_XY ALU pairs that interpolate one
 * fragment shader input from its ij barycentrics into the input's GPR. */
static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;
	int gpr = 0, base_chan = 0;
	int ij_index = ctx->shader->input[input].ij_index;

	/* work out gpr and base_chan from index */
	gpr = ij_index / 2;
	base_chan = (2 * (ij_index % 2)) + 1;

	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		if (i < 4)
			alu.op = ALU_OP2_INTERP_ZW;
		else
			alu.op = ALU_OP2_INTERP_XY;

		/* only slots 2..5 actually write the destination channels */
		if ((i > 1) && (i < 6)) {
			alu.dst.sel = ctx->shader->input[input].gpr;
			alu.dst.write = 1;
		}

		alu.dst.chan = i % 4;

		alu.src[0].sel = gpr;
		alu.src[0].chan = (base_chan - (i % 2));

		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;

		alu.bank_swizzle_force = SQ_ALU_VEC_210;
		if ((i % 4) == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit INTERP_LOAD_P0 for a flat-shaded input: loads the per-primitive
 * parameter directly, no barycentric interpolation. */
static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_INTERP_LOAD_P0;

		alu.dst.sel = ctx->shader->input[input].gpr;
		alu.dst.write = 1;

		alu.dst.chan = i;

		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[0].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/*
 * Special export handling in shaders
 *
 *
shader export ARRAY_BASE for EXPORT_POS: 497 * 60 is position 498 * 61 is misc vector 499 * 62, 63 are clip distance vectors 500 * 501 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 502 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 503 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 504 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 505 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 506 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 507 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 508 * exclusive from render target index) 509 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 510 * 511 * 512 * shader export ARRAY_BASE for EXPORT_PIXEL: 513 * 0-7 CB targets 514 * 61 computed Z vector 515 * 516 * The use of the values exported in the computed Z vector are controlled 517 * by DB_SHADER_CONTROL: 518 * Z_EXPORT_ENABLE - Z as a float in RED 519 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 520 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 521 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 522 * DB_SOURCE_FORMAT - export control restrictions 523 * 524 */ 525 526 527/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 528static int r600_spi_sid(struct r600_shader_io * io) 529{ 530 int index, name = io->name; 531 532 /* These params are handled differently, they don't need 533 * semantic indices, so we'll use 0 for them. 
534 */ 535 if (name == TGSI_SEMANTIC_POSITION || 536 name == TGSI_SEMANTIC_PSIZE || 537 name == TGSI_SEMANTIC_EDGEFLAG || 538 name == TGSI_SEMANTIC_FACE || 539 name == TGSI_SEMANTIC_SAMPLEMASK) 540 index = 0; 541 else { 542 if (name == TGSI_SEMANTIC_GENERIC) { 543 /* For generic params simply use sid from tgsi */ 544 index = io->sid; 545 } else { 546 /* For non-generic params - pack name and sid into 8 bits */ 547 index = 0x80 | (name<<3) | (io->sid); 548 } 549 550 /* Make sure that all really used indices have nonzero value, so 551 * we can just compare it to 0 later instead of comparing the name 552 * with different values to detect special cases. */ 553 index++; 554 } 555 556 return index; 557}; 558 559/* turn input into interpolate on EG */ 560static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 561{ 562 int r = 0; 563 564 if (ctx->shader->input[index].spi_sid) { 565 ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 566 if (ctx->shader->input[index].interpolate > 0) { 567 evergreen_interp_assign_ij_index(ctx, index); 568 if (!ctx->use_llvm) 569 r = evergreen_interp_alu(ctx, index); 570 } else { 571 if (!ctx->use_llvm) 572 r = evergreen_interp_flat(ctx, index); 573 } 574 } 575 return r; 576} 577 578static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 579{ 580 struct r600_bytecode_alu alu; 581 int i, r; 582 int gpr_front = ctx->shader->input[front].gpr; 583 int gpr_back = ctx->shader->input[back].gpr; 584 585 for (i = 0; i < 4; i++) { 586 memset(&alu, 0, sizeof(alu)); 587 alu.op = ALU_OP3_CNDGT; 588 alu.is_op3 = 1; 589 alu.dst.write = 1; 590 alu.dst.sel = gpr_front; 591 alu.src[0].sel = ctx->face_gpr; 592 alu.src[1].sel = gpr_front; 593 alu.src[2].sel = gpr_back; 594 595 alu.dst.chan = i; 596 alu.src[1].chan = i; 597 alu.src[2].chan = i; 598 alu.last = (i==3); 599 600 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 601 return r; 602 } 603 604 return 0; 605} 606 607static int vs_add_primid_output(struct 
r600_shader_ctx *ctx, int prim_id_sid) 608{ 609 int i; 610 i = ctx->shader->noutput++; 611 ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID; 612 ctx->shader->output[i].sid = 0; 613 ctx->shader->output[i].gpr = 0; 614 ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT; 615 ctx->shader->output[i].write_mask = 0x4; 616 ctx->shader->output[i].spi_sid = prim_id_sid; 617 618 return 0; 619} 620 621static int tgsi_declaration(struct r600_shader_ctx *ctx) 622{ 623 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 624 int r, i, j, count = d->Range.Last - d->Range.First + 1; 625 626 switch (d->Declaration.File) { 627 case TGSI_FILE_INPUT: 628 for (j = 0; j < count; j++) { 629 i = ctx->shader->ninput + j; 630 assert(i < Elements(ctx->shader->input)); 631 ctx->shader->input[i].name = d->Semantic.Name; 632 ctx->shader->input[i].sid = d->Semantic.Index + j; 633 ctx->shader->input[i].interpolate = d->Interp.Interpolate; 634 ctx->shader->input[i].interpolate_location = d->Interp.Location; 635 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j; 636 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 637 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 638 switch (ctx->shader->input[i].name) { 639 case TGSI_SEMANTIC_FACE: 640 if (ctx->face_gpr != -1) 641 ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */ 642 else 643 ctx->face_gpr = ctx->shader->input[i].gpr; 644 break; 645 case TGSI_SEMANTIC_COLOR: 646 ctx->colors_used++; 647 break; 648 case TGSI_SEMANTIC_POSITION: 649 ctx->fragcoord_input = i; 650 break; 651 case TGSI_SEMANTIC_PRIMID: 652 /* set this for now */ 653 ctx->shader->gs_prim_id_input = true; 654 ctx->shader->ps_prim_id_input = i; 655 break; 656 } 657 if (ctx->bc->chip_class >= EVERGREEN) { 658 if ((r = evergreen_interp_input(ctx, i))) 659 return r; 660 } 661 } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { 662 /* FIXME probably skip inputs 
if they aren't passed in the ring */ 663 ctx->shader->input[i].ring_offset = ctx->next_ring_offset; 664 ctx->next_ring_offset += 16; 665 if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID) 666 ctx->shader->gs_prim_id_input = true; 667 } 668 } 669 ctx->shader->ninput += count; 670 break; 671 case TGSI_FILE_OUTPUT: 672 for (j = 0; j < count; j++) { 673 i = ctx->shader->noutput + j; 674 assert(i < Elements(ctx->shader->output)); 675 ctx->shader->output[i].name = d->Semantic.Name; 676 ctx->shader->output[i].sid = d->Semantic.Index + j; 677 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j; 678 ctx->shader->output[i].interpolate = d->Interp.Interpolate; 679 ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 680 if (ctx->type == TGSI_PROCESSOR_VERTEX || 681 ctx->type == TGSI_PROCESSOR_GEOMETRY) { 682 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 683 switch (d->Semantic.Name) { 684 case TGSI_SEMANTIC_CLIPDIST: 685 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << 686 ((d->Semantic.Index + j) << 2); 687 break; 688 case TGSI_SEMANTIC_PSIZE: 689 ctx->shader->vs_out_misc_write = 1; 690 ctx->shader->vs_out_point_size = 1; 691 break; 692 case TGSI_SEMANTIC_EDGEFLAG: 693 ctx->shader->vs_out_misc_write = 1; 694 ctx->shader->vs_out_edgeflag = 1; 695 ctx->edgeflag_output = i; 696 break; 697 case TGSI_SEMANTIC_VIEWPORT_INDEX: 698 ctx->shader->vs_out_misc_write = 1; 699 ctx->shader->vs_out_viewport = 1; 700 break; 701 case TGSI_SEMANTIC_LAYER: 702 ctx->shader->vs_out_misc_write = 1; 703 ctx->shader->vs_out_layer = 1; 704 break; 705 case TGSI_SEMANTIC_CLIPVERTEX: 706 ctx->clip_vertex_write = TRUE; 707 ctx->cv_output = i; 708 break; 709 } 710 if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { 711 ctx->gs_out_ring_offset += 16; 712 } 713 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 714 switch (d->Semantic.Name) { 715 case TGSI_SEMANTIC_COLOR: 716 ctx->shader->nr_ps_max_color_exports++; 717 break; 718 } 
719 } 720 } 721 ctx->shader->noutput += count; 722 break; 723 case TGSI_FILE_TEMPORARY: 724 if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 725 if (d->Array.ArrayID) { 726 r600_add_gpr_array(ctx->shader, 727 ctx->file_offset[TGSI_FILE_TEMPORARY] + 728 d->Range.First, 729 d->Range.Last - d->Range.First + 1, 0x0F); 730 } 731 } 732 break; 733 734 case TGSI_FILE_CONSTANT: 735 case TGSI_FILE_SAMPLER: 736 case TGSI_FILE_SAMPLER_VIEW: 737 case TGSI_FILE_ADDRESS: 738 break; 739 740 case TGSI_FILE_SYSTEM_VALUE: 741 if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK || 742 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID || 743 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) { 744 break; /* Already handled from allocate_system_value_inputs */ 745 } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 746 if (!ctx->native_integers) { 747 struct r600_bytecode_alu alu; 748 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 749 750 alu.op = ALU_OP1_INT_TO_FLT; 751 alu.src[0].sel = 0; 752 alu.src[0].chan = 3; 753 754 alu.dst.sel = 0; 755 alu.dst.chan = 3; 756 alu.dst.write = 1; 757 alu.last = 1; 758 759 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 760 return r; 761 } 762 break; 763 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 764 break; 765 else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID) 766 break; 767 default: 768 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 769 return -EINVAL; 770 } 771 return 0; 772} 773 774static int r600_get_temp(struct r600_shader_ctx *ctx) 775{ 776 return ctx->temp_reg + ctx->max_driver_temp_used++; 777} 778 779static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset) 780{ 781 struct tgsi_parse_context parse; 782 struct { 783 boolean enabled; 784 int *reg; 785 unsigned name, alternate_name; 786 } inputs[2] = { 787 { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */ 788 789 { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, 
TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */ 790 }; 791 int i, k, num_regs = 0; 792 793 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 794 return 0; 795 } 796 797 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 798 while (!tgsi_parse_end_of_tokens(&parse)) { 799 tgsi_parse_token(&parse); 800 801 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 802 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 803 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 804 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 805 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 806 { 807 int interpolate, location, k; 808 809 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 810 location = TGSI_INTERPOLATE_LOC_CENTER; 811 inputs[1].enabled = true; /* needs SAMPLEID */ 812 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 813 location = TGSI_INTERPOLATE_LOC_CENTER; 814 /* Needs sample positions, currently those are always available */ 815 } else { 816 location = TGSI_INTERPOLATE_LOC_CENTROID; 817 } 818 819 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 820 k = eg_get_interpolator_index(interpolate, location); 821 ctx->eg_interpolators[k].enabled = true; 822 } 823 } else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { 824 struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration; 825 if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 826 for (k = 0; k < Elements(inputs); k++) { 827 if (d->Semantic.Name == inputs[k].name || 828 d->Semantic.Name == inputs[k].alternate_name) { 829 inputs[k].enabled = true; 830 } 831 } 832 } 833 } 834 } 835 836 tgsi_parse_free(&parse); 837 838 for (i = 0; i < Elements(inputs); i++) { 839 boolean enabled = inputs[i].enabled; 840 int *reg = inputs[i].reg; 841 unsigned name = inputs[i].name; 842 843 if (enabled) { 844 int gpr = gpr_offset 
+ num_regs++; 845 846 // add to inputs, allocate a gpr 847 k = ctx->shader->ninput ++; 848 ctx->shader->input[k].name = name; 849 ctx->shader->input[k].sid = 0; 850 ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT; 851 ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER; 852 *reg = ctx->shader->input[k].gpr = gpr; 853 } 854 } 855 856 return gpr_offset + num_regs; 857} 858 859/* 860 * for evergreen we need to scan the shader to find the number of GPRs we need to 861 * reserve for interpolation and system values 862 * 863 * we need to know if we are going to emit 864 * any sample or centroid inputs 865 * if perspective and linear are required 866*/ 867static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 868{ 869 int i; 870 int num_baryc; 871 struct tgsi_parse_context parse; 872 873 memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators)); 874 875 for (i = 0; i < ctx->info.num_inputs; i++) { 876 int k; 877 /* skip position/face/mask/sampleid */ 878 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 879 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE || 880 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK || 881 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID) 882 continue; 883 884 k = eg_get_interpolator_index( 885 ctx->info.input_interpolate[i], 886 ctx->info.input_interpolate_loc[i]); 887 if (k >= 0) 888 ctx->eg_interpolators[k].enabled = TRUE; 889 } 890 891 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 892 return 0; 893 } 894 895 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 896 while (!tgsi_parse_end_of_tokens(&parse)) { 897 tgsi_parse_token(&parse); 898 899 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 900 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 901 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 902 inst->Instruction.Opcode == 
TGSI_OPCODE_INTERP_OFFSET || 903 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 904 { 905 int interpolate, location, k; 906 907 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 908 location = TGSI_INTERPOLATE_LOC_CENTER; 909 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 910 location = TGSI_INTERPOLATE_LOC_CENTER; 911 } else { 912 location = TGSI_INTERPOLATE_LOC_CENTROID; 913 } 914 915 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 916 k = eg_get_interpolator_index(interpolate, location); 917 ctx->eg_interpolators[k].enabled = true; 918 } 919 } 920 } 921 922 tgsi_parse_free(&parse); 923 924 /* assign gpr to each interpolator according to priority */ 925 num_baryc = 0; 926 for (i = 0; i < Elements(ctx->eg_interpolators); i++) { 927 if (ctx->eg_interpolators[i].enabled) { 928 ctx->eg_interpolators[i].ij_index = num_baryc; 929 num_baryc ++; 930 } 931 } 932 933 /* XXX PULL MODEL and LINE STIPPLE */ 934 935 num_baryc = (num_baryc + 1) >> 1; 936 return allocate_system_value_inputs(ctx, num_baryc); 937} 938 939/* sample_id_sel == NULL means fetch for current sample */ 940static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel) 941{ 942 struct r600_bytecode_vtx vtx; 943 int r, t1; 944 945 assert(ctx->fixed_pt_position_gpr != -1); 946 947 t1 = r600_get_temp(ctx); 948 949 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 950 vtx.op = FETCH_OP_VFETCH; 951 vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 952 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 953 if (sample_id == NULL) { 954 vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; 955 vtx.src_sel_x = 3; 956 } 957 else { 958 struct r600_bytecode_alu alu; 959 960 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 961 alu.op = ALU_OP1_MOV; 962 r600_bytecode_src(&alu.src[0], sample_id, chan_sel); 963 alu.dst.sel = t1; 964 alu.dst.write = 1; 965 alu.last = 1; 966 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 967 if (r) 968 return r; 969 970 vtx.src_gpr = t1; 971 vtx.src_sel_x = 0; 972 } 973 vtx.mega_fetch_count = 16; 974 vtx.dst_gpr = t1; 975 vtx.dst_sel_x = 0; 976 vtx.dst_sel_y = 1; 977 vtx.dst_sel_z = 2; 978 vtx.dst_sel_w = 3; 979 vtx.data_format = FMT_32_32_32_32_FLOAT; 980 vtx.num_format_all = 2; 981 vtx.format_comp_all = 1; 982 vtx.use_const_fields = 0; 983 vtx.offset = 1; // first element is size of buffer 984 vtx.endian = r600_endian_swap(32); 985 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 986 987 r = r600_bytecode_add_vtx(ctx->bc, &vtx); 988 if (r) 989 return r; 990 991 return t1; 992} 993 994static void tgsi_src(struct r600_shader_ctx *ctx, 995 const struct tgsi_full_src_register *tgsi_src, 996 struct r600_shader_src *r600_src) 997{ 998 memset(r600_src, 0, sizeof(*r600_src)); 999 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 1000 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 1001 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 1002 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 1003 r600_src->neg = tgsi_src->Register.Negate; 1004 r600_src->abs = tgsi_src->Register.Absolute; 1005 1006 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 1007 int index; 1008 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 1009 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 1010 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 1011 1012 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 1013 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 1014 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 1015 return; 1016 } 1017 index = tgsi_src->Register.Index; 1018 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 1019 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 1020 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 1021 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == 
		    TGSI_SEMANTIC_SAMPLEMASK) {
			r600_src->swizzle[0] = 2; // Z value
			r600_src->swizzle[1] = 2;
			r600_src->swizzle[2] = 2;
			r600_src->swizzle[3] = 2;
			r600_src->sel = ctx->face_gpr;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
			r600_src->swizzle[0] = 3; // W value
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = ctx->fixed_pt_position_gpr;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
			/* fetch the position of the current sample into a temp;
			 * z/w read swizzle 4 (inline constant 0) */
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 1;
			r600_src->swizzle[2] = 4;
			r600_src->swizzle[3] = 4;
			r600_src->sel = load_sample_position(ctx, NULL, -1);
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 0;
			r600_src->swizzle[2] = 0;
			r600_src->swizzle[3] = 0;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 1;
		}
	} else {
		if (tgsi_src->Register.Indirect)
			r600_src->rel = V_SQ_REL_RELATIVE;
		r600_src->sel = tgsi_src->Register.Index;
		/* remap into the flat GPR space reserved for this file */
		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
	}
	if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) {
		if (tgsi_src->Register.Dimension) {
			/* 2D constant: second dimension selects the kcache bank */
			r600_src->kc_bank = tgsi_src->Dimension.Index;
			if (tgsi_src->Dimension.Indirect) {
				r600_src->kc_rel = 1;
			}
		}
	}
}

/* Fetch a relatively-addressed constant from constant buffer cb_idx into
 * dst_reg via a vertex fetch clause. The fetch address is AR (ar_reg) on
 * channel ar_chan, optionally biased by a compile-time offset; cb_rel
 * selects relative buffer indexing. Returns 0 or a negative error code. */
static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
				unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
				unsigned int dst_reg)
{
	struct r600_bytecode_vtx vtx;
	unsigned int ar_reg;
	int r;

	if (offset) {
		/* address = AR + offset, computed into dst_reg.ar_chan which
		 * then serves as the fetch address register */
		struct r600_bytecode_alu alu;

		memset(&alu, 0, sizeof(alu));

		alu.op = ALU_OP2_ADD_INT;
		alu.src[0].sel = ctx->bc->ar_reg;
		alu.src[0].chan = ar_chan;

		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = offset;

		alu.dst.sel = dst_reg;
		alu.dst.chan = ar_chan;
		alu.dst.write = 1;
		alu.last = 1;

		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		ar_reg = dst_reg;
	} else {
		ar_reg = ctx->bc->ar_reg;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.buffer_id = cb_idx;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = ar_reg;
	vtx.src_sel_x = ar_chan;
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = dst_reg;
	vtx.dst_sel_x = 0;		/* SEL_X */
	vtx.dst_sel_y = 1;		/* SEL_Y */
	vtx.dst_sel_z = 2;		/* SEL_Z */
	vtx.dst_sel_w = 3;		/* SEL_W */
	vtx.data_format = FMT_32_32_32_32_FLOAT;
	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
	vtx.endian = r600_endian_swap(32);
	vtx.buffer_index_mode = cb_rel; /* cb_rel ? V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE */
V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE; 1123 1124 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1125 return r; 1126 1127 return 0; 1128} 1129 1130static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1131{ 1132 struct r600_bytecode_vtx vtx; 1133 int r; 1134 unsigned index = src->Register.Index; 1135 unsigned vtx_id = src->Dimension.Index; 1136 int offset_reg = vtx_id / 3; 1137 int offset_chan = vtx_id % 3; 1138 1139 /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, 1140 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ 1141 1142 if (offset_reg == 0 && offset_chan == 2) 1143 offset_chan = 3; 1144 1145 if (src->Dimension.Indirect) { 1146 int treg[3]; 1147 int t2; 1148 struct r600_bytecode_alu alu; 1149 int r, i; 1150 1151 /* you have got to be shitting me - 1152 we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt. 1153 at least this is what fglrx seems to do. */ 1154 for (i = 0; i < 3; i++) { 1155 treg[i] = r600_get_temp(ctx); 1156 } 1157 r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F); 1158 1159 t2 = r600_get_temp(ctx); 1160 for (i = 0; i < 3; i++) { 1161 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1162 alu.op = ALU_OP1_MOV; 1163 alu.src[0].sel = 0; 1164 alu.src[0].chan = i == 2 ? 
3 : i; 1165 alu.dst.sel = treg[i]; 1166 alu.dst.chan = 0; 1167 alu.dst.write = 1; 1168 alu.last = 1; 1169 r = r600_bytecode_add_alu(ctx->bc, &alu); 1170 if (r) 1171 return r; 1172 } 1173 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1174 alu.op = ALU_OP1_MOV; 1175 alu.src[0].sel = treg[0]; 1176 alu.src[0].rel = 1; 1177 alu.dst.sel = t2; 1178 alu.dst.write = 1; 1179 alu.last = 1; 1180 r = r600_bytecode_add_alu(ctx->bc, &alu); 1181 if (r) 1182 return r; 1183 offset_reg = t2; 1184 } 1185 1186 1187 memset(&vtx, 0, sizeof(vtx)); 1188 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 1189 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1190 vtx.src_gpr = offset_reg; 1191 vtx.src_sel_x = offset_chan; 1192 vtx.offset = index * 16; /*bytes*/ 1193 vtx.mega_fetch_count = 16; 1194 vtx.dst_gpr = dst_reg; 1195 vtx.dst_sel_x = 0; /* SEL_X */ 1196 vtx.dst_sel_y = 1; /* SEL_Y */ 1197 vtx.dst_sel_z = 2; /* SEL_Z */ 1198 vtx.dst_sel_w = 3; /* SEL_W */ 1199 if (ctx->bc->chip_class >= EVERGREEN) { 1200 vtx.use_const_fields = 1; 1201 } else { 1202 vtx.data_format = FMT_32_32_32_32_FLOAT; 1203 } 1204 1205 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1206 return r; 1207 1208 return 0; 1209} 1210 1211static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) 1212{ 1213 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1214 int i; 1215 1216 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1217 struct tgsi_full_src_register *src = &inst->Src[i]; 1218 1219 if (src->Register.File == TGSI_FILE_INPUT) { 1220 if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) { 1221 /* primitive id is in R0.z */ 1222 ctx->src[i].sel = 0; 1223 ctx->src[i].swizzle[0] = 2; 1224 } 1225 } 1226 if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { 1227 int treg = r600_get_temp(ctx); 1228 1229 fetch_gs_input(ctx, src, treg); 1230 ctx->src[i].sel = treg; 1231 } 1232 } 1233 return 0; 1234} 1235 1236static int tgsi_split_constant(struct 
r600_shader_ctx *ctx) 1237{ 1238 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1239 struct r600_bytecode_alu alu; 1240 int i, j, k, nconst, r; 1241 1242 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 1243 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 1244 nconst++; 1245 } 1246 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 1247 } 1248 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 1249 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 1250 continue; 1251 } 1252 1253 if (ctx->src[i].kc_rel) 1254 ctx->shader->uses_index_registers = true; 1255 1256 if (ctx->src[i].rel) { 1257 int chan = inst->Src[i].Indirect.Swizzle; 1258 int treg = r600_get_temp(ctx); 1259 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg))) 1260 return r; 1261 1262 ctx->src[i].kc_bank = 0; 1263 ctx->src[i].kc_rel = 0; 1264 ctx->src[i].sel = treg; 1265 ctx->src[i].rel = 0; 1266 j--; 1267 } else if (j > 0) { 1268 int treg = r600_get_temp(ctx); 1269 for (k = 0; k < 4; k++) { 1270 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1271 alu.op = ALU_OP1_MOV; 1272 alu.src[0].sel = ctx->src[i].sel; 1273 alu.src[0].chan = k; 1274 alu.src[0].rel = ctx->src[i].rel; 1275 alu.src[0].kc_bank = ctx->src[i].kc_bank; 1276 alu.src[0].kc_rel = ctx->src[i].kc_rel; 1277 alu.dst.sel = treg; 1278 alu.dst.chan = k; 1279 alu.dst.write = 1; 1280 if (k == 3) 1281 alu.last = 1; 1282 r = r600_bytecode_add_alu(ctx->bc, &alu); 1283 if (r) 1284 return r; 1285 } 1286 ctx->src[i].sel = treg; 1287 ctx->src[i].rel =0; 1288 j--; 1289 } 1290 } 1291 return 0; 1292} 1293 1294/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 1295static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 1296{ 1297 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1298 struct r600_bytecode_alu alu; 1299 int i, j, k, nliteral, r; 1300 
	/* count literal operands of the current instruction */
	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
		if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
			nliteral++;
		}
	}
	/* copy all but the last literal into temp GPRs (j counts down) */
	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
		if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
			int treg = r600_get_temp(ctx);
			for (k = 0; k < 4; k++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_MOV;
				alu.src[0].sel = ctx->src[i].sel;
				alu.src[0].chan = k;
				alu.src[0].value = ctx->src[i].value[k];
				alu.dst.sel = treg;
				alu.dst.chan = k;
				alu.dst.write = 1;
				if (k == 3)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			ctx->src[i].sel = treg;
			j--;
		}
	}
	return 0;
}

/* Emit front/back color selection for every COLOR input of a two-sided
 * lit fragment shader. Returns 0 or a negative error code. */
static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
{
	int i, r, count = ctx->shader->ninput;

	for (i = 0; i < count; i++) {
		if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
			r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input);
			if (r)
				return r;
		}
	}
	return 0;
}

/* Emit MEM_STREAM exports implementing transform feedback for the given
 * stream (-1 = all streams on pre-EG where 'stream' is matched against the
 * output buffer). Also accumulates enabled_stream_buffers_mask.
 * Returns 0 or a negative error code. */
static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so,
			  int stream, unsigned *stream_item_size)
{
	unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
	unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
	int i, j, r;

	/* Sanity checking. */
	if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) {
		R600_ERR("Too many stream outputs: %d\n", so->num_outputs);
		r = -EINVAL;
		goto out_err;
	}
	for (i = 0; i < so->num_outputs; i++) {
		if (so->output[i].output_buffer >= 4) {
			R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
				 so->output[i].output_buffer);
			r = -EINVAL;
			goto out_err;
		}
	}

	/* Initialize locations where the outputs are stored. */
	for (i = 0; i < so->num_outputs; i++) {

		so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
		start_comp[i] = so->output[i].start_component;
		/* Lower outputs with dst_offset < start_component.
		 *
		 * We can only output 4D vectors with a write mask, e.g. we can
		 * only output the W component at offset 3, etc. If we want
		 * to store Y, Z, or W at buffer offset 0, we need to use MOV
		 * to move it to X and output X. */
		if (so->output[i].dst_offset < so->output[i].start_component) {
			unsigned tmp = r600_get_temp(ctx);

			for (j = 0; j < so->output[i].num_components; j++) {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_MOV;
				alu.src[0].sel = so_gpr[i];
				alu.src[0].chan = so->output[i].start_component + j;

				alu.dst.sel = tmp;
				alu.dst.chan = j;
				alu.dst.write = 1;
				if (j == so->output[i].num_components - 1)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			start_comp[i] = 0;
			so_gpr[i] = tmp;
		}
	}

	/* Write outputs to buffers. */
	for (i = 0; i < so->num_outputs; i++) {
		struct r600_bytecode_output output;

		if (stream != -1 && stream != so->output[i].output_buffer)
			continue;

		memset(&output, 0, sizeof(struct r600_bytecode_output));
		output.gpr = so_gpr[i];
		output.elem_size = so->output[i].num_components - 1;
		if (output.elem_size == 2)
			output.elem_size = 3; // 3 not supported, write 4 with junk at end
		output.array_base = so->output[i].dst_offset - start_comp[i];
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
		output.burst_count = 1;
		/* array_size is an upper limit for the burst_count
		 * with MEM_STREAM instructions */
		output.array_size = 0xFFF;
		output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i];

		if (ctx->bc->chip_class >= EVERGREEN) {
			/* EG: one CF opcode per (stream, buffer) pair; opcodes
			 * are laid out so stream*4 steps between streams */
			switch (so->output[i].output_buffer) {
			case 0:
				output.op = CF_OP_MEM_STREAM0_BUF0;
				break;
			case 1:
				output.op = CF_OP_MEM_STREAM0_BUF1;
				break;
			case 2:
				output.op = CF_OP_MEM_STREAM0_BUF2;
				break;
			case 3:
				output.op = CF_OP_MEM_STREAM0_BUF3;
				break;
			}
			output.op += so->output[i].stream * 4;
			assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);
			ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4;
		} else {
			switch (so->output[i].output_buffer) {
			case 0:
				output.op = CF_OP_MEM_STREAM0;
				break;
			case 1:
				output.op = CF_OP_MEM_STREAM1;
				break;
			case 2:
				output.op = CF_OP_MEM_STREAM2;
				break;
			case 3:
				output.op = CF_OP_MEM_STREAM3;
				break;
			}
			ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer;
		}
		r = r600_bytecode_add_output(ctx->bc, &output);
		if (r)
			goto out_err;
	}
	return 0;
out_err:
	return r;
}

static void convert_edgeflag_to_int(struct
	r600_shader_ctx *ctx)
{
	/* The edge flag VS output must reach the HW as an integer 0/1:
	 * clamp the float to [0,1], then convert to int, in place. */
	struct r600_bytecode_alu alu;
	unsigned reg;

	if (!ctx->shader->vs_out_edgeflag)
		return;

	reg = ctx->shader->output[ctx->edgeflag_output].gpr;

	/* clamp(x, 0, 1) */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP1_MOV;
	alu.src[0].sel = reg;
	alu.dst.sel = reg;
	alu.dst.write = 1;
	alu.dst.clamp = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx->bc, &alu);

	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP1_FLT_TO_INT;
	alu.src[0].sel = reg;
	alu.dst.sel = reg;
	alu.dst.write = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx->bc, &alu);
}

/* Build the VS-stage "GS copy shader": a generated vertex shader that reads
 * the GS results back from the GSVS ring, runs streamout per vertex stream,
 * and exports stream-0 vertex data to the usual POS/PARAM slots.
 * Stores the result in gs->gs_copy_shader; returns r600_bytecode_build()'s
 * status (0 also on allocation failure - NOTE(review): callers then see a
 * NULL gs_copy_shader, verify they handle that). */
static int generate_gs_copy_shader(struct r600_context *rctx,
				   struct r600_pipe_shader *gs,
				   struct pipe_stream_output_info *so)
{
	struct r600_shader_ctx ctx = {};
	struct r600_shader *gs_shader = &gs->shader;
	struct r600_pipe_shader *cshader;
	int ocnt = gs_shader->noutput;
	struct r600_bytecode_alu alu;
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_output output;
	struct r600_bytecode_cf *cf_jump, *cf_pop,
		*last_exp_pos = NULL, *last_exp_param = NULL;
	int i, j, next_clip_pos = 61, next_param = 0;
	int ring;

	cshader = calloc(1, sizeof(struct r600_pipe_shader));
	if (!cshader)
		return 0;

	memcpy(cshader->shader.output, gs_shader->output, ocnt *
	       sizeof(struct r600_shader_io));

	cshader->shader.noutput = ocnt;

	ctx.shader = &cshader->shader;
	ctx.bc = &ctx.shader->bc;
	ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX;

	r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family,
			   rctx->screen->has_compressed_msaa_texturing);

	ctx.bc->isa = rctx->isa;

	cf_jump = NULL;
	memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));

	/* R0.x = R0.x & 0x3fffffff */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_AND_INT;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 0x3fffffff;
	alu.dst.write = 1;
	r600_bytecode_add_alu(ctx.bc, &alu);

	/* R0.y = R0.x >> 30  (the stream id lives in the two top bits) */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_LSHR_INT;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 0x1e;
	alu.dst.chan = 1;
	alu.dst.write = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx.bc, &alu);

	/* fetch vertex data from GSVS ring */
	for (i = 0; i < ocnt; ++i) {
		struct r600_shader_io *out = &ctx.shader->output[i];

		out->gpr = i + 1;
		out->ring_offset = i * 16;

		memset(&vtx, 0, sizeof(vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.offset = out->ring_offset;
		vtx.dst_gpr = out->gpr;
		vtx.src_gpr = 0;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		if (rctx->b.chip_class >= EVERGREEN) {
			vtx.use_const_fields = 1;
		} else {
			vtx.data_format = FMT_32_32_32_32_FLOAT;
		}

		r600_bytecode_add_vtx(ctx.bc, &vtx);
	}
	ctx.temp_reg = i + 1;
	/* per-stream predicated streamout: for each stream that has outputs,
	 * PRED_SETE on R0.y selects it, a JUMP skips it otherwise, and the
	 * previous stream's jump/pop addresses are patched once the next POP
	 * is emitted */
	for (ring = 3; ring >= 0; --ring) {
		bool enabled = false;
		for (i = 0; i < so->num_outputs; i++) {
			if (so->output[i].stream == ring) {
				enabled = true;
				break;
			}
		}
		if (ring != 0 && !enabled) {
			cshader->shader.ring_item_sizes[ring] = 0;
			continue;
		}

		if (cf_jump) {
			// Patch up jump label
			r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
			cf_pop = ctx.bc->cf_last;

			cf_jump->cf_addr = cf_pop->id + 2;
			cf_jump->pop_count = 1;
			cf_pop->cf_addr = cf_pop->id + 2;
			cf_pop->pop_count = 1;
		}

		/* PRED_SETE_INT __, R0.y, ring */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP2_PRED_SETE_INT;
		alu.src[0].chan = 1;
		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = ring;
		alu.execute_mask = 1;
		alu.update_pred = 1;
		alu.last = 1;
		r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);

		r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
		cf_jump = ctx.bc->cf_last;

		if (enabled)
			emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]);
		cshader->shader.ring_item_sizes[ring] = ocnt * 16;
	}

	/* export vertex data */
	/* XXX factor out common code with r600_shader_from_tgsi ? */
	for (i = 0; i < ocnt; ++i) {
		struct r600_shader_io *out = &ctx.shader->output[i];
		bool instream0 = true;
		if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
			continue;

		/* only outputs belonging to stream 0 are exported to the
		 * rasterizer path */
		for (j = 0; j < so->num_outputs; j++) {
			if (so->output[j].register_index == i) {
				if (so->output[j].stream == 0)
					break;
				if (so->output[j].stream > 0)
					instream0 = false;
			}
		}
		if (!instream0)
			continue;
		memset(&output, 0, sizeof(output));
		output.gpr = out->gpr;
		output.elem_size = 3;
		output.swizzle_x = 0;
		output.swizzle_y = 1;
		output.swizzle_z = 2;
		output.swizzle_w = 3;
		output.burst_count = 1;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
		output.op = CF_OP_EXPORT;
		switch (out->name) {
		case TGSI_SEMANTIC_POSITION:
			output.array_base = 60;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			break;

		case TGSI_SEMANTIC_PSIZE:
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output.swizzle_y = 7;
			output.swizzle_z = 7;
			output.swizzle_w = 7;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_point_size = 1;
			break;
		case TGSI_SEMANTIC_LAYER:
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output.swizzle_x = 7;
			output.swizzle_y = 7;
			output.swizzle_z = 0;
			output.swizzle_w = 7;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_layer = 1;
			break;
		case TGSI_SEMANTIC_VIEWPORT_INDEX:
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_viewport = 1;
			output.swizzle_x = 7;
			output.swizzle_y = 7;
			output.swizzle_z = 7;
			output.swizzle_w = 0;
			break;
		case TGSI_SEMANTIC_CLIPDIST:
			/* spi_sid is 0 for clipdistance outputs that were generated
			 * for clipvertex - we don't need to pass them to PS */
			ctx.shader->clip_dist_write = gs->shader.clip_dist_write;
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = next_clip_pos++;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			break;
		case TGSI_SEMANTIC_FOG:
			output.swizzle_y = 4; /* 0 */
			output.swizzle_z = 4; /* 0 */
			output.swizzle_w = 5; /* 1 */
			break;
		default:
			output.array_base = next_param++;
			break;
		}
		r600_bytecode_add_output(ctx.bc, &output);
		if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
			last_exp_param = ctx.bc->cf_last;
		else
			last_exp_pos = ctx.bc->cf_last;
	}

	/* the HW requires at least one POS and one PARAM export; add dummy
	 * (all-masked) ones if the shader produced none */
	if (!last_exp_pos) {
		memset(&output, 0, sizeof(output));
		output.gpr = 0;
		output.elem_size = 3;
		output.swizzle_x = 7;
		output.swizzle_y = 7;
		output.swizzle_z = 7;
		output.swizzle_w = 7;
		output.burst_count = 1;
		output.type = 2;
		output.op = CF_OP_EXPORT;
		output.array_base = 60;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
		r600_bytecode_add_output(ctx.bc, &output);
		last_exp_pos = ctx.bc->cf_last;
	}

	if (!last_exp_param) {
		memset(&output, 0, sizeof(output));
		output.gpr = 0;
		output.elem_size = 3;
		output.swizzle_x = 7;
		output.swizzle_y = 7;
		output.swizzle_z = 7;
		output.swizzle_w = 7;
		output.burst_count = 1;
		output.type = 2;
		output.op = CF_OP_EXPORT;
		output.array_base = next_param++;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
		r600_bytecode_add_output(ctx.bc, &output);
		last_exp_param = ctx.bc->cf_last;
	}

	/* mark the final export of each kind */
	last_exp_pos->op = CF_OP_EXPORT_DONE;
	last_exp_param->op = CF_OP_EXPORT_DONE;

	r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
	cf_pop = ctx.bc->cf_last;

	cf_jump->cf_addr = cf_pop->id + 2;
	cf_jump->pop_count = 1;
	cf_pop->cf_addr = cf_pop->id + 2;
	cf_pop->pop_count = 1;

	if (ctx.bc->chip_class == CAYMAN)
		cm_bytecode_add_cf_end(ctx.bc);
	else {
		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
		ctx.bc->cf_last->end_of_program = 1;
	}

	gs->gs_copy_shader = cshader;
	cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;

	ctx.bc->nstack = 1;

	return r600_bytecode_build(ctx.bc);
}

/* Write the current vertex's outputs to the ESGS/GSVS ring buffers.
 * stream == -1 means the ES path (effective stream 0); ind selects
 * register-indexed (MEM_*_WRITE_IND) addressing via the per-stream export
 * base registers. Returns 0 or a negative error code. */
static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind)
{
	struct r600_bytecode_output output;
	int i, k, ring_offset;
	int effective_stream = stream == -1 ?
	    0 : stream;
	int idx = 0;

	for (i = 0; i < ctx->shader->noutput; i++) {
		if (ctx->gs_for_vs) {
			/* for ES we need to lookup corresponding ring offset expected by GS
			 * (map this output to GS input by name and sid) */
			/* FIXME precompute offsets */
			ring_offset = -1;
			for(k = 0; k < ctx->gs_for_vs->ninput; ++k) {
				struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
				struct r600_shader_io *out = &ctx->shader->output[i];
				if (in->name == out->name && in->sid == out->sid)
					ring_offset = in->ring_offset;
			}

			/* output not consumed by the GS - skip it */
			if (ring_offset == -1)
				continue;
		} else {
			ring_offset = idx * 16;
			idx++;
		}

		if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
			continue;
		/* next_ring_offset after parsing input decls contains total size of
		 * single vertex data, gs_next_vertex - current vertex index */
		if (!ind)
			ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;

		memset(&output, 0, sizeof(struct r600_bytecode_output));
		output.gpr = ctx->shader->output[i].gpr;
		output.elem_size = 3;
		output.comp_mask = 0xF;
		output.burst_count = 1;

		if (ind)
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
		else
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;

		switch (stream) {
		default:
		case 0:
			output.op = CF_OP_MEM_RING; break;
		case 1:
			output.op = CF_OP_MEM_RING1; break;
		case 2:
			output.op = CF_OP_MEM_RING2; break;
		case 3:
			output.op = CF_OP_MEM_RING3; break;
		}

		if (ind) {
			output.array_base = ring_offset >> 2; /* in dwords */
			output.array_size = 0xfff;
			output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
		} else
			output.array_base = ring_offset >> 2; /* in dwords */
		r600_bytecode_add_output(ctx->bc, &output);
	}

	if (ind) {
		/* get a temp and add the ring offset to the next vertex base in the shader */
		struct r600_bytecode_alu alu;
		int r;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD_INT;
		alu.src[0].sel = ctx->gs_export_gpr_tregs[effective_stream];
		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		/* gs_out_ring_offset is in bytes; index register counts
		 * 16-byte vertices, hence >> 4 */
		alu.src[1].value = ctx->gs_out_ring_offset >> 4;
		alu.dst.sel = ctx->gs_export_gpr_tregs[effective_stream];
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	++ctx->gs_next_vertex;
	return 0;
}

/* Main TGSI -> r600 bytecode translation entry point (body continues
 * beyond this excerpt). */
static int r600_shader_from_tgsi(struct r600_context *rctx,
				 struct r600_pipe_shader *pipeshader,
				 union r600_shader_key key)
{
	struct r600_screen *rscreen = rctx->screen;
	struct r600_shader *shader = &pipeshader->shader;
	struct tgsi_token *tokens = pipeshader->selector->tokens;
	struct pipe_stream_output_info so = pipeshader->selector->so;
	struct tgsi_full_immediate *immediate;
	struct r600_shader_ctx ctx;
	struct r600_bytecode_output output[32];
	unsigned output_done, noutput;
	unsigned opcode;
	int i, j, k, r = 0;
	int next_param_base = 0, next_clip_base;
	int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
	/* Declarations used by llvm code */
	bool use_llvm = false;
	bool indirect_gprs;
	bool ring_outputs = false;
	bool pos_emitted = false;

#ifdef R600_USE_LLVM
	use_llvm = rscreen->b.debug_flags & DBG_LLVM;
#endif
	ctx.bc = &shader->bc;
	ctx.shader = shader;
	ctx.native_integers = true;


	r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
			   rscreen->has_compressed_msaa_texturing);
	ctx.tokens = tokens;
	tgsi_scan_shader(tokens, &ctx.info);
	shader->indirect_files = ctx.info.indirect_files;

	shader->uses_doubles = ctx.info.uses_doubles;

	/* constants are handled via kcache, not GPR arrays */
	indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT);
tgsi_parse_init(&ctx.parse, tokens); 1917 ctx.type = ctx.info.processor; 1918 shader->processor_type = ctx.type; 1919 ctx.bc->type = shader->processor_type; 1920 1921 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 1922 shader->vs_as_gs_a = key.vs.as_gs_a; 1923 shader->vs_as_es = key.vs.as_es; 1924 } 1925 1926 ring_outputs = shader->vs_as_es || ctx.type == TGSI_PROCESSOR_GEOMETRY; 1927 1928 if (shader->vs_as_es) { 1929 ctx.gs_for_vs = &rctx->gs_shader->current->shader; 1930 } else { 1931 ctx.gs_for_vs = NULL; 1932 } 1933 1934 ctx.next_ring_offset = 0; 1935 ctx.gs_out_ring_offset = 0; 1936 ctx.gs_next_vertex = 0; 1937 ctx.gs_stream_output_info = &so; 1938 1939 shader->uses_index_registers = false; 1940 ctx.face_gpr = -1; 1941 ctx.fixed_pt_position_gpr = -1; 1942 ctx.fragcoord_input = -1; 1943 ctx.colors_used = 0; 1944 ctx.clip_vertex_write = 0; 1945 1946 shader->nr_ps_color_exports = 0; 1947 shader->nr_ps_max_color_exports = 0; 1948 1949 if (ctx.type == TGSI_PROCESSOR_FRAGMENT) 1950 shader->two_side = key.ps.color_two_side; 1951 1952 /* register allocations */ 1953 /* Values [0,127] correspond to GPR[0..127]. 1954 * Values [128,159] correspond to constant buffer bank 0 1955 * Values [160,191] correspond to constant buffer bank 1 1956 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 1957 * Values [256,287] correspond to constant buffer bank 2 (EG) 1958 * Values [288,319] correspond to constant buffer bank 3 (EG) 1959 * Other special values are shown in the list below. 1960 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 1961 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 1962 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 1963 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 1964 * 248 SQ_ALU_SRC_0: special constant 0.0. 1965 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 1966 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 
1967 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 1968 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 1969 * 253 SQ_ALU_SRC_LITERAL: literal constant. 1970 * 254 SQ_ALU_SRC_PV: previous vector result. 1971 * 255 SQ_ALU_SRC_PS: previous scalar result. 1972 */ 1973 for (i = 0; i < TGSI_FILE_COUNT; i++) { 1974 ctx.file_offset[i] = 0; 1975 } 1976 1977#ifdef R600_USE_LLVM 1978 if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) { 1979 fprintf(stderr, "Warning: R600 LLVM backend does not support " 1980 "indirect adressing. Falling back to TGSI " 1981 "backend.\n"); 1982 use_llvm = 0; 1983 } 1984#endif 1985 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 1986 ctx.file_offset[TGSI_FILE_INPUT] = 1; 1987 if (!use_llvm) { 1988 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); 1989 } 1990 } 1991 if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { 1992 if (ctx.bc->chip_class >= EVERGREEN) 1993 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 1994 else 1995 ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); 1996 } 1997 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 1998 /* FIXME 1 would be enough in some cases (3 or less input vertices) */ 1999 ctx.file_offset[TGSI_FILE_INPUT] = 2; 2000 } 2001 ctx.use_llvm = use_llvm; 2002 2003 if (use_llvm) { 2004 ctx.file_offset[TGSI_FILE_OUTPUT] = 2005 ctx.file_offset[TGSI_FILE_INPUT]; 2006 } else { 2007 ctx.file_offset[TGSI_FILE_OUTPUT] = 2008 ctx.file_offset[TGSI_FILE_INPUT] + 2009 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 2010 } 2011 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 2012 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 2013 2014 /* Outside the GPR range. This will be translated to one of the 2015 * kcache banks later. 
*/ 2016 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 2017 2018 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 2019 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 2020 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 2021 ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1; 2022 ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2; 2023 2024 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 2025 ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3; 2026 ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4; 2027 ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5; 2028 ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6; 2029 ctx.temp_reg = ctx.bc->ar_reg + 7; 2030 } else { 2031 ctx.temp_reg = ctx.bc->ar_reg + 3; 2032 } 2033 2034 shader->max_arrays = 0; 2035 shader->num_arrays = 0; 2036 if (indirect_gprs) { 2037 2038 if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { 2039 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], 2040 ctx.file_offset[TGSI_FILE_OUTPUT] - 2041 ctx.file_offset[TGSI_FILE_INPUT], 2042 0x0F); 2043 } 2044 if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2045 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], 2046 ctx.file_offset[TGSI_FILE_TEMPORARY] - 2047 ctx.file_offset[TGSI_FILE_OUTPUT], 2048 0x0F); 2049 } 2050 } 2051 2052 ctx.nliterals = 0; 2053 ctx.literals = NULL; 2054 2055 shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]; 2056 shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 2057 2058 if (shader->vs_as_gs_a) 2059 vs_add_primid_output(&ctx, key.vs.prim_id_out); 2060 2061 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 2062 tgsi_parse_token(&ctx.parse); 2063 switch (ctx.parse.FullToken.Token.Type) { 2064 case TGSI_TOKEN_TYPE_IMMEDIATE: 2065 immediate = &ctx.parse.FullToken.FullImmediate; 2066 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 2067 if(ctx.literals == NULL) { 2068 r = -ENOMEM; 2069 goto out_err; 2070 } 2071 
ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 2072 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 2073 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 2074 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 2075 ctx.nliterals++; 2076 break; 2077 case TGSI_TOKEN_TYPE_DECLARATION: 2078 r = tgsi_declaration(&ctx); 2079 if (r) 2080 goto out_err; 2081 break; 2082 case TGSI_TOKEN_TYPE_INSTRUCTION: 2083 case TGSI_TOKEN_TYPE_PROPERTY: 2084 break; 2085 default: 2086 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 2087 r = -EINVAL; 2088 goto out_err; 2089 } 2090 } 2091 2092 shader->ring_item_sizes[0] = ctx.next_ring_offset; 2093 shader->ring_item_sizes[1] = 0; 2094 shader->ring_item_sizes[2] = 0; 2095 shader->ring_item_sizes[3] = 0; 2096 2097 /* Process two side if needed */ 2098 if (shader->two_side && ctx.colors_used) { 2099 int i, count = ctx.shader->ninput; 2100 unsigned next_lds_loc = ctx.shader->nlds; 2101 2102 /* additional inputs will be allocated right after the existing inputs, 2103 * we won't need them after the color selection, so we don't need to 2104 * reserve these gprs for the rest of the shader code and to adjust 2105 * output offsets etc. 
*/ 2106 int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 2107 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 2108 2109 /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */ 2110 if (ctx.face_gpr == -1) { 2111 i = ctx.shader->ninput++; 2112 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 2113 ctx.shader->input[i].spi_sid = 0; 2114 ctx.shader->input[i].gpr = gpr++; 2115 ctx.face_gpr = ctx.shader->input[i].gpr; 2116 } 2117 2118 for (i = 0; i < count; i++) { 2119 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 2120 int ni = ctx.shader->ninput++; 2121 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 2122 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 2123 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 2124 ctx.shader->input[ni].gpr = gpr++; 2125 // TGSI to LLVM needs to know the lds position of inputs. 2126 // Non LLVM path computes it later (in process_twoside_color) 2127 ctx.shader->input[ni].lds_pos = next_lds_loc++; 2128 ctx.shader->input[i].back_color_input = ni; 2129 if (ctx.bc->chip_class >= EVERGREEN) { 2130 if ((r = evergreen_interp_input(&ctx, ni))) 2131 return r; 2132 } 2133 } 2134 } 2135 } 2136 2137/* LLVM backend setup */ 2138#ifdef R600_USE_LLVM 2139 if (use_llvm) { 2140 struct radeon_llvm_context radeon_llvm_ctx; 2141 LLVMModuleRef mod; 2142 bool dump = r600_can_dump_shader(&rscreen->b, tokens); 2143 boolean use_kill = false; 2144 2145 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); 2146 radeon_llvm_ctx.type = ctx.type; 2147 radeon_llvm_ctx.two_side = shader->two_side; 2148 radeon_llvm_ctx.face_gpr = ctx.face_gpr; 2149 radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1; 2150 radeon_llvm_ctx.r600_inputs = ctx.shader->input; 2151 radeon_llvm_ctx.r600_outputs = ctx.shader->output; 2152 radeon_llvm_ctx.color_buffer_count = max_color_exports; 2153 radeon_llvm_ctx.chip_class = ctx.bc->chip_class; 2154 radeon_llvm_ctx.fs_color_all = 
shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN); 2155 radeon_llvm_ctx.stream_outputs = &so; 2156 radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one; 2157 radeon_llvm_ctx.has_compressed_msaa_texturing = 2158 ctx.bc->has_compressed_msaa_texturing; 2159 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); 2160 ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp; 2161 ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers; 2162 2163 if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) { 2164 radeon_llvm_dispose(&radeon_llvm_ctx); 2165 use_llvm = 0; 2166 fprintf(stderr, "R600 LLVM backend failed to compile " 2167 "shader. Falling back to TGSI\n"); 2168 } else { 2169 ctx.file_offset[TGSI_FILE_OUTPUT] = 2170 ctx.file_offset[TGSI_FILE_INPUT]; 2171 } 2172 if (use_kill) 2173 ctx.shader->uses_kill = use_kill; 2174 radeon_llvm_dispose(&radeon_llvm_ctx); 2175 } 2176#endif 2177/* End of LLVM backend setup */ 2178 2179 if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) 2180 shader->nr_ps_max_color_exports = 8; 2181 2182 if (!use_llvm) { 2183 if (ctx.fragcoord_input >= 0) { 2184 if (ctx.bc->chip_class == CAYMAN) { 2185 for (j = 0 ; j < 4; j++) { 2186 struct r600_bytecode_alu alu; 2187 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2188 alu.op = ALU_OP1_RECIP_IEEE; 2189 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 2190 alu.src[0].chan = 3; 2191 2192 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 2193 alu.dst.chan = j; 2194 alu.dst.write = (j == 3); 2195 alu.last = 1; 2196 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 2197 return r; 2198 } 2199 } else { 2200 struct r600_bytecode_alu alu; 2201 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2202 alu.op = ALU_OP1_RECIP_IEEE; 2203 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 2204 alu.src[0].chan = 3; 2205 2206 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 2207 alu.dst.chan = 3; 2208 alu.dst.write = 1; 2209 
alu.last = 1; 2210 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 2211 return r; 2212 } 2213 } 2214 2215 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 2216 struct r600_bytecode_alu alu; 2217 int r; 2218 for (j = 0; j < 4; j++) { 2219 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2220 alu.op = ALU_OP1_MOV; 2221 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 2222 alu.src[0].value = 0; 2223 alu.dst.sel = ctx.gs_export_gpr_tregs[j]; 2224 alu.dst.write = 1; 2225 alu.last = 1; 2226 r = r600_bytecode_add_alu(ctx.bc, &alu); 2227 if (r) 2228 return r; 2229 } 2230 } 2231 if (shader->two_side && ctx.colors_used) { 2232 if ((r = process_twoside_color_inputs(&ctx))) 2233 return r; 2234 } 2235 2236 tgsi_parse_init(&ctx.parse, tokens); 2237 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 2238 tgsi_parse_token(&ctx.parse); 2239 switch (ctx.parse.FullToken.Token.Type) { 2240 case TGSI_TOKEN_TYPE_INSTRUCTION: 2241 r = tgsi_is_supported(&ctx); 2242 if (r) 2243 goto out_err; 2244 ctx.max_driver_temp_used = 0; 2245 /* reserve first tmp for everyone */ 2246 r600_get_temp(&ctx); 2247 2248 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 2249 if ((r = tgsi_split_constant(&ctx))) 2250 goto out_err; 2251 if ((r = tgsi_split_literal_constant(&ctx))) 2252 goto out_err; 2253 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) 2254 if ((r = tgsi_split_gs_inputs(&ctx))) 2255 goto out_err; 2256 if (ctx.bc->chip_class == CAYMAN) 2257 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 2258 else if (ctx.bc->chip_class >= EVERGREEN) 2259 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 2260 else 2261 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 2262 r = ctx.inst_info->process(&ctx); 2263 if (r) 2264 goto out_err; 2265 break; 2266 default: 2267 break; 2268 } 2269 } 2270 } 2271 2272 /* Reset the temporary register counter. 
*/ 2273 ctx.max_driver_temp_used = 0; 2274 2275 noutput = shader->noutput; 2276 2277 if (!ring_outputs && ctx.clip_vertex_write) { 2278 unsigned clipdist_temp[2]; 2279 2280 clipdist_temp[0] = r600_get_temp(&ctx); 2281 clipdist_temp[1] = r600_get_temp(&ctx); 2282 2283 /* need to convert a clipvertex write into clipdistance writes and not export 2284 the clip vertex anymore */ 2285 2286 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 2287 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 2288 shader->output[noutput].gpr = clipdist_temp[0]; 2289 noutput++; 2290 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 2291 shader->output[noutput].gpr = clipdist_temp[1]; 2292 noutput++; 2293 2294 /* reset spi_sid for clipvertex output to avoid confusing spi */ 2295 shader->output[ctx.cv_output].spi_sid = 0; 2296 2297 shader->clip_dist_write = 0xFF; 2298 2299 for (i = 0; i < 8; i++) { 2300 int oreg = i >> 2; 2301 int ochan = i & 3; 2302 2303 for (j = 0; j < 4; j++) { 2304 struct r600_bytecode_alu alu; 2305 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2306 alu.op = ALU_OP2_DOT4; 2307 alu.src[0].sel = shader->output[ctx.cv_output].gpr; 2308 alu.src[0].chan = j; 2309 2310 alu.src[1].sel = 512 + i; 2311 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 2312 alu.src[1].chan = j; 2313 2314 alu.dst.sel = clipdist_temp[oreg]; 2315 alu.dst.chan = j; 2316 alu.dst.write = (j == ochan); 2317 if (j == 3) 2318 alu.last = 1; 2319 if (!use_llvm) 2320 r = r600_bytecode_add_alu(ctx.bc, &alu); 2321 if (r) 2322 return r; 2323 } 2324 } 2325 } 2326 2327 /* Add stream outputs. 
*/ 2328 if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX && 2329 so.num_outputs && !use_llvm) 2330 emit_streamout(&ctx, &so, -1, NULL); 2331 2332 pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 2333 convert_edgeflag_to_int(&ctx); 2334 2335 if (ring_outputs) { 2336 if (shader->vs_as_es) { 2337 ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx); 2338 ctx.gs_export_gpr_tregs[1] = -1; 2339 ctx.gs_export_gpr_tregs[2] = -1; 2340 ctx.gs_export_gpr_tregs[3] = -1; 2341 2342 emit_gs_ring_writes(&ctx, &so, -1, FALSE); 2343 } 2344 } else { 2345 /* Export output */ 2346 next_clip_base = shader->vs_out_misc_write ? 62 : 61; 2347 2348 for (i = 0, j = 0; i < noutput; i++, j++) { 2349 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2350 output[j].gpr = shader->output[i].gpr; 2351 output[j].elem_size = 3; 2352 output[j].swizzle_x = 0; 2353 output[j].swizzle_y = 1; 2354 output[j].swizzle_z = 2; 2355 output[j].swizzle_w = 3; 2356 output[j].burst_count = 1; 2357 output[j].type = -1; 2358 output[j].op = CF_OP_EXPORT; 2359 switch (ctx.type) { 2360 case TGSI_PROCESSOR_VERTEX: 2361 switch (shader->output[i].name) { 2362 case TGSI_SEMANTIC_POSITION: 2363 output[j].array_base = 60; 2364 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2365 pos_emitted = true; 2366 break; 2367 2368 case TGSI_SEMANTIC_PSIZE: 2369 output[j].array_base = 61; 2370 output[j].swizzle_y = 7; 2371 output[j].swizzle_z = 7; 2372 output[j].swizzle_w = 7; 2373 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2374 pos_emitted = true; 2375 break; 2376 case TGSI_SEMANTIC_EDGEFLAG: 2377 output[j].array_base = 61; 2378 output[j].swizzle_x = 7; 2379 output[j].swizzle_y = 0; 2380 output[j].swizzle_z = 7; 2381 output[j].swizzle_w = 7; 2382 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2383 pos_emitted = true; 2384 break; 2385 case TGSI_SEMANTIC_LAYER: 2386 /* spi_sid is 0 for outputs that are 2387 * not consumed by PS */ 2388 if 
(shader->output[i].spi_sid) { 2389 output[j].array_base = next_param_base++; 2390 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2391 j++; 2392 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 2393 } 2394 output[j].array_base = 61; 2395 output[j].swizzle_x = 7; 2396 output[j].swizzle_y = 7; 2397 output[j].swizzle_z = 0; 2398 output[j].swizzle_w = 7; 2399 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2400 pos_emitted = true; 2401 break; 2402 case TGSI_SEMANTIC_VIEWPORT_INDEX: 2403 /* spi_sid is 0 for outputs that are 2404 * not consumed by PS */ 2405 if (shader->output[i].spi_sid) { 2406 output[j].array_base = next_param_base++; 2407 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2408 j++; 2409 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 2410 } 2411 output[j].array_base = 61; 2412 output[j].swizzle_x = 7; 2413 output[j].swizzle_y = 7; 2414 output[j].swizzle_z = 7; 2415 output[j].swizzle_w = 0; 2416 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2417 pos_emitted = true; 2418 break; 2419 case TGSI_SEMANTIC_CLIPVERTEX: 2420 j--; 2421 break; 2422 case TGSI_SEMANTIC_CLIPDIST: 2423 output[j].array_base = next_clip_base++; 2424 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2425 pos_emitted = true; 2426 /* spi_sid is 0 for clipdistance outputs that were generated 2427 * for clipvertex - we don't need to pass them to PS */ 2428 if (shader->output[i].spi_sid) { 2429 j++; 2430 /* duplicate it as PARAM to pass to the pixel shader */ 2431 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 2432 output[j].array_base = next_param_base++; 2433 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2434 } 2435 break; 2436 case TGSI_SEMANTIC_FOG: 2437 output[j].swizzle_y = 4; /* 0 */ 2438 output[j].swizzle_z = 4; /* 0 */ 2439 output[j].swizzle_w = 5; /* 1 */ 2440 break; 2441 case TGSI_SEMANTIC_PRIMID: 2442 output[j].swizzle_x = 2; 2443 
output[j].swizzle_y = 4; /* 0 */ 2444 output[j].swizzle_z = 4; /* 0 */ 2445 output[j].swizzle_w = 4; /* 0 */ 2446 break; 2447 } 2448 2449 break; 2450 case TGSI_PROCESSOR_FRAGMENT: 2451 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 2452 /* never export more colors than the number of CBs */ 2453 if (shader->output[i].sid >= max_color_exports) { 2454 /* skip export */ 2455 j--; 2456 continue; 2457 } 2458 output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 2459 output[j].array_base = shader->output[i].sid; 2460 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2461 shader->nr_ps_color_exports++; 2462 if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { 2463 for (k = 1; k < max_color_exports; k++) { 2464 j++; 2465 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2466 output[j].gpr = shader->output[i].gpr; 2467 output[j].elem_size = 3; 2468 output[j].swizzle_x = 0; 2469 output[j].swizzle_y = 1; 2470 output[j].swizzle_z = 2; 2471 output[j].swizzle_w = key.ps.alpha_to_one ? 
5 : 3; 2472 output[j].burst_count = 1; 2473 output[j].array_base = k; 2474 output[j].op = CF_OP_EXPORT; 2475 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2476 shader->nr_ps_color_exports++; 2477 } 2478 } 2479 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 2480 output[j].array_base = 61; 2481 output[j].swizzle_x = 2; 2482 output[j].swizzle_y = 7; 2483 output[j].swizzle_z = output[j].swizzle_w = 7; 2484 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2485 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 2486 output[j].array_base = 61; 2487 output[j].swizzle_x = 7; 2488 output[j].swizzle_y = 1; 2489 output[j].swizzle_z = output[j].swizzle_w = 7; 2490 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2491 } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 2492 output[j].array_base = 61; 2493 output[j].swizzle_x = 7; 2494 output[j].swizzle_y = 7; 2495 output[j].swizzle_z = 0; 2496 output[j].swizzle_w = 7; 2497 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2498 } else { 2499 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 2500 r = -EINVAL; 2501 goto out_err; 2502 } 2503 break; 2504 default: 2505 R600_ERR("unsupported processor type %d\n", ctx.type); 2506 r = -EINVAL; 2507 goto out_err; 2508 } 2509 2510 if (output[j].type==-1) { 2511 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2512 output[j].array_base = next_param_base++; 2513 } 2514 } 2515 2516 /* add fake position export */ 2517 if (ctx.type == TGSI_PROCESSOR_VERTEX && pos_emitted == false) { 2518 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2519 output[j].gpr = 0; 2520 output[j].elem_size = 3; 2521 output[j].swizzle_x = 7; 2522 output[j].swizzle_y = 7; 2523 output[j].swizzle_z = 7; 2524 output[j].swizzle_w = 7; 2525 output[j].burst_count = 1; 2526 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2527 output[j].array_base = 60; 2528 output[j].op = 
CF_OP_EXPORT; 2529 j++; 2530 } 2531 2532 /* add fake param output for vertex shader if no param is exported */ 2533 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { 2534 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2535 output[j].gpr = 0; 2536 output[j].elem_size = 3; 2537 output[j].swizzle_x = 7; 2538 output[j].swizzle_y = 7; 2539 output[j].swizzle_z = 7; 2540 output[j].swizzle_w = 7; 2541 output[j].burst_count = 1; 2542 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2543 output[j].array_base = 0; 2544 output[j].op = CF_OP_EXPORT; 2545 j++; 2546 } 2547 2548 /* add fake pixel export */ 2549 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { 2550 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2551 output[j].gpr = 0; 2552 output[j].elem_size = 3; 2553 output[j].swizzle_x = 7; 2554 output[j].swizzle_y = 7; 2555 output[j].swizzle_z = 7; 2556 output[j].swizzle_w = 7; 2557 output[j].burst_count = 1; 2558 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2559 output[j].array_base = 0; 2560 output[j].op = CF_OP_EXPORT; 2561 j++; 2562 shader->nr_ps_color_exports++; 2563 } 2564 2565 noutput = j; 2566 2567 /* set export done on last export of each type */ 2568 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 2569 if (!(output_done & (1 << output[i].type))) { 2570 output_done |= (1 << output[i].type); 2571 output[i].op = CF_OP_EXPORT_DONE; 2572 } 2573 } 2574 /* add output to bytecode */ 2575 if (!use_llvm) { 2576 for (i = 0; i < noutput; i++) { 2577 r = r600_bytecode_add_output(ctx.bc, &output[i]); 2578 if (r) 2579 goto out_err; 2580 } 2581 } 2582 } 2583 2584 /* add program end */ 2585 if (!use_llvm) { 2586 if (ctx.bc->chip_class == CAYMAN) 2587 cm_bytecode_add_cf_end(ctx.bc); 2588 else { 2589 const struct cf_op_info *last = NULL; 2590 2591 if (ctx.bc->cf_last) 2592 last = r600_isa_cf(ctx.bc->cf_last->op); 2593 2594 /* alu clause instructions don't have EOP bit, so add NOP */ 
2595 if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS) 2596 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 2597 2598 ctx.bc->cf_last->end_of_program = 1; 2599 } 2600 } 2601 2602 /* check GPR limit - we have 124 = 128 - 4 2603 * (4 are reserved as alu clause temporary registers) */ 2604 if (ctx.bc->ngpr > 124) { 2605 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 2606 r = -ENOMEM; 2607 goto out_err; 2608 } 2609 2610 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 2611 if ((r = generate_gs_copy_shader(rctx, pipeshader, &so))) 2612 return r; 2613 } 2614 2615 free(ctx.literals); 2616 tgsi_parse_free(&ctx.parse); 2617 return 0; 2618out_err: 2619 free(ctx.literals); 2620 tgsi_parse_free(&ctx.parse); 2621 return r; 2622} 2623 2624static int tgsi_unsupported(struct r600_shader_ctx *ctx) 2625{ 2626 const unsigned tgsi_opcode = 2627 ctx->parse.FullToken.FullInstruction.Instruction.Opcode; 2628 R600_ERR("%s tgsi opcode unsupported\n", 2629 tgsi_get_opcode_name(tgsi_opcode)); 2630 return -EINVAL; 2631} 2632 2633static int tgsi_end(struct r600_shader_ctx *ctx) 2634{ 2635 return 0; 2636} 2637 2638static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 2639 const struct r600_shader_src *shader_src, 2640 unsigned chan) 2641{ 2642 bc_src->sel = shader_src->sel; 2643 bc_src->chan = shader_src->swizzle[chan]; 2644 bc_src->neg = shader_src->neg; 2645 bc_src->abs = shader_src->abs; 2646 bc_src->rel = shader_src->rel; 2647 bc_src->value = shader_src->value[bc_src->chan]; 2648 bc_src->kc_bank = shader_src->kc_bank; 2649 bc_src->kc_rel = shader_src->kc_rel; 2650} 2651 2652static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 2653{ 2654 bc_src->abs = 1; 2655 bc_src->neg = 0; 2656} 2657 2658static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 2659{ 2660 bc_src->neg = !bc_src->neg; 2661} 2662 2663static void tgsi_dst(struct r600_shader_ctx 
/* Return the index of the highest channel set in a 4-bit TGSI writemask.
 * An empty mask yields 0; bits above channel w are ignored. */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	for (chan = 3; chan > 0; chan--) {
		if (writemask & (1 << chan))
			break;
	}
	return chan;
}
(ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) { 2744 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2745 } else if (!swap) { 2746 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2747 r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 2748 } 2749 } else { 2750 r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i)); 2751 r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i)); 2752 } 2753 2754 /* handle some special cases */ 2755 if (i == 1 || i == 3) { 2756 switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) { 2757 case TGSI_OPCODE_SUB: 2758 r600_bytecode_src_toggle_neg(&alu.src[1]); 2759 break; 2760 case TGSI_OPCODE_DABS: 2761 r600_bytecode_src_set_abs(&alu.src[0]); 2762 break; 2763 default: 2764 break; 2765 } 2766 } 2767 if (i == lasti) { 2768 alu.last = 1; 2769 } 2770 r = r600_bytecode_add_alu(ctx->bc, &alu); 2771 if (r) 2772 return r; 2773 } 2774 2775 if (use_tmp) { 2776 write_mask = inst->Dst[0].Register.WriteMask; 2777 2778 /* move result from temp to dst */ 2779 for (i = 0; i <= lasti; i++) { 2780 if (!(write_mask & (1 << i))) 2781 continue; 2782 2783 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2784 alu.op = ALU_OP1_MOV; 2785 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2786 alu.src[0].sel = ctx->temp_reg; 2787 alu.src[0].chan = use_tmp - 1; 2788 alu.last = (i == lasti); 2789 2790 r = r600_bytecode_add_alu(ctx->bc, &alu); 2791 if (r) 2792 return r; 2793 } 2794 } 2795 return 0; 2796} 2797 2798static int tgsi_op2_64(struct r600_shader_ctx *ctx) 2799{ 2800 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2801 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2802 /* confirm writemasking */ 2803 if ((write_mask & 0x3) != 0x3 && 2804 (write_mask & 0xc) != 0xc) { 2805 fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask); 2806 return -1; 2807 } 2808 return tgsi_op2_64_params(ctx, false, false); 2809} 2810 2811static int 
tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx) 2812{ 2813 return tgsi_op2_64_params(ctx, true, false); 2814} 2815 2816static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx) 2817{ 2818 return tgsi_op2_64_params(ctx, true, true); 2819} 2820 2821static int tgsi_op3_64(struct r600_shader_ctx *ctx) 2822{ 2823 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2824 struct r600_bytecode_alu alu; 2825 int i, j, r; 2826 int lasti = 3; 2827 int tmp = r600_get_temp(ctx); 2828 2829 for (i = 0; i < lasti + 1; i++) { 2830 2831 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2832 alu.op = ctx->inst_info->op; 2833 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2834 r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1); 2835 } 2836 2837 if (inst->Dst[0].Register.WriteMask & (1 << i)) 2838 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2839 else 2840 alu.dst.sel = tmp; 2841 2842 alu.dst.chan = i; 2843 alu.is_op3 = 1; 2844 if (i == lasti) { 2845 alu.last = 1; 2846 } 2847 r = r600_bytecode_add_alu(ctx->bc, &alu); 2848 if (r) 2849 return r; 2850 } 2851 return 0; 2852} 2853 2854static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 2855{ 2856 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2857 struct r600_bytecode_alu alu; 2858 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2859 int i, j, r, lasti = tgsi_last_instruction(write_mask); 2860 /* use temp register if trans_only and more than one dst component */ 2861 int use_tmp = trans_only && (write_mask ^ (1 << lasti)); 2862 2863 for (i = 0; i <= lasti; i++) { 2864 if (!(write_mask & (1 << i))) 2865 continue; 2866 2867 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2868 if (use_tmp) { 2869 alu.dst.sel = ctx->temp_reg; 2870 alu.dst.chan = i; 2871 alu.dst.write = 1; 2872 } else 2873 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2874 2875 alu.op = ctx->inst_info->op; 2876 if (!swap) { 2877 for (j = 0; j < 
inst->Instruction.NumSrcRegs; j++) { 2878 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 2879 } 2880 } else { 2881 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2882 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2883 } 2884 /* handle some special cases */ 2885 switch (inst->Instruction.Opcode) { 2886 case TGSI_OPCODE_SUB: 2887 r600_bytecode_src_toggle_neg(&alu.src[1]); 2888 break; 2889 case TGSI_OPCODE_ABS: 2890 r600_bytecode_src_set_abs(&alu.src[0]); 2891 break; 2892 default: 2893 break; 2894 } 2895 if (i == lasti || trans_only) { 2896 alu.last = 1; 2897 } 2898 r = r600_bytecode_add_alu(ctx->bc, &alu); 2899 if (r) 2900 return r; 2901 } 2902 2903 if (use_tmp) { 2904 /* move result from temp to dst */ 2905 for (i = 0; i <= lasti; i++) { 2906 if (!(write_mask & (1 << i))) 2907 continue; 2908 2909 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2910 alu.op = ALU_OP1_MOV; 2911 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2912 alu.src[0].sel = ctx->temp_reg; 2913 alu.src[0].chan = i; 2914 alu.last = (i == lasti); 2915 2916 r = r600_bytecode_add_alu(ctx->bc, &alu); 2917 if (r) 2918 return r; 2919 } 2920 } 2921 return 0; 2922} 2923 2924static int tgsi_op2(struct r600_shader_ctx *ctx) 2925{ 2926 return tgsi_op2_s(ctx, 0, 0); 2927} 2928 2929static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 2930{ 2931 return tgsi_op2_s(ctx, 1, 0); 2932} 2933 2934static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 2935{ 2936 return tgsi_op2_s(ctx, 0, 1); 2937} 2938 2939static int tgsi_ineg(struct r600_shader_ctx *ctx) 2940{ 2941 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2942 struct r600_bytecode_alu alu; 2943 int i, r; 2944 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2945 2946 for (i = 0; i < lasti + 1; i++) { 2947 2948 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2949 continue; 2950 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2951 alu.op = ctx->inst_info->op; 2952 2953 alu.src[0].sel = V_SQ_ALU_SRC_0; 2954 
2955 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2956 2957 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2958 2959 if (i == lasti) { 2960 alu.last = 1; 2961 } 2962 r = r600_bytecode_add_alu(ctx->bc, &alu); 2963 if (r) 2964 return r; 2965 } 2966 return 0; 2967 2968} 2969 2970static int tgsi_dneg(struct r600_shader_ctx *ctx) 2971{ 2972 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2973 struct r600_bytecode_alu alu; 2974 int i, r; 2975 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2976 2977 for (i = 0; i < lasti + 1; i++) { 2978 2979 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2980 continue; 2981 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2982 alu.op = ALU_OP1_MOV; 2983 2984 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2985 2986 if (i == 1 || i == 3) 2987 r600_bytecode_src_toggle_neg(&alu.src[0]); 2988 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2989 2990 if (i == lasti) { 2991 alu.last = 1; 2992 } 2993 r = r600_bytecode_add_alu(ctx->bc, &alu); 2994 if (r) 2995 return r; 2996 } 2997 return 0; 2998 2999} 3000 3001static int tgsi_dfracexp(struct r600_shader_ctx *ctx) 3002{ 3003 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3004 struct r600_bytecode_alu alu; 3005 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3006 int i, j, r; 3007 int firsti = write_mask == 0xc ? 
/* DFRACEXP: split a double into fraction (dst0, a double) and exponent
 * (dst1, an int).  The op is run for all four slots into ctx->temp_reg,
 * then the relevant temp channels are moved into the two destinations.
 * Returns 0 on success or a negative error from bytecode emission. */
static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int i, j, r;
	/* fraction lands in the zw pair when dst0's mask is 0xc, else xy */
	int firsti = write_mask == 0xc ? 2 : 0;

	/* run the op across all four channels into the temp register */
	for (i = 0; i <= 3; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			/* fp64 sources use the swapped word order */
			r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
		}

		if (i == 3)
			alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* MOV first two channels to writemask dst0 */
	/* NOTE(review): the fraction is read from temp channels 2 and 3 —
	 * presumably where this op leaves the fraction words; confirm
	 * against the hardware op's result layout. */
	for (i = 0; i <= 1; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].chan = i + 2;
		alu.src[0].sel = ctx->temp_reg;

		tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
		/* only actually write the channels dst0's mask asks for */
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* MOV third channels to writemask dst1 */
	/* the exponent is a single int; write it to the first channel
	 * enabled in dst1's mask and stop */
	for (i = 0; i <= 3; i++) {
		if (inst->Dst[1].Register.WriteMask & (1 << i)) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			alu.src[0].chan = 1;
			alu.src[0].sel = ctx->temp_reg;

			tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst);
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
			break;
		}
	}
	return 0;
}
/* Emit a Cayman double-precision scalar op (DRSQ/DSQRT/DRCP style).
 * The op is replicated across three slots with the source's two words fed
 * as (high, low); the result double lands in temp channels 0/1 and is then
 * fanned out to every channel dst0's writemask requests. */
static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int i, r;
	struct r600_bytecode_alu alu;
	int last_slot = 3;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int t1 = ctx->temp_reg;

	/* these have to write the result to X/Y by the looks of it */
	for (i = 0 ; i < last_slot; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		/* should only be one src regs */
		assert (inst->Instruction.NumSrcRegs == 1);

		/* src word order: slot 1 (high word) then slot 0 (low word) */
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);

		/* RSQ should take the absolute value of src */
		if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
		    ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) {
			r600_bytecode_src_set_abs(&alu.src[1]);
		}
		alu.dst.sel = t1;
		alu.dst.chan = i;
		/* only channels 0/1 of the temp carry the result */
		alu.dst.write = (i == 0 || i == 1);

		if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* replicate the t1.xy double into each written channel pair:
	 * even channels take the low word, odd channels the high word */
	for (i = 0 ; i <= lasti; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = t1;
		alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
4 : 3; 3122 3123 for (i = 0 ; i < last_slot; i++) { 3124 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3125 alu.op = ctx->inst_info->op; 3126 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3127 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 3128 3129 /* RSQ should take the absolute value of src */ 3130 if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) { 3131 r600_bytecode_src_set_abs(&alu.src[j]); 3132 } 3133 } 3134 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3135 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 3136 3137 if (i == last_slot - 1) 3138 alu.last = 1; 3139 r = r600_bytecode_add_alu(ctx->bc, &alu); 3140 if (r) 3141 return r; 3142 } 3143 return 0; 3144} 3145 3146static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 3147{ 3148 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3149 int i, j, k, r; 3150 struct r600_bytecode_alu alu; 3151 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3152 int t1 = ctx->temp_reg; 3153 3154 for (k = 0; k <= lasti; k++) { 3155 if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 3156 continue; 3157 3158 for (i = 0 ; i < 4; i++) { 3159 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3160 alu.op = ctx->inst_info->op; 3161 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3162 r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 3163 } 3164 alu.dst.sel = t1; 3165 alu.dst.chan = i; 3166 alu.dst.write = (i == k); 3167 if (i == 3) 3168 alu.last = 1; 3169 r = r600_bytecode_add_alu(ctx->bc, &alu); 3170 if (r) 3171 return r; 3172 } 3173 } 3174 3175 for (i = 0 ; i <= lasti; i++) { 3176 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3177 continue; 3178 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3179 alu.op = ALU_OP1_MOV; 3180 alu.src[0].sel = t1; 3181 alu.src[0].chan = i; 3182 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3183 alu.dst.write = 1; 3184 if (i == lasti) 3185 alu.last = 1; 3186 r = r600_bytecode_add_alu(ctx->bc, &alu); 3187 if (r) 3188 return 
r; 3189 } 3190 3191 return 0; 3192} 3193 3194 3195static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) 3196{ 3197 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3198 int i, j, k, r; 3199 struct r600_bytecode_alu alu; 3200 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3201 int t1 = ctx->temp_reg; 3202 3203 for (k = 0; k < 2; k++) { 3204 if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2)))) 3205 continue; 3206 3207 for (i = 0; i < 4; i++) { 3208 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3209 alu.op = ctx->inst_info->op; 3210 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3211 r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));; 3212 } 3213 alu.dst.sel = t1; 3214 alu.dst.chan = i; 3215 alu.dst.write = 1; 3216 if (i == 3) 3217 alu.last = 1; 3218 r = r600_bytecode_add_alu(ctx->bc, &alu); 3219 if (r) 3220 return r; 3221 } 3222 } 3223 3224 for (i = 0; i <= lasti; i++) { 3225 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3226 continue; 3227 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3228 alu.op = ALU_OP1_MOV; 3229 alu.src[0].sel = t1; 3230 alu.src[0].chan = i; 3231 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3232 alu.dst.write = 1; 3233 if (i == lasti) 3234 alu.last = 1; 3235 r = r600_bytecode_add_alu(ctx->bc, &alu); 3236 if (r) 3237 return r; 3238 } 3239 3240 return 0; 3241} 3242 3243/* 3244 * r600 - trunc to -PI..PI range 3245 * r700 - normalize by dividing by 2PI 3246 * see fdo bug 27901 3247 */ 3248static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 3249{ 3250 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 3251 static float double_pi = 3.1415926535 * 2; 3252 static float neg_pi = -3.1415926535; 3253 3254 int r; 3255 struct r600_bytecode_alu alu; 3256 3257 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3258 alu.op = ALU_OP3_MULADD; 3259 alu.is_op3 = 1; 3260 3261 alu.dst.chan = 0; 3262 alu.dst.sel = ctx->temp_reg; 3263 alu.dst.write = 1; 3264 
3265 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3266 3267 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 3268 alu.src[1].chan = 0; 3269 alu.src[1].value = *(uint32_t *)&half_inv_pi; 3270 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 3271 alu.src[2].chan = 0; 3272 alu.last = 1; 3273 r = r600_bytecode_add_alu(ctx->bc, &alu); 3274 if (r) 3275 return r; 3276 3277 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3278 alu.op = ALU_OP1_FRACT; 3279 3280 alu.dst.chan = 0; 3281 alu.dst.sel = ctx->temp_reg; 3282 alu.dst.write = 1; 3283 3284 alu.src[0].sel = ctx->temp_reg; 3285 alu.src[0].chan = 0; 3286 alu.last = 1; 3287 r = r600_bytecode_add_alu(ctx->bc, &alu); 3288 if (r) 3289 return r; 3290 3291 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3292 alu.op = ALU_OP3_MULADD; 3293 alu.is_op3 = 1; 3294 3295 alu.dst.chan = 0; 3296 alu.dst.sel = ctx->temp_reg; 3297 alu.dst.write = 1; 3298 3299 alu.src[0].sel = ctx->temp_reg; 3300 alu.src[0].chan = 0; 3301 3302 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 3303 alu.src[1].chan = 0; 3304 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 3305 alu.src[2].chan = 0; 3306 3307 if (ctx->bc->chip_class == R600) { 3308 alu.src[1].value = *(uint32_t *)&double_pi; 3309 alu.src[2].value = *(uint32_t *)&neg_pi; 3310 } else { 3311 alu.src[1].sel = V_SQ_ALU_SRC_1; 3312 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 3313 alu.src[2].neg = 1; 3314 } 3315 3316 alu.last = 1; 3317 r = r600_bytecode_add_alu(ctx->bc, &alu); 3318 if (r) 3319 return r; 3320 return 0; 3321} 3322 3323static int cayman_trig(struct r600_shader_ctx *ctx) 3324{ 3325 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3326 struct r600_bytecode_alu alu; 3327 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3;
	int i, r;

	r = tgsi_setup_trig(ctx);
	if (r)
		return r;


	/* SIN/COS are vector ops on Cayman: issue one per slot and gate the
	 * result write with the destination writemask. */
	for (i = 0; i < last_slot; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		alu.dst.chan = i;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;

		/* all slots consume the normalized angle from temp.x */
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 0;
		if (i == last_slot - 1)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit SIN/COS on r600/r700/evergreen: normalize the angle with
 * tgsi_setup_trig, run the trig op once into temp.x (trig is a t-slot
 * op here), then MOV-replicate temp.x into each enabled dst channel. */
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	r = tgsi_setup_trig(ctx);
	if (r)
		return r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ctx->inst_info->op;
	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* replicate result */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;

		alu.src[0].sel = ctx->temp_reg;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit TGSI SCS: dst = (cos(src.x), sin(src.x), 0.0, 1.0), each
 * component gated by the writemask. */
static int tgsi_scs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	/* We'll only need the trig stuff if we are going to write to the
	 * X or Y
components of the destination vector. 3405 */ 3406 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 3407 r = tgsi_setup_trig(ctx); 3408 if (r) 3409 return r; 3410 } 3411 3412 /* dst.x = COS */ 3413 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3414 if (ctx->bc->chip_class == CAYMAN) { 3415 for (i = 0 ; i < 3; i++) { 3416 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3417 alu.op = ALU_OP1_COS; 3418 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3419 3420 if (i == 0) 3421 alu.dst.write = 1; 3422 else 3423 alu.dst.write = 0; 3424 alu.src[0].sel = ctx->temp_reg; 3425 alu.src[0].chan = 0; 3426 if (i == 2) 3427 alu.last = 1; 3428 r = r600_bytecode_add_alu(ctx->bc, &alu); 3429 if (r) 3430 return r; 3431 } 3432 } else { 3433 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3434 alu.op = ALU_OP1_COS; 3435 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 3436 3437 alu.src[0].sel = ctx->temp_reg; 3438 alu.src[0].chan = 0; 3439 alu.last = 1; 3440 r = r600_bytecode_add_alu(ctx->bc, &alu); 3441 if (r) 3442 return r; 3443 } 3444 } 3445 3446 /* dst.y = SIN */ 3447 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3448 if (ctx->bc->chip_class == CAYMAN) { 3449 for (i = 0 ; i < 3; i++) { 3450 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3451 alu.op = ALU_OP1_SIN; 3452 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3453 if (i == 1) 3454 alu.dst.write = 1; 3455 else 3456 alu.dst.write = 0; 3457 alu.src[0].sel = ctx->temp_reg; 3458 alu.src[0].chan = 0; 3459 if (i == 2) 3460 alu.last = 1; 3461 r = r600_bytecode_add_alu(ctx->bc, &alu); 3462 if (r) 3463 return r; 3464 } 3465 } else { 3466 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3467 alu.op = ALU_OP1_SIN; 3468 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 3469 3470 alu.src[0].sel = ctx->temp_reg; 3471 alu.src[0].chan = 0; 3472 alu.last = 1; 3473 r = r600_bytecode_add_alu(ctx->bc, &alu); 3474 if (r) 3475 return r; 3476 } 3477 } 3478 3479 /* dst.z = 0.0; */ 3480 if (inst->Dst[0].Register.WriteMask 
& TGSI_WRITEMASK_Z) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;

		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);

		alu.src[0].sel = V_SQ_ALU_SRC_0;
		alu.src[0].chan = 0;

		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst.w = 1.0; */
	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;

		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);

		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* Emit KILL / KILL_IF: issue the kill compare op on all four channels.
 * Unconditional KILL compares 0 against -1 (always kills); KILL_IF
 * compares 0 against the source operand per channel. */
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
	const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		alu.dst.chan = i;

		alu.src[0].sel = V_SQ_ALU_SRC_0;

		if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) {
			/* unconditional: compare against -1.0 so it always fires */
			alu.src[1].sel = V_SQ_ALU_SRC_1;
			alu.src[1].neg = 1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* kill must be last in ALU */
	ctx->bc->force_add_cf = 1;
	ctx->shader->uses_kill = TRUE;
	return 0;
}

/* Emit TGSI LIT: dst = (1.0, max(src.x,0), lighting exp term, 1.0),
 * using LOG_CLAMPED + MUL_LIT + EXP_IEEE for the Z component. */
static int tgsi_lit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;

	/* tmp.x = max(src.y, 0.0) */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP2_MAX;
	r600_bytecode_src(&alu.src[0], &ctx->src[0],
1); 3562 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 3563 alu.src[1].chan = 1; 3564 3565 alu.dst.sel = ctx->temp_reg; 3566 alu.dst.chan = 0; 3567 alu.dst.write = 1; 3568 3569 alu.last = 1; 3570 r = r600_bytecode_add_alu(ctx->bc, &alu); 3571 if (r) 3572 return r; 3573 3574 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 3575 { 3576 int chan; 3577 int sel; 3578 int i; 3579 3580 if (ctx->bc->chip_class == CAYMAN) { 3581 for (i = 0; i < 3; i++) { 3582 /* tmp.z = log(tmp.x) */ 3583 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3584 alu.op = ALU_OP1_LOG_CLAMPED; 3585 alu.src[0].sel = ctx->temp_reg; 3586 alu.src[0].chan = 0; 3587 alu.dst.sel = ctx->temp_reg; 3588 alu.dst.chan = i; 3589 if (i == 2) { 3590 alu.dst.write = 1; 3591 alu.last = 1; 3592 } else 3593 alu.dst.write = 0; 3594 3595 r = r600_bytecode_add_alu(ctx->bc, &alu); 3596 if (r) 3597 return r; 3598 } 3599 } else { 3600 /* tmp.z = log(tmp.x) */ 3601 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3602 alu.op = ALU_OP1_LOG_CLAMPED; 3603 alu.src[0].sel = ctx->temp_reg; 3604 alu.src[0].chan = 0; 3605 alu.dst.sel = ctx->temp_reg; 3606 alu.dst.chan = 2; 3607 alu.dst.write = 1; 3608 alu.last = 1; 3609 r = r600_bytecode_add_alu(ctx->bc, &alu); 3610 if (r) 3611 return r; 3612 } 3613 3614 chan = alu.dst.chan; 3615 sel = alu.dst.sel; 3616 3617 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 3618 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3619 alu.op = ALU_OP3_MUL_LIT; 3620 alu.src[0].sel = sel; 3621 alu.src[0].chan = chan; 3622 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 3623 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 3624 alu.dst.sel = ctx->temp_reg; 3625 alu.dst.chan = 0; 3626 alu.dst.write = 1; 3627 alu.is_op3 = 1; 3628 alu.last = 1; 3629 r = r600_bytecode_add_alu(ctx->bc, &alu); 3630 if (r) 3631 return r; 3632 3633 if (ctx->bc->chip_class == CAYMAN) { 3634 for (i = 0; i < 3; i++) { 3635 /* dst.z = exp(tmp.x) */ 3636 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3637 alu.op = 
ALU_OP1_EXP_IEEE; 3638 alu.src[0].sel = ctx->temp_reg; 3639 alu.src[0].chan = 0; 3640 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3641 if (i == 2) { 3642 alu.dst.write = 1; 3643 alu.last = 1; 3644 } else 3645 alu.dst.write = 0; 3646 r = r600_bytecode_add_alu(ctx->bc, &alu); 3647 if (r) 3648 return r; 3649 } 3650 } else { 3651 /* dst.z = exp(tmp.x) */ 3652 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3653 alu.op = ALU_OP1_EXP_IEEE; 3654 alu.src[0].sel = ctx->temp_reg; 3655 alu.src[0].chan = 0; 3656 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 3657 alu.last = 1; 3658 r = r600_bytecode_add_alu(ctx->bc, &alu); 3659 if (r) 3660 return r; 3661 } 3662 } 3663 3664 /* dst.x, <- 1.0 */ 3665 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3666 alu.op = ALU_OP1_MOV; 3667 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 3668 alu.src[0].chan = 0; 3669 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 3670 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 3671 r = r600_bytecode_add_alu(ctx->bc, &alu); 3672 if (r) 3673 return r; 3674 3675 /* dst.y = max(src.x, 0.0) */ 3676 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3677 alu.op = ALU_OP2_MAX; 3678 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3679 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 3680 alu.src[1].chan = 0; 3681 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 3682 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 3683 r = r600_bytecode_add_alu(ctx->bc, &alu); 3684 if (r) 3685 return r; 3686 3687 /* dst.w, <- 1.0 */ 3688 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3689 alu.op = ALU_OP1_MOV; 3690 alu.src[0].sel = V_SQ_ALU_SRC_1; 3691 alu.src[0].chan = 0; 3692 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 3693 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 3694 alu.last = 1; 3695 r = r600_bytecode_add_alu(ctx->bc, &alu); 3696 if (r) 3697 return r; 3698 3699 return 0; 3700} 3701 3702static int tgsi_rsq(struct r600_shader_ctx *ctx) 3703{ 3704 struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));

	/* XXX:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIPSQRT_IEEE instead.
	 */
	alu.op = ALU_OP1_RECIPSQRT_CLAMPED;

	/* RSQ operates on |src| */
	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
		r600_bytecode_src_set_abs(&alu.src[i]);
	}
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	/* replicate result */
	return tgsi_helper_tempx_replicate(ctx);
}

/* Broadcast temp_reg.x (chan 0 — src chan left at its zeroed default)
 * into every channel of dst0 enabled by the writemask. Used by the
 * scalar transcendental ops that compute their result into temp.x. */
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.src[0].sel = ctx->temp_reg;
		alu.op = ALU_OP1_MOV;
		alu.dst.chan = i;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Generic scalar transcendental emitter: run the instruction's op on the
 * X channel of its sources into temp.x, then replicate to the dst. */
static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ctx->inst_info->op;
	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
	}
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	/* replicate result */
	return
tgsi_helper_tempx_replicate(ctx);
}

/* Emit POW on Cayman as EXP2(b * LOG2(a)). LOG/EXP are vector ops on
 * Cayman, so LOG is issued across three slots (result used from one)
 * and EXP across the slots required by the writemask. */
static int cayman_pow(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int i, r;
	struct r600_bytecode_alu alu;
	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;

	/* LOG2(a) into the temp reg */
	for (i = 0; i < 3; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_LOG_IEEE;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		if (i == 2)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* b * LOG2(a) */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP2_MUL;
	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
	alu.src[1].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	for (i = 0; i < last_slot; i++) {
		/* POW(a,b) = EXP2(b * LOG2(a))*/
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_EXP_IEEE;
		alu.src[0].sel = ctx->temp_reg;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		if (i == last_slot - 1)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit POW on r600-class hardware as EXP2(b * LOG2(a)) computed into
 * temp.x, then replicated to the destination channels. */
static int tgsi_pow(struct r600_shader_ctx *ctx)
{
	struct r600_bytecode_alu alu;
	int r;

	/* LOG2(a) */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_LOG_IEEE;
	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	/* b * LOG2(a) */
	memset(&alu, 0, sizeof(struct
r600_bytecode_alu)); 3840 alu.op = ALU_OP2_MUL; 3841 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 3842 alu.src[1].sel = ctx->temp_reg; 3843 alu.dst.sel = ctx->temp_reg; 3844 alu.dst.write = 1; 3845 alu.last = 1; 3846 r = r600_bytecode_add_alu(ctx->bc, &alu); 3847 if (r) 3848 return r; 3849 /* POW(a,b) = EXP2(b * LOG2(a))*/ 3850 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3851 alu.op = ALU_OP1_EXP_IEEE; 3852 alu.src[0].sel = ctx->temp_reg; 3853 alu.dst.sel = ctx->temp_reg; 3854 alu.dst.write = 1; 3855 alu.last = 1; 3856 r = r600_bytecode_add_alu(ctx->bc, &alu); 3857 if (r) 3858 return r; 3859 return tgsi_helper_tempx_replicate(ctx); 3860} 3861 3862static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 3863{ 3864 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3865 struct r600_bytecode_alu alu; 3866 int i, r, j; 3867 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3868 int tmp0 = ctx->temp_reg; 3869 int tmp1 = r600_get_temp(ctx); 3870 int tmp2 = r600_get_temp(ctx); 3871 int tmp3 = r600_get_temp(ctx); 3872 /* Unsigned path: 3873 * 3874 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 3875 * 3876 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 3877 * 2. tmp0.z = lo (tmp0.x * src2) 3878 * 3. tmp0.w = -tmp0.z 3879 * 4. tmp0.y = hi (tmp0.x * src2) 3880 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 3881 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 3882 * 7. tmp1.x = tmp0.x - tmp0.w 3883 * 8. tmp1.y = tmp0.x + tmp0.w 3884 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 3885 * 10. tmp0.z = hi(tmp0.x * src1) = q 3886 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 3887 * 3888 * 12. tmp0.w = src1 - tmp0.y = r 3889 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 3890 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 3891 * 3892 * if DIV 3893 * 3894 * 15. tmp1.z = tmp0.z + 1 = q + 1 3895 * 16. 
tmp1.w = tmp0.z - 1 = q - 1 3896 * 3897 * else MOD 3898 * 3899 * 15. tmp1.z = tmp0.w - src2 = r - src2 3900 * 16. tmp1.w = tmp0.w + src2 = r + src2 3901 * 3902 * endif 3903 * 3904 * 17. tmp1.x = tmp1.x & tmp1.y 3905 * 3906 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 3907 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 3908 * 3909 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 3910 * 20. dst = src2==0 ? MAX_UINT : tmp0.z 3911 * 3912 * Signed path: 3913 * 3914 * Same as unsigned, using abs values of the operands, 3915 * and fixing the sign of the result in the end. 3916 */ 3917 3918 for (i = 0; i < 4; i++) { 3919 if (!(write_mask & (1<<i))) 3920 continue; 3921 3922 if (signed_op) { 3923 3924 /* tmp2.x = -src0 */ 3925 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3926 alu.op = ALU_OP2_SUB_INT; 3927 3928 alu.dst.sel = tmp2; 3929 alu.dst.chan = 0; 3930 alu.dst.write = 1; 3931 3932 alu.src[0].sel = V_SQ_ALU_SRC_0; 3933 3934 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3935 3936 alu.last = 1; 3937 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3938 return r; 3939 3940 /* tmp2.y = -src1 */ 3941 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3942 alu.op = ALU_OP2_SUB_INT; 3943 3944 alu.dst.sel = tmp2; 3945 alu.dst.chan = 1; 3946 alu.dst.write = 1; 3947 3948 alu.src[0].sel = V_SQ_ALU_SRC_0; 3949 3950 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3951 3952 alu.last = 1; 3953 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3954 return r; 3955 3956 /* tmp2.z sign bit is set if src0 and src2 signs are different */ 3957 /* it will be a sign of the quotient */ 3958 if (!mod) { 3959 3960 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3961 alu.op = ALU_OP2_XOR_INT; 3962 3963 alu.dst.sel = tmp2; 3964 alu.dst.chan = 2; 3965 alu.dst.write = 1; 3966 3967 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3968 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3969 3970 alu.last = 1; 3971 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3972 return r; 3973 } 3974 3975 /* 
tmp2.x = |src0| */ 3976 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3977 alu.op = ALU_OP3_CNDGE_INT; 3978 alu.is_op3 = 1; 3979 3980 alu.dst.sel = tmp2; 3981 alu.dst.chan = 0; 3982 alu.dst.write = 1; 3983 3984 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3985 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3986 alu.src[2].sel = tmp2; 3987 alu.src[2].chan = 0; 3988 3989 alu.last = 1; 3990 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3991 return r; 3992 3993 /* tmp2.y = |src1| */ 3994 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3995 alu.op = ALU_OP3_CNDGE_INT; 3996 alu.is_op3 = 1; 3997 3998 alu.dst.sel = tmp2; 3999 alu.dst.chan = 1; 4000 alu.dst.write = 1; 4001 4002 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4003 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4004 alu.src[2].sel = tmp2; 4005 alu.src[2].chan = 1; 4006 4007 alu.last = 1; 4008 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4009 return r; 4010 4011 } 4012 4013 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 4014 if (ctx->bc->chip_class == CAYMAN) { 4015 /* tmp3.x = u2f(src2) */ 4016 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4017 alu.op = ALU_OP1_UINT_TO_FLT; 4018 4019 alu.dst.sel = tmp3; 4020 alu.dst.chan = 0; 4021 alu.dst.write = 1; 4022 4023 if (signed_op) { 4024 alu.src[0].sel = tmp2; 4025 alu.src[0].chan = 1; 4026 } else { 4027 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4028 } 4029 4030 alu.last = 1; 4031 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4032 return r; 4033 4034 /* tmp0.x = recip(tmp3.x) */ 4035 for (j = 0 ; j < 3; j++) { 4036 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4037 alu.op = ALU_OP1_RECIP_IEEE; 4038 4039 alu.dst.sel = tmp0; 4040 alu.dst.chan = j; 4041 alu.dst.write = (j == 0); 4042 4043 alu.src[0].sel = tmp3; 4044 alu.src[0].chan = 0; 4045 4046 if (j == 2) 4047 alu.last = 1; 4048 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4049 return r; 4050 } 4051 4052 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 4053 alu.op = ALU_OP2_MUL; 4054 4055 alu.src[0].sel = tmp0; 4056 alu.src[0].chan = 0; 4057 4058 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4059 alu.src[1].value = 0x4f800000; 4060 4061 alu.dst.sel = tmp3; 4062 alu.dst.write = 1; 4063 alu.last = 1; 4064 r = r600_bytecode_add_alu(ctx->bc, &alu); 4065 if (r) 4066 return r; 4067 4068 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4069 alu.op = ALU_OP1_FLT_TO_UINT; 4070 4071 alu.dst.sel = tmp0; 4072 alu.dst.chan = 0; 4073 alu.dst.write = 1; 4074 4075 alu.src[0].sel = tmp3; 4076 alu.src[0].chan = 0; 4077 4078 alu.last = 1; 4079 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4080 return r; 4081 4082 } else { 4083 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4084 alu.op = ALU_OP1_RECIP_UINT; 4085 4086 alu.dst.sel = tmp0; 4087 alu.dst.chan = 0; 4088 alu.dst.write = 1; 4089 4090 if (signed_op) { 4091 alu.src[0].sel = tmp2; 4092 alu.src[0].chan = 1; 4093 } else { 4094 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4095 } 4096 4097 alu.last = 1; 4098 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4099 return r; 4100 } 4101 4102 /* 2. 
tmp0.z = lo (tmp0.x * src2) */ 4103 if (ctx->bc->chip_class == CAYMAN) { 4104 for (j = 0 ; j < 4; j++) { 4105 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4106 alu.op = ALU_OP2_MULLO_UINT; 4107 4108 alu.dst.sel = tmp0; 4109 alu.dst.chan = j; 4110 alu.dst.write = (j == 2); 4111 4112 alu.src[0].sel = tmp0; 4113 alu.src[0].chan = 0; 4114 if (signed_op) { 4115 alu.src[1].sel = tmp2; 4116 alu.src[1].chan = 1; 4117 } else { 4118 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4119 } 4120 4121 alu.last = (j == 3); 4122 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4123 return r; 4124 } 4125 } else { 4126 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4127 alu.op = ALU_OP2_MULLO_UINT; 4128 4129 alu.dst.sel = tmp0; 4130 alu.dst.chan = 2; 4131 alu.dst.write = 1; 4132 4133 alu.src[0].sel = tmp0; 4134 alu.src[0].chan = 0; 4135 if (signed_op) { 4136 alu.src[1].sel = tmp2; 4137 alu.src[1].chan = 1; 4138 } else { 4139 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4140 } 4141 4142 alu.last = 1; 4143 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4144 return r; 4145 } 4146 4147 /* 3. tmp0.w = -tmp0.z */ 4148 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4149 alu.op = ALU_OP2_SUB_INT; 4150 4151 alu.dst.sel = tmp0; 4152 alu.dst.chan = 3; 4153 alu.dst.write = 1; 4154 4155 alu.src[0].sel = V_SQ_ALU_SRC_0; 4156 alu.src[1].sel = tmp0; 4157 alu.src[1].chan = 2; 4158 4159 alu.last = 1; 4160 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4161 return r; 4162 4163 /* 4. 
tmp0.y = hi (tmp0.x * src2) */ 4164 if (ctx->bc->chip_class == CAYMAN) { 4165 for (j = 0 ; j < 4; j++) { 4166 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4167 alu.op = ALU_OP2_MULHI_UINT; 4168 4169 alu.dst.sel = tmp0; 4170 alu.dst.chan = j; 4171 alu.dst.write = (j == 1); 4172 4173 alu.src[0].sel = tmp0; 4174 alu.src[0].chan = 0; 4175 4176 if (signed_op) { 4177 alu.src[1].sel = tmp2; 4178 alu.src[1].chan = 1; 4179 } else { 4180 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4181 } 4182 alu.last = (j == 3); 4183 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4184 return r; 4185 } 4186 } else { 4187 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4188 alu.op = ALU_OP2_MULHI_UINT; 4189 4190 alu.dst.sel = tmp0; 4191 alu.dst.chan = 1; 4192 alu.dst.write = 1; 4193 4194 alu.src[0].sel = tmp0; 4195 alu.src[0].chan = 0; 4196 4197 if (signed_op) { 4198 alu.src[1].sel = tmp2; 4199 alu.src[1].chan = 1; 4200 } else { 4201 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4202 } 4203 4204 alu.last = 1; 4205 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4206 return r; 4207 } 4208 4209 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 4210 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4211 alu.op = ALU_OP3_CNDE_INT; 4212 alu.is_op3 = 1; 4213 4214 alu.dst.sel = tmp0; 4215 alu.dst.chan = 2; 4216 alu.dst.write = 1; 4217 4218 alu.src[0].sel = tmp0; 4219 alu.src[0].chan = 1; 4220 alu.src[1].sel = tmp0; 4221 alu.src[1].chan = 3; 4222 alu.src[2].sel = tmp0; 4223 alu.src[2].chan = 2; 4224 4225 alu.last = 1; 4226 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4227 return r; 4228 4229 /* 6. 
tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 4230 if (ctx->bc->chip_class == CAYMAN) { 4231 for (j = 0 ; j < 4; j++) { 4232 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4233 alu.op = ALU_OP2_MULHI_UINT; 4234 4235 alu.dst.sel = tmp0; 4236 alu.dst.chan = j; 4237 alu.dst.write = (j == 3); 4238 4239 alu.src[0].sel = tmp0; 4240 alu.src[0].chan = 2; 4241 4242 alu.src[1].sel = tmp0; 4243 alu.src[1].chan = 0; 4244 4245 alu.last = (j == 3); 4246 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4247 return r; 4248 } 4249 } else { 4250 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4251 alu.op = ALU_OP2_MULHI_UINT; 4252 4253 alu.dst.sel = tmp0; 4254 alu.dst.chan = 3; 4255 alu.dst.write = 1; 4256 4257 alu.src[0].sel = tmp0; 4258 alu.src[0].chan = 2; 4259 4260 alu.src[1].sel = tmp0; 4261 alu.src[1].chan = 0; 4262 4263 alu.last = 1; 4264 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4265 return r; 4266 } 4267 4268 /* 7. tmp1.x = tmp0.x - tmp0.w */ 4269 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4270 alu.op = ALU_OP2_SUB_INT; 4271 4272 alu.dst.sel = tmp1; 4273 alu.dst.chan = 0; 4274 alu.dst.write = 1; 4275 4276 alu.src[0].sel = tmp0; 4277 alu.src[0].chan = 0; 4278 alu.src[1].sel = tmp0; 4279 alu.src[1].chan = 3; 4280 4281 alu.last = 1; 4282 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4283 return r; 4284 4285 /* 8. tmp1.y = tmp0.x + tmp0.w */ 4286 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4287 alu.op = ALU_OP2_ADD_INT; 4288 4289 alu.dst.sel = tmp1; 4290 alu.dst.chan = 1; 4291 alu.dst.write = 1; 4292 4293 alu.src[0].sel = tmp0; 4294 alu.src[0].chan = 0; 4295 alu.src[1].sel = tmp0; 4296 alu.src[1].chan = 3; 4297 4298 alu.last = 1; 4299 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4300 return r; 4301 4302 /* 9. tmp0.x = (tmp0.y == 0 ? 
tmp1.y : tmp1.x) */ 4303 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4304 alu.op = ALU_OP3_CNDE_INT; 4305 alu.is_op3 = 1; 4306 4307 alu.dst.sel = tmp0; 4308 alu.dst.chan = 0; 4309 alu.dst.write = 1; 4310 4311 alu.src[0].sel = tmp0; 4312 alu.src[0].chan = 1; 4313 alu.src[1].sel = tmp1; 4314 alu.src[1].chan = 1; 4315 alu.src[2].sel = tmp1; 4316 alu.src[2].chan = 0; 4317 4318 alu.last = 1; 4319 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4320 return r; 4321 4322 /* 10. tmp0.z = hi(tmp0.x * src1) = q */ 4323 if (ctx->bc->chip_class == CAYMAN) { 4324 for (j = 0 ; j < 4; j++) { 4325 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4326 alu.op = ALU_OP2_MULHI_UINT; 4327 4328 alu.dst.sel = tmp0; 4329 alu.dst.chan = j; 4330 alu.dst.write = (j == 2); 4331 4332 alu.src[0].sel = tmp0; 4333 alu.src[0].chan = 0; 4334 4335 if (signed_op) { 4336 alu.src[1].sel = tmp2; 4337 alu.src[1].chan = 0; 4338 } else { 4339 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4340 } 4341 4342 alu.last = (j == 3); 4343 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4344 return r; 4345 } 4346 } else { 4347 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4348 alu.op = ALU_OP2_MULHI_UINT; 4349 4350 alu.dst.sel = tmp0; 4351 alu.dst.chan = 2; 4352 alu.dst.write = 1; 4353 4354 alu.src[0].sel = tmp0; 4355 alu.src[0].chan = 0; 4356 4357 if (signed_op) { 4358 alu.src[1].sel = tmp2; 4359 alu.src[1].chan = 0; 4360 } else { 4361 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4362 } 4363 4364 alu.last = 1; 4365 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4366 return r; 4367 } 4368 4369 /* 11. 
tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 4370 if (ctx->bc->chip_class == CAYMAN) { 4371 for (j = 0 ; j < 4; j++) { 4372 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4373 alu.op = ALU_OP2_MULLO_UINT; 4374 4375 alu.dst.sel = tmp0; 4376 alu.dst.chan = j; 4377 alu.dst.write = (j == 1); 4378 4379 if (signed_op) { 4380 alu.src[0].sel = tmp2; 4381 alu.src[0].chan = 1; 4382 } else { 4383 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4384 } 4385 4386 alu.src[1].sel = tmp0; 4387 alu.src[1].chan = 2; 4388 4389 alu.last = (j == 3); 4390 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4391 return r; 4392 } 4393 } else { 4394 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4395 alu.op = ALU_OP2_MULLO_UINT; 4396 4397 alu.dst.sel = tmp0; 4398 alu.dst.chan = 1; 4399 alu.dst.write = 1; 4400 4401 if (signed_op) { 4402 alu.src[0].sel = tmp2; 4403 alu.src[0].chan = 1; 4404 } else { 4405 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4406 } 4407 4408 alu.src[1].sel = tmp0; 4409 alu.src[1].chan = 2; 4410 4411 alu.last = 1; 4412 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4413 return r; 4414 } 4415 4416 /* 12. tmp0.w = src1 - tmp0.y = r */ 4417 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4418 alu.op = ALU_OP2_SUB_INT; 4419 4420 alu.dst.sel = tmp0; 4421 alu.dst.chan = 3; 4422 alu.dst.write = 1; 4423 4424 if (signed_op) { 4425 alu.src[0].sel = tmp2; 4426 alu.src[0].chan = 0; 4427 } else { 4428 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4429 } 4430 4431 alu.src[1].sel = tmp0; 4432 alu.src[1].chan = 1; 4433 4434 alu.last = 1; 4435 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4436 return r; 4437 4438 /* 13. 
tmp1.x = tmp0.w >= src2 = r >= src2 */ 4439 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4440 alu.op = ALU_OP2_SETGE_UINT; 4441 4442 alu.dst.sel = tmp1; 4443 alu.dst.chan = 0; 4444 alu.dst.write = 1; 4445 4446 alu.src[0].sel = tmp0; 4447 alu.src[0].chan = 3; 4448 if (signed_op) { 4449 alu.src[1].sel = tmp2; 4450 alu.src[1].chan = 1; 4451 } else { 4452 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4453 } 4454 4455 alu.last = 1; 4456 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4457 return r; 4458 4459 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 4460 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4461 alu.op = ALU_OP2_SETGE_UINT; 4462 4463 alu.dst.sel = tmp1; 4464 alu.dst.chan = 1; 4465 alu.dst.write = 1; 4466 4467 if (signed_op) { 4468 alu.src[0].sel = tmp2; 4469 alu.src[0].chan = 0; 4470 } else { 4471 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4472 } 4473 4474 alu.src[1].sel = tmp0; 4475 alu.src[1].chan = 1; 4476 4477 alu.last = 1; 4478 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4479 return r; 4480 4481 if (mod) { /* UMOD */ 4482 4483 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 4484 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4485 alu.op = ALU_OP2_SUB_INT; 4486 4487 alu.dst.sel = tmp1; 4488 alu.dst.chan = 2; 4489 alu.dst.write = 1; 4490 4491 alu.src[0].sel = tmp0; 4492 alu.src[0].chan = 3; 4493 4494 if (signed_op) { 4495 alu.src[1].sel = tmp2; 4496 alu.src[1].chan = 1; 4497 } else { 4498 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4499 } 4500 4501 alu.last = 1; 4502 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4503 return r; 4504 4505 /* 16. 
tmp1.w = tmp0.w + src2 = r + src2 */ 4506 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4507 alu.op = ALU_OP2_ADD_INT; 4508 4509 alu.dst.sel = tmp1; 4510 alu.dst.chan = 3; 4511 alu.dst.write = 1; 4512 4513 alu.src[0].sel = tmp0; 4514 alu.src[0].chan = 3; 4515 if (signed_op) { 4516 alu.src[1].sel = tmp2; 4517 alu.src[1].chan = 1; 4518 } else { 4519 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4520 } 4521 4522 alu.last = 1; 4523 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4524 return r; 4525 4526 } else { /* UDIV */ 4527 4528 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 4529 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4530 alu.op = ALU_OP2_ADD_INT; 4531 4532 alu.dst.sel = tmp1; 4533 alu.dst.chan = 2; 4534 alu.dst.write = 1; 4535 4536 alu.src[0].sel = tmp0; 4537 alu.src[0].chan = 2; 4538 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 4539 4540 alu.last = 1; 4541 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4542 return r; 4543 4544 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 4545 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4546 alu.op = ALU_OP2_ADD_INT; 4547 4548 alu.dst.sel = tmp1; 4549 alu.dst.chan = 3; 4550 alu.dst.write = 1; 4551 4552 alu.src[0].sel = tmp0; 4553 alu.src[0].chan = 2; 4554 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 4555 4556 alu.last = 1; 4557 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4558 return r; 4559 4560 } 4561 4562 /* 17. tmp1.x = tmp1.x & tmp1.y */ 4563 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4564 alu.op = ALU_OP2_AND_INT; 4565 4566 alu.dst.sel = tmp1; 4567 alu.dst.chan = 0; 4568 alu.dst.write = 1; 4569 4570 alu.src[0].sel = tmp1; 4571 alu.src[0].chan = 0; 4572 alu.src[1].sel = tmp1; 4573 alu.src[1].chan = 1; 4574 4575 alu.last = 1; 4576 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4577 return r; 4578 4579 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 4580 /* 18. tmp0.z = tmp1.x==0 ? 
tmp0.w : tmp1.z MOD */ 4581 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4582 alu.op = ALU_OP3_CNDE_INT; 4583 alu.is_op3 = 1; 4584 4585 alu.dst.sel = tmp0; 4586 alu.dst.chan = 2; 4587 alu.dst.write = 1; 4588 4589 alu.src[0].sel = tmp1; 4590 alu.src[0].chan = 0; 4591 alu.src[1].sel = tmp0; 4592 alu.src[1].chan = mod ? 3 : 2; 4593 alu.src[2].sel = tmp1; 4594 alu.src[2].chan = 2; 4595 4596 alu.last = 1; 4597 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4598 return r; 4599 4600 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 4601 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4602 alu.op = ALU_OP3_CNDE_INT; 4603 alu.is_op3 = 1; 4604 4605 if (signed_op) { 4606 alu.dst.sel = tmp0; 4607 alu.dst.chan = 2; 4608 alu.dst.write = 1; 4609 } else { 4610 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4611 } 4612 4613 alu.src[0].sel = tmp1; 4614 alu.src[0].chan = 1; 4615 alu.src[1].sel = tmp1; 4616 alu.src[1].chan = 3; 4617 alu.src[2].sel = tmp0; 4618 alu.src[2].chan = 2; 4619 4620 alu.last = 1; 4621 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4622 return r; 4623 4624 if (signed_op) { 4625 4626 /* fix the sign of the result */ 4627 4628 if (mod) { 4629 4630 /* tmp0.x = -tmp0.z */ 4631 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4632 alu.op = ALU_OP2_SUB_INT; 4633 4634 alu.dst.sel = tmp0; 4635 alu.dst.chan = 0; 4636 alu.dst.write = 1; 4637 4638 alu.src[0].sel = V_SQ_ALU_SRC_0; 4639 alu.src[1].sel = tmp0; 4640 alu.src[1].chan = 2; 4641 4642 alu.last = 1; 4643 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4644 return r; 4645 4646 /* sign of the remainder is the same as the sign of src0 */ 4647 /* tmp0.x = src0>=0 ? 
tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				/* select goes straight to the instruction's real destination */
				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

			} else {

				/* tmp0.x = -tmp0.z  (0 - tmp0.z: integer negate of the unsigned quotient) */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_SUB_INT;

				alu.dst.sel = tmp0;
				alu.dst.chan = 0;
				alu.dst.write = 1;

				alu.src[0].sel = V_SQ_ALU_SRC_0;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

				/* fix the quotient sign (same as the sign of src0*src1) */
				/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				alu.src[0].sel = tmp2;
				alu.src[0].chan = 2;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
	}
	return 0;
}

/* TGSI_OPCODE_UDIV: unsigned divide (tgsi_divmod with mod=0, signed_op=0). */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 0);
}

/* TGSI_OPCODE_UMOD: unsigned remainder (tgsi_divmod with mod=1, signed_op=0). */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 0);
}

/* TGSI_OPCODE_IDIV: signed divide (tgsi_divmod with mod=0, signed_op=1). */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 1);
}

/* TGSI_OPCODE_IMOD: signed remainder (tgsi_divmod with mod=1, signed_op=1). */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 1);
}


/* Float-to-integer conversion (F2I / F2U family).
 * Pass 1 truncates each written channel of src0 into temp_reg; pass 2 runs
 * the conversion op from inst_info (e.g. FLT_TO_INT / FLT_TO_UINT) from the
 * temp into the real destination, honoring the write mask. */
static int tgsi_f2i(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* pass 1: tmp = trunc(src) per written channel */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_TRUNC;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* pass 2: dst = convert(tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		/* NOTE(review): FLT_TO_UINT always terminates its ALU group here —
		 * presumably a slot restriction on this op; confirm against the ISA. */
		if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* TGSI_OPCODE_IABS: dst = |src| (integer), honoring the write mask.
 * Computed as tmp = 0 - src, then dst = (src >= 0 ? src : tmp). */
static int tgsi_iabs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* tmp = -src */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		/* 0 - src */
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[0].sel = V_SQ_ALU_SRC_0;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (src >= 0 ?
src : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		alu.dst.write = 1;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		/* CNDGE_INT: src0 >= 0 ? src1 : src2 */
		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI_OPCODE_ISSG: integer sign, dst = -1 / 0 / 1 depending on src.
 * Two conditional selects: tmp = (src >= 0 ? src : -1), then
 * dst = (tmp > 0 ? 1 : tmp). Honors the write mask. */
static int tgsi_issg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* tmp = (src >= 0 ? src : -1) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (tmp > 0 ?
1 : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT_INT;
		alu.is_op3 = 1;
		alu.dst.write = 1;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}



/* TGSI_OPCODE_SSG: float sign, dst = -1.0 / 0.0 / 1.0 depending on src.
 * tmp = (src > 0 ? 1 : src), then dst = (-tmp > 0 ? -1 : tmp).
 * All four channels are always emitted (no write-mask skipping here). */
static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	/* tmp = (src > 0 ? 1 : src) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.src[1].sel = V_SQ_ALU_SRC_1;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (-tmp > 0 ?
-1 : tmp) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		/* negate modifier gives -tmp for the comparison */
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		alu.src[0].neg = 1;

		/* -1.0 via negated one constant */
		alu.src[1].sel = V_SQ_ALU_SRC_1;
		alu.src[1].neg = 1;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI_OPCODE_BFI: bitfield insert, honoring the write mask.
 * t1 = BFM(src3, src2)    — mask built from width (src3) and offset (src2)
 * t2 = src1 << src2       — insert value shifted into position
 * dst = BFI(t1, t2, src0) — merge shifted insert into base under the mask */
static int tgsi_bfi(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	t1 = ctx->temp_reg;

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* create mask tmp */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_BFM_INT;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* shift insert left */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_LSHL_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* actual bitfield insert */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_BFI_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* TGSI IMSB/UMSB: index of the most significant (set/differing) bit.
 * FFBH counts from the msb; TGSI wants the index from the lsb, so the
 * result is remapped as 31 - ffbh, except when ffbh returns "not found"
 * (negative), which is passed through unchanged. */
static int tgsi_msb(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
	       ctx->inst_info->op == ALU_OP1_FFBH_UINT);

	t1 = ctx->temp_reg;

	/* bit position is indexed from lsb by TGSI, and from msb by the hardware */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t1 = FFBH_INT / FFBH_UINT */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t2 = 31 - t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[0].value = 31;
		alu.src[1].sel = t1;
		alu.src[1].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* result = t1 >= 0 ? t2 : t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		alu.src[2].sel = t1;
		alu.src[2].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* TGSI INTERP_CENTROID / INTERP_OFFSET / INTERP_SAMPLE on Evergreen/Cayman.
 * Picks the matching hardware interpolator (ij pair), optionally adjusts
 * the barycentrics by offset/sample position using screen-space gradients,
 * then emits the INTERP_ZW/INTERP_XY pairs and moves the swizzled result
 * into the destination. */
static int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
	unsigned location;
	int input;

	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);

	input = inst->Src[0].Register.Index;

	/* Interpolators have been marked for use already by allocate_system_value_inputs */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */
	}
	else {
		location = TGSI_INTERPOLATE_LOC_CENTROID;
	}

	k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location);
	if (k < 0)
		k = 0;
	/* two ij pairs are packed per GPR: even pair in chans 0-1, odd in 2-3 */
	interp_gpr = ctx->eg_interpolators[k].ij_index / 2;
	interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2);

	/* NOTE: currently offset is not perspective correct */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		int sample_gpr = -1;
		int gradientsH, gradientsV;
		struct r600_bytecode_tex tex;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]);
		}

		/* fetch d(ij)/dx and d(ij)/dy of the barycentrics */
		gradientsH = r600_get_temp(ctx);
		gradientsV = r600_get_temp(ctx);
		for (i = 0; i < 2; i++) {
			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
			tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V;
			tex.src_gpr = interp_gpr;
			tex.src_sel_x = interp_base_chan + 0;
			tex.src_sel_y = interp_base_chan + 1;
			tex.src_sel_z = 0;
			tex.src_sel_w = 0;
			tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
			tex.dst_sel_x = 0;
			tex.dst_sel_y = 1;
			tex.dst_sel_z = 7;
			tex.dst_sel_w = 7;
			tex.inst_mod = 1; // Use per pixel gradient calculation
			tex.sampler_id = 0;
			tex.resource_id = tex.sampler_id;
			r = r600_bytecode_add_tex(ctx->bc, &tex);
			if (r)
				return r;
		}

		/* ij += dij/dx * offset.x (or sample_pos.z for INTERP_SAMPLE) */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsH;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 2;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
			}
			alu.src[2].sel = interp_gpr;
			alu.src[2].chan = interp_base_chan + i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* ij += dij/dy * offset.y (or sample_pos.w for INTERP_SAMPLE) */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsV;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 3;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
			}
			alu.src[2].sel = ctx->temp_reg;
			alu.src[2].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* emit INTERP_ZW then INTERP_XY; only chans 2-5 of the 8 slots carry
	 * live results, the rest exist to fill the ALU groups */
	tmp = r600_get_temp(ctx);
	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;

		alu.dst.sel = tmp;
		if ((i > 1 && i < 6)) {
			alu.dst.write = 1;
		}
		else {
			alu.dst.write = 0;
		}
		alu.dst.chan = i % 4;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1 - (i % 2);
		} else {
			alu.src[0].sel = interp_gpr;
			alu.src[0].chan = interp_base_chan + 1 - (i % 2);
		}
		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[1].chan = 0;

		alu.last = i % 4 == 3;
		/* NOTE(review): interp groups appear to need this fixed bank
		 * swizzle — confirm against the Evergreen ISA docs */
		alu.bank_swizzle_force = SQ_ALU_VEC_210;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	// INTERP can't swizzle dst
	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	for (i = 0; i <= lasti; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = tmp;
		alu.src[0].chan = ctx->src[0].swizzle[i];
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		alu.last = i == lasti;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}


/* Copy temp_reg into inst's destination per the write mask; unwritten
 * channels are emitted as NOPs so the ALU group stays full. */
static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
{
	struct
r600_bytecode_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
			/* keep the slot occupied without writing anything */
			alu.op = ALU_OP0_NOP;
			alu.dst.chan = i;
		} else {
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = i;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Prepare one source operand of an op3 instruction.
 * op3 encodings have no abs modifier, so when the TGSI source carries abs
 * the operand is first copied (with abs applied) into the caller-provided
 * temp register and bc_src is redirected to that temp. */
static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx,
			unsigned temp, int chan,
			struct r600_bytecode_alu_src *bc_src,
			const struct r600_shader_src *shader_src)
{
	struct r600_bytecode_alu alu;
	int r;

	r600_bytecode_src(bc_src, shader_src, chan);

	/* op3 operands don't support abs modifier */
	if (bc_src->abs) {
		assert(temp!=0);      /* we actually need the extra register, make sure it is allocated. */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = temp;
		alu.dst.chan = chan;
		alu.dst.write = 1;

		alu.src[0] = *bc_src;
		alu.last = true; // sufficient?
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		memset(bc_src, 0, sizeof(*bc_src));
		bc_src->sel = temp;
		bc_src->chan = chan;
	}
	return 0;
}

/* Generic translation of a three-operand (op3) TGSI instruction: one ALU
 * op per written channel, with abs-modified sources staged through temps
 * allocated up front (see tgsi_make_src_for_op3). */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int temp_regs[4];

	/* reserve a temp per source that needs its abs modifier lowered */
	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
		temp_regs[j] = 0;
		if (ctx->src[j].abs)
			temp_regs[j] = r600_get_temp(ctx);
	}
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
			if (r)
				return r;
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Dot-product family (DP2/DP3/DP4/DPH): the reduction op runs across all
 * four slots; shorter dot products pad the unused channels with constants
 * (0 for DP2/DP3, 1.0 in .w for DPH). */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		/* handle some special cases */
		switch (inst->Instruction.Opcode) {
		case TGSI_OPCODE_DP2:
			if
(i > 1) {
				/* channels beyond .y contribute 0 to the sum */
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DP3:
			if (i > 2) {
				/* .w contributes 0 to the sum */
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DPH:
			if (i == 3) {
				/* homogeneous dot: treat src0.w as 1.0 */
				alu.src[0].sel = V_SQ_ALU_SRC_1;
				alu.src[0].chan = 0;
				alu.src[0].neg = 0;
			}
			break;
		default:
			break;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Texture fetch sources must be plain GPRs with no modifiers; returns true
 * when the operand has to be copied into a temp first (non-GPR file,
 * neg/abs modifier, or a GS input, which needs explicit loading). */
static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
						unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
		inst->Src[index].Register.File != TGSI_FILE_INPUT &&
		inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
		ctx->src[index].neg || ctx->src[index].abs ||
		(inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == TGSI_PROCESSOR_GEOMETRY);
}

/* Map a TGSI register reference to its hardware GPR number using the
 * per-file base offsets computed at shader setup. */
static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
					unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
}

/* Lower TXF on a buffer texture to a vertex-style VFETCH.
 * On pre-Evergreen the fetched components are additionally masked/patched
 * via the buffer-info constant buffer (AND per channel, OR into .w). */
static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
{
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_alu alu;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int src_gpr, r, i;
	int id = tgsi_tex_get_src_gpr(ctx, 1);	/* sampler/resource slot */

	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
	if (src_requires_loading) {
		/* stage the coordinate in a temp GPR (fetches need a plain GPR) */
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		src_gpr = ctx->temp_reg;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.op = FETCH_OP_VFETCH;
	vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = src_gpr;
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	/* sel 7 = masked (do not write the channel) */
	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;		/* SEL_X */
	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7;		/* SEL_Y */
	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;		/* SEL_Z */
	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;		/* SEL_W */
	vtx.use_const_fields = 1;

	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
		return r;

	if (ctx->bc->chip_class >= EVERGREEN)
		return 0;

	/* pre-Evergreen: mask each fetched channel with the buffer-info constant */
	for (i = 0; i < 4; i++) {
		int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_AND_INT;

		alu.dst.chan = i;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = i;

		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
		alu.src[1].sel += (id * 2);
		alu.src[1].chan = i % 4;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* pre-Evergreen: OR the second buffer-info dword into .w */
	if (inst->Dst[0].Register.WriteMask & 3) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_OR_INT;

		alu.dst.chan = 3;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = 3;

		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
		alu.src[1].chan = 0;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TXQ on a buffer texture: the buffer size lives in the buffer-info
 * constant buffer; just MOV the right constant channel into the dest.
 * The channel layout differs between r600 and Evergreen+. */
static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int id = tgsi_tex_get_src_gpr(ctx, 1);

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_MOV;
	alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
	if (ctx->bc->chip_class >= EVERGREEN) {
		/* channel 0 or 2 of each word */
		alu.src[0].sel += (id / 2);
		alu.src[0].chan = (id % 2) * 2;
	} else {
		/* r600 we have them at channel 2 of the second dword */
		alu.src[0].sel += (id * 2) + 1;
		alu.src[0].chan = 1;
	}
	alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* Translate the TGSI texture-sampling opcodes (TEX/TXB/TXL/TXD/TXF/TXQ/
 * TG4/...) to r600 texture fetch instructions. */
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
	static float one_point_five = 1.5f;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_tex tex;
	struct r600_bytecode_alu alu;
	unsigned src_gpr;
	int r, i, j;
	int opcode;
	/* compressed MSAA surfaces need the FMASK-decompress read path */
	bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing &&
				    inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
				    (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
				     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);

	bool txf_add_offsets = inst->Texture.NumOffsets &&
			       inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
			       inst->Texture.Texture !=
TGSI_TEXTURE_BUFFER; 5583 5584 /* Texture fetch instructions can only use gprs as source. 5585 * Also they cannot negate the source or take the absolute value */ 5586 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 5587 tgsi_tex_src_requires_loading(ctx, 0)) || 5588 read_compressed_msaa || txf_add_offsets; 5589 5590 boolean src_loaded = FALSE; 5591 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; 5592 int8_t offset_x = 0, offset_y = 0, offset_z = 0; 5593 boolean has_txq_cube_array_z = false; 5594 unsigned sampler_index_mode; 5595 5596 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 5597 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 5598 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 5599 if (inst->Dst[0].Register.WriteMask & 4) { 5600 ctx->shader->has_txq_cube_array_z_comp = true; 5601 has_txq_cube_array_z = true; 5602 } 5603 5604 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 5605 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 5606 inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 5607 inst->Instruction.Opcode == TGSI_OPCODE_TG4) 5608 sampler_src_reg = 2; 5609 5610 /* TGSI moves the sampler to src reg 3 for TXD */ 5611 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 5612 sampler_src_reg = 3; 5613 5614 sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 5615 if (sampler_index_mode) 5616 ctx->shader->uses_index_registers = true; 5617 5618 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 5619 5620 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 5621 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 5622 ctx->shader->uses_tex_buffers = true; 5623 return r600_do_buffer_txq(ctx); 5624 } 5625 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 5626 if (ctx->bc->chip_class < EVERGREEN) 5627 ctx->shader->uses_tex_buffers = true; 5628 return do_vtx_fetch_inst(ctx, src_requires_loading); 5629 } 5630 } 5631 5632 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 5633 int out_chan; 5634 /* Add perspective divide */ 5635 if (ctx->bc->chip_class == CAYMAN) { 5636 out_chan = 2; 5637 for (i = 0; i < 3; i++) { 5638 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5639 alu.op = ALU_OP1_RECIP_IEEE; 5640 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5641 5642 alu.dst.sel = ctx->temp_reg; 5643 alu.dst.chan = i; 5644 if (i == 2) 5645 alu.last = 1; 5646 if (out_chan == i) 5647 alu.dst.write = 1; 5648 r = r600_bytecode_add_alu(ctx->bc, &alu); 5649 if (r) 5650 return r; 5651 } 5652 5653 } else { 5654 out_chan = 3; 5655 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5656 alu.op = ALU_OP1_RECIP_IEEE; 5657 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5658 5659 alu.dst.sel = ctx->temp_reg; 5660 alu.dst.chan = out_chan; 5661 alu.last = 1; 5662 alu.dst.write = 1; 5663 r = r600_bytecode_add_alu(ctx->bc, &alu); 5664 if (r) 5665 return r; 5666 } 5667 5668 for (i = 0; i < 3; i++) { 5669 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5670 alu.op = ALU_OP2_MUL; 5671 alu.src[0].sel = ctx->temp_reg; 5672 alu.src[0].chan = out_chan; 5673 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5674 alu.dst.sel = ctx->temp_reg; 5675 alu.dst.chan = i; 5676 alu.dst.write = 1; 5677 r = r600_bytecode_add_alu(ctx->bc, &alu); 5678 if (r) 5679 return r; 5680 } 5681 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 
5682 alu.op = ALU_OP1_MOV; 5683 alu.src[0].sel = V_SQ_ALU_SRC_1; 5684 alu.src[0].chan = 0; 5685 alu.dst.sel = ctx->temp_reg; 5686 alu.dst.chan = 3; 5687 alu.last = 1; 5688 alu.dst.write = 1; 5689 r = r600_bytecode_add_alu(ctx->bc, &alu); 5690 if (r) 5691 return r; 5692 src_loaded = TRUE; 5693 src_gpr = ctx->temp_reg; 5694 } 5695 5696 5697 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 5698 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 5699 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 5700 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 5701 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 5702 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 5703 5704 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 5705 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 5706 5707 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 5708 for (i = 0; i < 4; i++) { 5709 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5710 alu.op = ALU_OP2_CUBE; 5711 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 5712 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 5713 alu.dst.sel = ctx->temp_reg; 5714 alu.dst.chan = i; 5715 if (i == 3) 5716 alu.last = 1; 5717 alu.dst.write = 1; 5718 r = r600_bytecode_add_alu(ctx->bc, &alu); 5719 if (r) 5720 return r; 5721 } 5722 5723 /* tmp1.z = RCP_e(|tmp1.z|) */ 5724 if (ctx->bc->chip_class == CAYMAN) { 5725 for (i = 0; i < 3; i++) { 5726 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5727 alu.op = ALU_OP1_RECIP_IEEE; 5728 alu.src[0].sel = ctx->temp_reg; 5729 alu.src[0].chan = 2; 5730 alu.src[0].abs = 1; 5731 alu.dst.sel = ctx->temp_reg; 5732 alu.dst.chan = i; 5733 if (i == 2) 5734 alu.dst.write = 1; 5735 if (i == 2) 5736 alu.last = 1; 5737 r = r600_bytecode_add_alu(ctx->bc, &alu); 5738 if (r) 5739 return r; 5740 } 5741 } else { 5742 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5743 alu.op = ALU_OP1_RECIP_IEEE; 5744 alu.src[0].sel = ctx->temp_reg; 5745 alu.src[0].chan = 2; 5746 alu.src[0].abs = 1; 
5747 alu.dst.sel = ctx->temp_reg; 5748 alu.dst.chan = 2; 5749 alu.dst.write = 1; 5750 alu.last = 1; 5751 r = r600_bytecode_add_alu(ctx->bc, &alu); 5752 if (r) 5753 return r; 5754 } 5755 5756 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 5757 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 5758 * muladd has no writemask, have to use another temp 5759 */ 5760 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5761 alu.op = ALU_OP3_MULADD; 5762 alu.is_op3 = 1; 5763 5764 alu.src[0].sel = ctx->temp_reg; 5765 alu.src[0].chan = 0; 5766 alu.src[1].sel = ctx->temp_reg; 5767 alu.src[1].chan = 2; 5768 5769 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 5770 alu.src[2].chan = 0; 5771 alu.src[2].value = *(uint32_t *)&one_point_five; 5772 5773 alu.dst.sel = ctx->temp_reg; 5774 alu.dst.chan = 0; 5775 alu.dst.write = 1; 5776 5777 r = r600_bytecode_add_alu(ctx->bc, &alu); 5778 if (r) 5779 return r; 5780 5781 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5782 alu.op = ALU_OP3_MULADD; 5783 alu.is_op3 = 1; 5784 5785 alu.src[0].sel = ctx->temp_reg; 5786 alu.src[0].chan = 1; 5787 alu.src[1].sel = ctx->temp_reg; 5788 alu.src[1].chan = 2; 5789 5790 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 5791 alu.src[2].chan = 0; 5792 alu.src[2].value = *(uint32_t *)&one_point_five; 5793 5794 alu.dst.sel = ctx->temp_reg; 5795 alu.dst.chan = 1; 5796 alu.dst.write = 1; 5797 5798 alu.last = 1; 5799 r = r600_bytecode_add_alu(ctx->bc, &alu); 5800 if (r) 5801 return r; 5802 /* write initial compare value into Z component 5803 - W src 0 for shadow cube 5804 - X src 1 for shadow cube array */ 5805 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 5806 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 5807 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5808 alu.op = ALU_OP1_MOV; 5809 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 5810 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 5811 else 5812 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5813 alu.dst.sel = ctx->temp_reg; 5814 
alu.dst.chan = 2; 5815 alu.dst.write = 1; 5816 alu.last = 1; 5817 r = r600_bytecode_add_alu(ctx->bc, &alu); 5818 if (r) 5819 return r; 5820 } 5821 5822 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 5823 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 5824 if (ctx->bc->chip_class >= EVERGREEN) { 5825 int mytmp = r600_get_temp(ctx); 5826 static const float eight = 8.0f; 5827 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5828 alu.op = ALU_OP1_MOV; 5829 alu.src[0].sel = ctx->temp_reg; 5830 alu.src[0].chan = 3; 5831 alu.dst.sel = mytmp; 5832 alu.dst.chan = 0; 5833 alu.dst.write = 1; 5834 alu.last = 1; 5835 r = r600_bytecode_add_alu(ctx->bc, &alu); 5836 if (r) 5837 return r; 5838 5839 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 5840 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5841 alu.op = ALU_OP3_MULADD; 5842 alu.is_op3 = 1; 5843 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5844 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 5845 alu.src[1].chan = 0; 5846 alu.src[1].value = *(uint32_t *)&eight; 5847 alu.src[2].sel = mytmp; 5848 alu.src[2].chan = 0; 5849 alu.dst.sel = ctx->temp_reg; 5850 alu.dst.chan = 3; 5851 alu.dst.write = 1; 5852 alu.last = 1; 5853 r = r600_bytecode_add_alu(ctx->bc, &alu); 5854 if (r) 5855 return r; 5856 } else if (ctx->bc->chip_class < EVERGREEN) { 5857 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 5858 tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 5859 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 5860 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 5861 tex.src_gpr = r600_get_temp(ctx); 5862 tex.src_sel_x = 0; 5863 tex.src_sel_y = 0; 5864 tex.src_sel_z = 0; 5865 tex.src_sel_w = 0; 5866 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 5867 tex.coord_type_x = 1; 5868 tex.coord_type_y = 1; 5869 tex.coord_type_z = 1; 5870 tex.coord_type_w = 1; 5871 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5872 alu.op = ALU_OP1_MOV; 5873 
r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5874 alu.dst.sel = tex.src_gpr; 5875 alu.dst.chan = 0; 5876 alu.last = 1; 5877 alu.dst.write = 1; 5878 r = r600_bytecode_add_alu(ctx->bc, &alu); 5879 if (r) 5880 return r; 5881 5882 r = r600_bytecode_add_tex(ctx->bc, &tex); 5883 if (r) 5884 return r; 5885 } 5886 5887 } 5888 5889 /* for cube forms of lod and bias we need to route things */ 5890 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 5891 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 5892 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 5893 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 5894 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5895 alu.op = ALU_OP1_MOV; 5896 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 5897 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 5898 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 5899 else 5900 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5901 alu.dst.sel = ctx->temp_reg; 5902 alu.dst.chan = 2; 5903 alu.last = 1; 5904 alu.dst.write = 1; 5905 r = r600_bytecode_add_alu(ctx->bc, &alu); 5906 if (r) 5907 return r; 5908 } 5909 5910 src_loaded = TRUE; 5911 src_gpr = ctx->temp_reg; 5912 } 5913 5914 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 5915 int temp_h = 0, temp_v = 0; 5916 int start_val = 0; 5917 5918 /* if we've already loaded the src (i.e. CUBE don't reload it). 
*/ 5919 if (src_loaded == TRUE) 5920 start_val = 1; 5921 else 5922 src_loaded = TRUE; 5923 for (i = start_val; i < 3; i++) { 5924 int treg = r600_get_temp(ctx); 5925 5926 if (i == 0) 5927 src_gpr = treg; 5928 else if (i == 1) 5929 temp_h = treg; 5930 else 5931 temp_v = treg; 5932 5933 for (j = 0; j < 4; j++) { 5934 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5935 alu.op = ALU_OP1_MOV; 5936 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 5937 alu.dst.sel = treg; 5938 alu.dst.chan = j; 5939 if (j == 3) 5940 alu.last = 1; 5941 alu.dst.write = 1; 5942 r = r600_bytecode_add_alu(ctx->bc, &alu); 5943 if (r) 5944 return r; 5945 } 5946 } 5947 for (i = 1; i < 3; i++) { 5948 /* set gradients h/v */ 5949 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 5950 tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : 5951 FETCH_OP_SET_GRADIENTS_V; 5952 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 5953 tex.sampler_index_mode = sampler_index_mode; 5954 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 5955 tex.resource_index_mode = sampler_index_mode; 5956 5957 tex.src_gpr = (i == 1) ? 
temp_h : temp_v; 5958 tex.src_sel_x = 0; 5959 tex.src_sel_y = 1; 5960 tex.src_sel_z = 2; 5961 tex.src_sel_w = 3; 5962 5963 tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */ 5964 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 5965 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 5966 tex.coord_type_x = 1; 5967 tex.coord_type_y = 1; 5968 tex.coord_type_z = 1; 5969 tex.coord_type_w = 1; 5970 } 5971 r = r600_bytecode_add_tex(ctx->bc, &tex); 5972 if (r) 5973 return r; 5974 } 5975 } 5976 5977 if (src_requires_loading && !src_loaded) { 5978 for (i = 0; i < 4; i++) { 5979 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5980 alu.op = ALU_OP1_MOV; 5981 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5982 alu.dst.sel = ctx->temp_reg; 5983 alu.dst.chan = i; 5984 if (i == 3) 5985 alu.last = 1; 5986 alu.dst.write = 1; 5987 r = r600_bytecode_add_alu(ctx->bc, &alu); 5988 if (r) 5989 return r; 5990 } 5991 src_loaded = TRUE; 5992 src_gpr = ctx->temp_reg; 5993 } 5994 5995 /* get offset values */ 5996 if (inst->Texture.NumOffsets) { 5997 assert(inst->Texture.NumOffsets == 1); 5998 5999 /* The texture offset feature doesn't work with the TXF instruction 6000 * and must be emulated by adding the offset to the texture coordinates. 
*/ 6001 if (txf_add_offsets) { 6002 const struct tgsi_texture_offset *off = inst->TexOffsets; 6003 6004 switch (inst->Texture.Texture) { 6005 case TGSI_TEXTURE_3D: 6006 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6007 alu.op = ALU_OP2_ADD_INT; 6008 alu.src[0].sel = src_gpr; 6009 alu.src[0].chan = 2; 6010 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6011 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 6012 alu.dst.sel = src_gpr; 6013 alu.dst.chan = 2; 6014 alu.dst.write = 1; 6015 alu.last = 1; 6016 r = r600_bytecode_add_alu(ctx->bc, &alu); 6017 if (r) 6018 return r; 6019 /* fall through */ 6020 6021 case TGSI_TEXTURE_2D: 6022 case TGSI_TEXTURE_SHADOW2D: 6023 case TGSI_TEXTURE_RECT: 6024 case TGSI_TEXTURE_SHADOWRECT: 6025 case TGSI_TEXTURE_2D_ARRAY: 6026 case TGSI_TEXTURE_SHADOW2D_ARRAY: 6027 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6028 alu.op = ALU_OP2_ADD_INT; 6029 alu.src[0].sel = src_gpr; 6030 alu.src[0].chan = 1; 6031 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6032 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 6033 alu.dst.sel = src_gpr; 6034 alu.dst.chan = 1; 6035 alu.dst.write = 1; 6036 alu.last = 1; 6037 r = r600_bytecode_add_alu(ctx->bc, &alu); 6038 if (r) 6039 return r; 6040 /* fall through */ 6041 6042 case TGSI_TEXTURE_1D: 6043 case TGSI_TEXTURE_SHADOW1D: 6044 case TGSI_TEXTURE_1D_ARRAY: 6045 case TGSI_TEXTURE_SHADOW1D_ARRAY: 6046 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6047 alu.op = ALU_OP2_ADD_INT; 6048 alu.src[0].sel = src_gpr; 6049 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6050 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 6051 alu.dst.sel = src_gpr; 6052 alu.dst.write = 1; 6053 alu.last = 1; 6054 r = r600_bytecode_add_alu(ctx->bc, &alu); 6055 if (r) 6056 return r; 6057 break; 6058 /* texture offsets do not apply to other texture targets */ 6059 } 6060 } else { 6061 switch (inst->Texture.Texture) { 6062 case TGSI_TEXTURE_3D: 6063 offset_z = ctx->literals[4 * 
inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 6064 /* fallthrough */ 6065 case TGSI_TEXTURE_2D: 6066 case TGSI_TEXTURE_SHADOW2D: 6067 case TGSI_TEXTURE_RECT: 6068 case TGSI_TEXTURE_SHADOWRECT: 6069 case TGSI_TEXTURE_2D_ARRAY: 6070 case TGSI_TEXTURE_SHADOW2D_ARRAY: 6071 offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 6072 /* fallthrough */ 6073 case TGSI_TEXTURE_1D: 6074 case TGSI_TEXTURE_SHADOW1D: 6075 case TGSI_TEXTURE_1D_ARRAY: 6076 case TGSI_TEXTURE_SHADOW1D_ARRAY: 6077 offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 6078 } 6079 } 6080 } 6081 6082 /* Obtain the sample index for reading a compressed MSAA color texture. 6083 * To read the FMASK, we use the ldfptr instruction, which tells us 6084 * where the samples are stored. 6085 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 6086 * which is the identity mapping. Each nibble says which physical sample 6087 * should be fetched to get that sample. 6088 * 6089 * Assume src.z contains the sample index. It should be modified like this: 6090 * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 6091 * Then fetch the texel with src. 
6092 */ 6093 if (read_compressed_msaa) { 6094 unsigned sample_chan = 3; 6095 unsigned temp = r600_get_temp(ctx); 6096 assert(src_loaded); 6097 6098 /* temp.w = ldfptr() */ 6099 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6100 tex.op = FETCH_OP_LD; 6101 tex.inst_mod = 1; /* to indicate this is ldfptr */ 6102 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6103 tex.sampler_index_mode = sampler_index_mode; 6104 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6105 tex.resource_index_mode = sampler_index_mode; 6106 tex.src_gpr = src_gpr; 6107 tex.dst_gpr = temp; 6108 tex.dst_sel_x = 7; /* mask out these components */ 6109 tex.dst_sel_y = 7; 6110 tex.dst_sel_z = 7; 6111 tex.dst_sel_w = 0; /* store X */ 6112 tex.src_sel_x = 0; 6113 tex.src_sel_y = 1; 6114 tex.src_sel_z = 2; 6115 tex.src_sel_w = 3; 6116 tex.offset_x = offset_x; 6117 tex.offset_y = offset_y; 6118 tex.offset_z = offset_z; 6119 r = r600_bytecode_add_tex(ctx->bc, &tex); 6120 if (r) 6121 return r; 6122 6123 /* temp.x = sample_index*4 */ 6124 if (ctx->bc->chip_class == CAYMAN) { 6125 for (i = 0 ; i < 4; i++) { 6126 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6127 alu.op = ALU_OP2_MULLO_INT; 6128 alu.src[0].sel = src_gpr; 6129 alu.src[0].chan = sample_chan; 6130 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6131 alu.src[1].value = 4; 6132 alu.dst.sel = temp; 6133 alu.dst.chan = i; 6134 alu.dst.write = i == 0; 6135 if (i == 3) 6136 alu.last = 1; 6137 r = r600_bytecode_add_alu(ctx->bc, &alu); 6138 if (r) 6139 return r; 6140 } 6141 } else { 6142 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6143 alu.op = ALU_OP2_MULLO_INT; 6144 alu.src[0].sel = src_gpr; 6145 alu.src[0].chan = sample_chan; 6146 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6147 alu.src[1].value = 4; 6148 alu.dst.sel = temp; 6149 alu.dst.chan = 0; 6150 alu.dst.write = 1; 6151 alu.last = 1; 6152 r = r600_bytecode_add_alu(ctx->bc, &alu); 6153 if (r) 6154 return r; 6155 } 6156 6157 /* sample_index = temp.w >> temp.x */ 6158 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6159 alu.op = ALU_OP2_LSHR_INT; 6160 alu.src[0].sel = temp; 6161 alu.src[0].chan = 3; 6162 alu.src[1].sel = temp; 6163 alu.src[1].chan = 0; 6164 alu.dst.sel = src_gpr; 6165 alu.dst.chan = sample_chan; 6166 alu.dst.write = 1; 6167 alu.last = 1; 6168 r = r600_bytecode_add_alu(ctx->bc, &alu); 6169 if (r) 6170 return r; 6171 6172 /* sample_index & 0xF */ 6173 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6174 alu.op = ALU_OP2_AND_INT; 6175 alu.src[0].sel = src_gpr; 6176 alu.src[0].chan = sample_chan; 6177 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6178 alu.src[1].value = 0xF; 6179 alu.dst.sel = src_gpr; 6180 alu.dst.chan = sample_chan; 6181 alu.dst.write = 1; 6182 alu.last = 1; 6183 r = r600_bytecode_add_alu(ctx->bc, &alu); 6184 if (r) 6185 return r; 6186#if 0 6187 /* visualize the FMASK */ 6188 for (i = 0; i < 4; i++) { 6189 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6190 alu.op = ALU_OP1_INT_TO_FLT; 6191 alu.src[0].sel = src_gpr; 6192 alu.src[0].chan = sample_chan; 6193 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 6194 alu.dst.chan = i; 6195 alu.dst.write = 1; 6196 alu.last = 1; 6197 r = r600_bytecode_add_alu(ctx->bc, &alu); 6198 if (r) 6199 return r; 6200 } 6201 return 0; 6202#endif 6203 } 6204 6205 /* does this shader want a num layers from TXQ for a cube array? 
*/ 6206 if (has_txq_cube_array_z) { 6207 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6208 6209 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6210 alu.op = ALU_OP1_MOV; 6211 6212 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 6213 if (ctx->bc->chip_class >= EVERGREEN) { 6214 /* channel 1 or 3 of each word */ 6215 alu.src[0].sel += (id / 2); 6216 alu.src[0].chan = ((id % 2) * 2) + 1; 6217 } else { 6218 /* r600 we have them at channel 2 of the second dword */ 6219 alu.src[0].sel += (id * 2) + 1; 6220 alu.src[0].chan = 2; 6221 } 6222 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6223 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 6224 alu.last = 1; 6225 r = r600_bytecode_add_alu(ctx->bc, &alu); 6226 if (r) 6227 return r; 6228 /* disable writemask from texture instruction */ 6229 inst->Dst[0].Register.WriteMask &= ~4; 6230 } 6231 6232 opcode = ctx->inst_info->op; 6233 if (opcode == FETCH_OP_GATHER4 && 6234 inst->TexOffsets[0].File != TGSI_FILE_NULL && 6235 inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { 6236 opcode = FETCH_OP_GATHER4_O; 6237 6238 /* GATHER4_O/GATHER4_C_O use offset values loaded by 6239 SET_TEXTURE_OFFSETS instruction. The immediate offset values 6240 encoded in the instruction are ignored. 
*/ 6241 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6242 tex.op = FETCH_OP_SET_TEXTURE_OFFSETS; 6243 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6244 tex.sampler_index_mode = sampler_index_mode; 6245 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6246 tex.resource_index_mode = sampler_index_mode; 6247 6248 tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; 6249 tex.src_sel_x = inst->TexOffsets[0].SwizzleX; 6250 tex.src_sel_y = inst->TexOffsets[0].SwizzleY; 6251 tex.src_sel_z = inst->TexOffsets[0].SwizzleZ; 6252 tex.src_sel_w = 4; 6253 6254 tex.dst_sel_x = 7; 6255 tex.dst_sel_y = 7; 6256 tex.dst_sel_z = 7; 6257 tex.dst_sel_w = 7; 6258 6259 r = r600_bytecode_add_tex(ctx->bc, &tex); 6260 if (r) 6261 return r; 6262 } 6263 6264 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 6265 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 6266 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 6267 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6268 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 6269 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 6270 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6271 switch (opcode) { 6272 case FETCH_OP_SAMPLE: 6273 opcode = FETCH_OP_SAMPLE_C; 6274 break; 6275 case FETCH_OP_SAMPLE_L: 6276 opcode = FETCH_OP_SAMPLE_C_L; 6277 break; 6278 case FETCH_OP_SAMPLE_LB: 6279 opcode = FETCH_OP_SAMPLE_C_LB; 6280 break; 6281 case FETCH_OP_SAMPLE_G: 6282 opcode = FETCH_OP_SAMPLE_C_G; 6283 break; 6284 /* Texture gather variants */ 6285 case FETCH_OP_GATHER4: 6286 opcode = FETCH_OP_GATHER4_C; 6287 break; 6288 case FETCH_OP_GATHER4_O: 6289 opcode = FETCH_OP_GATHER4_C_O; 6290 break; 6291 } 6292 } 6293 6294 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6295 tex.op = opcode; 6296 6297 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6298 tex.sampler_index_mode = sampler_index_mode; 6299 tex.resource_id = tex.sampler_id + 
R600_MAX_CONST_BUFFERS; 6300 tex.resource_index_mode = sampler_index_mode; 6301 tex.src_gpr = src_gpr; 6302 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 6303 6304 if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE || 6305 inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) { 6306 tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */ 6307 } 6308 6309 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 6310 int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]; 6311 tex.inst_mod = texture_component_select; 6312 6313 if (ctx->bc->chip_class == CAYMAN) { 6314 /* GATHER4 result order is different from TGSI TG4 */ 6315 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7; 6316 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7; 6317 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7; 6318 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 6319 } else { 6320 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 6321 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 6322 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 6323 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 6324 } 6325 } 6326 else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) { 6327 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 6328 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 6329 tex.dst_sel_z = 7; 6330 tex.dst_sel_w = 7; 6331 } 6332 else { 6333 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 6334 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 6335 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 6336 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; 6337 } 6338 6339 6340 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) { 6341 tex.src_sel_x = 4; 6342 tex.src_sel_y = 4; 6343 tex.src_sel_z = 4; 6344 tex.src_sel_w = 4; 6345 } else if (src_loaded) { 6346 tex.src_sel_x = 0; 6347 tex.src_sel_y = 1; 6348 tex.src_sel_z = 2; 6349 tex.src_sel_w = 3; 6350 } else { 6351 tex.src_sel_x = ctx->src[0].swizzle[0]; 6352 tex.src_sel_y = ctx->src[0].swizzle[1]; 6353 tex.src_sel_z = ctx->src[0].swizzle[2]; 6354 tex.src_sel_w = ctx->src[0].swizzle[3]; 6355 tex.src_rel = ctx->src[0].rel; 6356 } 6357 6358 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 6359 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6360 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6361 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6362 tex.src_sel_x = 1; 6363 tex.src_sel_y = 0; 6364 tex.src_sel_z = 3; 6365 tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 6366 } 6367 6368 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 6369 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 6370 tex.coord_type_x = 1; 6371 tex.coord_type_y = 1; 6372 } 6373 tex.coord_type_z = 1; 6374 tex.coord_type_w = 1; 6375 6376 tex.offset_x = offset_x; 6377 tex.offset_y = offset_y; 6378 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && 6379 (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 6380 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { 6381 tex.offset_z = 0; 6382 } 6383 else { 6384 tex.offset_z = offset_z; 6385 } 6386 6387 /* Put the depth for comparison in W. 6388 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 6389 * Some instructions expect the depth in Z. 
*/ 6390 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 6391 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 6392 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 6393 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 6394 opcode != FETCH_OP_SAMPLE_C_L && 6395 opcode != FETCH_OP_SAMPLE_C_LB) { 6396 tex.src_sel_w = tex.src_sel_z; 6397 } 6398 6399 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 6400 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 6401 if (opcode == FETCH_OP_SAMPLE_C_L || 6402 opcode == FETCH_OP_SAMPLE_C_LB) { 6403 /* the array index is read from Y */ 6404 tex.coord_type_y = 0; 6405 } else { 6406 /* the array index is read from Z */ 6407 tex.coord_type_z = 0; 6408 tex.src_sel_z = tex.src_sel_y; 6409 } 6410 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 6411 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 6412 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6413 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 6414 (ctx->bc->chip_class >= EVERGREEN))) 6415 /* the array index is read from Z */ 6416 tex.coord_type_z = 0; 6417 6418 /* mask unused source components */ 6419 if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) { 6420 switch (inst->Texture.Texture) { 6421 case TGSI_TEXTURE_2D: 6422 case TGSI_TEXTURE_RECT: 6423 tex.src_sel_z = 7; 6424 tex.src_sel_w = 7; 6425 break; 6426 case TGSI_TEXTURE_1D_ARRAY: 6427 tex.src_sel_y = 7; 6428 tex.src_sel_w = 7; 6429 break; 6430 case TGSI_TEXTURE_1D: 6431 tex.src_sel_y = 7; 6432 tex.src_sel_z = 7; 6433 tex.src_sel_w = 7; 6434 break; 6435 } 6436 } 6437 6438 r = r600_bytecode_add_tex(ctx->bc, &tex); 6439 if (r) 6440 return r; 6441 6442 /* add shadow ambient support - gallium doesn't do it yet */ 6443 return 0; 6444} 6445 6446static int tgsi_lrp(struct r600_shader_ctx *ctx) 6447{ 6448 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6449 struct r600_bytecode_alu alu; 6450 int lasti = 
tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6451 unsigned i, temp_regs[2]; 6452 int r; 6453 6454 /* optimize if it's just an equal balance */ 6455 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 6456 for (i = 0; i < lasti + 1; i++) { 6457 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6458 continue; 6459 6460 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6461 alu.op = ALU_OP2_ADD; 6462 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 6463 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6464 alu.omod = 3; 6465 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6466 alu.dst.chan = i; 6467 if (i == lasti) { 6468 alu.last = 1; 6469 } 6470 r = r600_bytecode_add_alu(ctx->bc, &alu); 6471 if (r) 6472 return r; 6473 } 6474 return 0; 6475 } 6476 6477 /* 1 - src0 */ 6478 for (i = 0; i < lasti + 1; i++) { 6479 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6480 continue; 6481 6482 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6483 alu.op = ALU_OP2_ADD; 6484 alu.src[0].sel = V_SQ_ALU_SRC_1; 6485 alu.src[0].chan = 0; 6486 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6487 r600_bytecode_src_toggle_neg(&alu.src[1]); 6488 alu.dst.sel = ctx->temp_reg; 6489 alu.dst.chan = i; 6490 if (i == lasti) { 6491 alu.last = 1; 6492 } 6493 alu.dst.write = 1; 6494 r = r600_bytecode_add_alu(ctx->bc, &alu); 6495 if (r) 6496 return r; 6497 } 6498 6499 /* (1 - src0) * src2 */ 6500 for (i = 0; i < lasti + 1; i++) { 6501 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6502 continue; 6503 6504 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6505 alu.op = ALU_OP2_MUL; 6506 alu.src[0].sel = ctx->temp_reg; 6507 alu.src[0].chan = i; 6508 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6509 alu.dst.sel = ctx->temp_reg; 6510 alu.dst.chan = i; 6511 if (i == lasti) { 6512 alu.last = 1; 6513 } 6514 alu.dst.write = 1; 6515 r = r600_bytecode_add_alu(ctx->bc, &alu); 6516 if (r) 6517 return r; 6518 } 6519 6520 /* src0 * src1 + (1 - src0) * src2 */ 6521 if (ctx->src[0].abs) 6522 
temp_regs[0] = r600_get_temp(ctx); 6523 else 6524 temp_regs[0] = 0; 6525 if (ctx->src[1].abs) 6526 temp_regs[1] = r600_get_temp(ctx); 6527 else 6528 temp_regs[1] = 0; 6529 6530 for (i = 0; i < lasti + 1; i++) { 6531 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6532 continue; 6533 6534 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6535 alu.op = ALU_OP3_MULADD; 6536 alu.is_op3 = 1; 6537 r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); 6538 if (r) 6539 return r; 6540 r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]); 6541 if (r) 6542 return r; 6543 alu.src[2].sel = ctx->temp_reg; 6544 alu.src[2].chan = i; 6545 6546 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6547 alu.dst.chan = i; 6548 if (i == lasti) { 6549 alu.last = 1; 6550 } 6551 r = r600_bytecode_add_alu(ctx->bc, &alu); 6552 if (r) 6553 return r; 6554 } 6555 return 0; 6556} 6557 6558static int tgsi_cmp(struct r600_shader_ctx *ctx) 6559{ 6560 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6561 struct r600_bytecode_alu alu; 6562 int i, r, j; 6563 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6564 int temp_regs[3]; 6565 6566 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 6567 temp_regs[j] = 0; 6568 if (ctx->src[j].abs) 6569 temp_regs[j] = r600_get_temp(ctx); 6570 } 6571 6572 for (i = 0; i < lasti + 1; i++) { 6573 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6574 continue; 6575 6576 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6577 alu.op = ALU_OP3_CNDGE; 6578 r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); 6579 if (r) 6580 return r; 6581 r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]); 6582 if (r) 6583 return r; 6584 r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]); 6585 if (r) 6586 return r; 6587 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6588 alu.dst.chan = i; 6589 alu.dst.write = 1; 6590 alu.is_op3 = 1; 
6591 if (i == lasti) 6592 alu.last = 1; 6593 r = r600_bytecode_add_alu(ctx->bc, &alu); 6594 if (r) 6595 return r; 6596 } 6597 return 0; 6598} 6599 6600static int tgsi_ucmp(struct r600_shader_ctx *ctx) 6601{ 6602 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6603 struct r600_bytecode_alu alu; 6604 int i, r; 6605 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6606 6607 for (i = 0; i < lasti + 1; i++) { 6608 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6609 continue; 6610 6611 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6612 alu.op = ALU_OP3_CNDE_INT; 6613 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6614 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6615 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 6616 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6617 alu.dst.chan = i; 6618 alu.dst.write = 1; 6619 alu.is_op3 = 1; 6620 if (i == lasti) 6621 alu.last = 1; 6622 r = r600_bytecode_add_alu(ctx->bc, &alu); 6623 if (r) 6624 return r; 6625 } 6626 return 0; 6627} 6628 6629static int tgsi_xpd(struct r600_shader_ctx *ctx) 6630{ 6631 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6632 static const unsigned int src0_swizzle[] = {2, 0, 1}; 6633 static const unsigned int src1_swizzle[] = {1, 2, 0}; 6634 struct r600_bytecode_alu alu; 6635 uint32_t use_temp = 0; 6636 int i, r; 6637 6638 if (inst->Dst[0].Register.WriteMask != 0xf) 6639 use_temp = 1; 6640 6641 for (i = 0; i < 4; i++) { 6642 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6643 alu.op = ALU_OP2_MUL; 6644 if (i < 3) { 6645 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 6646 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 6647 } else { 6648 alu.src[0].sel = V_SQ_ALU_SRC_0; 6649 alu.src[0].chan = i; 6650 alu.src[1].sel = V_SQ_ALU_SRC_0; 6651 alu.src[1].chan = i; 6652 } 6653 6654 alu.dst.sel = ctx->temp_reg; 6655 alu.dst.chan = i; 6656 alu.dst.write = 1; 6657 6658 if (i == 3) 6659 alu.last = 
1; 6660 r = r600_bytecode_add_alu(ctx->bc, &alu); 6661 if (r) 6662 return r; 6663 } 6664 6665 for (i = 0; i < 4; i++) { 6666 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6667 alu.op = ALU_OP3_MULADD; 6668 6669 if (i < 3) { 6670 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 6671 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 6672 } else { 6673 alu.src[0].sel = V_SQ_ALU_SRC_0; 6674 alu.src[0].chan = i; 6675 alu.src[1].sel = V_SQ_ALU_SRC_0; 6676 alu.src[1].chan = i; 6677 } 6678 6679 alu.src[2].sel = ctx->temp_reg; 6680 alu.src[2].neg = 1; 6681 alu.src[2].chan = i; 6682 6683 if (use_temp) 6684 alu.dst.sel = ctx->temp_reg; 6685 else 6686 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6687 alu.dst.chan = i; 6688 alu.dst.write = 1; 6689 alu.is_op3 = 1; 6690 if (i == 3) 6691 alu.last = 1; 6692 r = r600_bytecode_add_alu(ctx->bc, &alu); 6693 if (r) 6694 return r; 6695 } 6696 if (use_temp) 6697 return tgsi_helper_copy(ctx, inst); 6698 return 0; 6699} 6700 6701static int tgsi_exp(struct r600_shader_ctx *ctx) 6702{ 6703 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6704 struct r600_bytecode_alu alu; 6705 int r; 6706 int i; 6707 6708 /* result.x = 2^floor(src); */ 6709 if (inst->Dst[0].Register.WriteMask & 1) { 6710 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6711 6712 alu.op = ALU_OP1_FLOOR; 6713 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6714 6715 alu.dst.sel = ctx->temp_reg; 6716 alu.dst.chan = 0; 6717 alu.dst.write = 1; 6718 alu.last = 1; 6719 r = r600_bytecode_add_alu(ctx->bc, &alu); 6720 if (r) 6721 return r; 6722 6723 if (ctx->bc->chip_class == CAYMAN) { 6724 for (i = 0; i < 3; i++) { 6725 alu.op = ALU_OP1_EXP_IEEE; 6726 alu.src[0].sel = ctx->temp_reg; 6727 alu.src[0].chan = 0; 6728 6729 alu.dst.sel = ctx->temp_reg; 6730 alu.dst.chan = i; 6731 alu.dst.write = i == 0; 6732 alu.last = i == 2; 6733 r = r600_bytecode_add_alu(ctx->bc, &alu); 6734 if (r) 6735 return r; 6736 } 6737 } else { 6738 alu.op 
= ALU_OP1_EXP_IEEE; 6739 alu.src[0].sel = ctx->temp_reg; 6740 alu.src[0].chan = 0; 6741 6742 alu.dst.sel = ctx->temp_reg; 6743 alu.dst.chan = 0; 6744 alu.dst.write = 1; 6745 alu.last = 1; 6746 r = r600_bytecode_add_alu(ctx->bc, &alu); 6747 if (r) 6748 return r; 6749 } 6750 } 6751 6752 /* result.y = tmp - floor(tmp); */ 6753 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 6754 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6755 6756 alu.op = ALU_OP1_FRACT; 6757 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6758 6759 alu.dst.sel = ctx->temp_reg; 6760#if 0 6761 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6762 if (r) 6763 return r; 6764#endif 6765 alu.dst.write = 1; 6766 alu.dst.chan = 1; 6767 6768 alu.last = 1; 6769 6770 r = r600_bytecode_add_alu(ctx->bc, &alu); 6771 if (r) 6772 return r; 6773 } 6774 6775 /* result.z = RoughApprox2ToX(tmp);*/ 6776 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 6777 if (ctx->bc->chip_class == CAYMAN) { 6778 for (i = 0; i < 3; i++) { 6779 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6780 alu.op = ALU_OP1_EXP_IEEE; 6781 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6782 6783 alu.dst.sel = ctx->temp_reg; 6784 alu.dst.chan = i; 6785 if (i == 2) { 6786 alu.dst.write = 1; 6787 alu.last = 1; 6788 } 6789 6790 r = r600_bytecode_add_alu(ctx->bc, &alu); 6791 if (r) 6792 return r; 6793 } 6794 } else { 6795 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6796 alu.op = ALU_OP1_EXP_IEEE; 6797 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6798 6799 alu.dst.sel = ctx->temp_reg; 6800 alu.dst.write = 1; 6801 alu.dst.chan = 2; 6802 6803 alu.last = 1; 6804 6805 r = r600_bytecode_add_alu(ctx->bc, &alu); 6806 if (r) 6807 return r; 6808 } 6809 } 6810 6811 /* result.w = 1.0;*/ 6812 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 6813 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6814 6815 alu.op = ALU_OP1_MOV; 6816 alu.src[0].sel = V_SQ_ALU_SRC_1; 6817 alu.src[0].chan = 0; 6818 6819 alu.dst.sel = ctx->temp_reg; 6820 
alu.dst.chan = 3; 6821 alu.dst.write = 1; 6822 alu.last = 1; 6823 r = r600_bytecode_add_alu(ctx->bc, &alu); 6824 if (r) 6825 return r; 6826 } 6827 return tgsi_helper_copy(ctx, inst); 6828} 6829 6830static int tgsi_log(struct r600_shader_ctx *ctx) 6831{ 6832 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6833 struct r600_bytecode_alu alu; 6834 int r; 6835 int i; 6836 6837 /* result.x = floor(log2(|src|)); */ 6838 if (inst->Dst[0].Register.WriteMask & 1) { 6839 if (ctx->bc->chip_class == CAYMAN) { 6840 for (i = 0; i < 3; i++) { 6841 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6842 6843 alu.op = ALU_OP1_LOG_IEEE; 6844 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6845 r600_bytecode_src_set_abs(&alu.src[0]); 6846 6847 alu.dst.sel = ctx->temp_reg; 6848 alu.dst.chan = i; 6849 if (i == 0) 6850 alu.dst.write = 1; 6851 if (i == 2) 6852 alu.last = 1; 6853 r = r600_bytecode_add_alu(ctx->bc, &alu); 6854 if (r) 6855 return r; 6856 } 6857 6858 } else { 6859 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6860 6861 alu.op = ALU_OP1_LOG_IEEE; 6862 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6863 r600_bytecode_src_set_abs(&alu.src[0]); 6864 6865 alu.dst.sel = ctx->temp_reg; 6866 alu.dst.chan = 0; 6867 alu.dst.write = 1; 6868 alu.last = 1; 6869 r = r600_bytecode_add_alu(ctx->bc, &alu); 6870 if (r) 6871 return r; 6872 } 6873 6874 alu.op = ALU_OP1_FLOOR; 6875 alu.src[0].sel = ctx->temp_reg; 6876 alu.src[0].chan = 0; 6877 6878 alu.dst.sel = ctx->temp_reg; 6879 alu.dst.chan = 0; 6880 alu.dst.write = 1; 6881 alu.last = 1; 6882 6883 r = r600_bytecode_add_alu(ctx->bc, &alu); 6884 if (r) 6885 return r; 6886 } 6887 6888 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 6889 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 6890 6891 if (ctx->bc->chip_class == CAYMAN) { 6892 for (i = 0; i < 3; i++) { 6893 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6894 6895 alu.op = ALU_OP1_LOG_IEEE; 6896 r600_bytecode_src(&alu.src[0], &ctx->src[0], 
0); 6897 r600_bytecode_src_set_abs(&alu.src[0]); 6898 6899 alu.dst.sel = ctx->temp_reg; 6900 alu.dst.chan = i; 6901 if (i == 1) 6902 alu.dst.write = 1; 6903 if (i == 2) 6904 alu.last = 1; 6905 6906 r = r600_bytecode_add_alu(ctx->bc, &alu); 6907 if (r) 6908 return r; 6909 } 6910 } else { 6911 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6912 6913 alu.op = ALU_OP1_LOG_IEEE; 6914 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6915 r600_bytecode_src_set_abs(&alu.src[0]); 6916 6917 alu.dst.sel = ctx->temp_reg; 6918 alu.dst.chan = 1; 6919 alu.dst.write = 1; 6920 alu.last = 1; 6921 6922 r = r600_bytecode_add_alu(ctx->bc, &alu); 6923 if (r) 6924 return r; 6925 } 6926 6927 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6928 6929 alu.op = ALU_OP1_FLOOR; 6930 alu.src[0].sel = ctx->temp_reg; 6931 alu.src[0].chan = 1; 6932 6933 alu.dst.sel = ctx->temp_reg; 6934 alu.dst.chan = 1; 6935 alu.dst.write = 1; 6936 alu.last = 1; 6937 6938 r = r600_bytecode_add_alu(ctx->bc, &alu); 6939 if (r) 6940 return r; 6941 6942 if (ctx->bc->chip_class == CAYMAN) { 6943 for (i = 0; i < 3; i++) { 6944 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6945 alu.op = ALU_OP1_EXP_IEEE; 6946 alu.src[0].sel = ctx->temp_reg; 6947 alu.src[0].chan = 1; 6948 6949 alu.dst.sel = ctx->temp_reg; 6950 alu.dst.chan = i; 6951 if (i == 1) 6952 alu.dst.write = 1; 6953 if (i == 2) 6954 alu.last = 1; 6955 6956 r = r600_bytecode_add_alu(ctx->bc, &alu); 6957 if (r) 6958 return r; 6959 } 6960 } else { 6961 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6962 alu.op = ALU_OP1_EXP_IEEE; 6963 alu.src[0].sel = ctx->temp_reg; 6964 alu.src[0].chan = 1; 6965 6966 alu.dst.sel = ctx->temp_reg; 6967 alu.dst.chan = 1; 6968 alu.dst.write = 1; 6969 alu.last = 1; 6970 6971 r = r600_bytecode_add_alu(ctx->bc, &alu); 6972 if (r) 6973 return r; 6974 } 6975 6976 if (ctx->bc->chip_class == CAYMAN) { 6977 for (i = 0; i < 3; i++) { 6978 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6979 alu.op = ALU_OP1_RECIP_IEEE; 6980 
alu.src[0].sel = ctx->temp_reg; 6981 alu.src[0].chan = 1; 6982 6983 alu.dst.sel = ctx->temp_reg; 6984 alu.dst.chan = i; 6985 if (i == 1) 6986 alu.dst.write = 1; 6987 if (i == 2) 6988 alu.last = 1; 6989 6990 r = r600_bytecode_add_alu(ctx->bc, &alu); 6991 if (r) 6992 return r; 6993 } 6994 } else { 6995 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6996 alu.op = ALU_OP1_RECIP_IEEE; 6997 alu.src[0].sel = ctx->temp_reg; 6998 alu.src[0].chan = 1; 6999 7000 alu.dst.sel = ctx->temp_reg; 7001 alu.dst.chan = 1; 7002 alu.dst.write = 1; 7003 alu.last = 1; 7004 7005 r = r600_bytecode_add_alu(ctx->bc, &alu); 7006 if (r) 7007 return r; 7008 } 7009 7010 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7011 7012 alu.op = ALU_OP2_MUL; 7013 7014 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7015 r600_bytecode_src_set_abs(&alu.src[0]); 7016 7017 alu.src[1].sel = ctx->temp_reg; 7018 alu.src[1].chan = 1; 7019 7020 alu.dst.sel = ctx->temp_reg; 7021 alu.dst.chan = 1; 7022 alu.dst.write = 1; 7023 alu.last = 1; 7024 7025 r = r600_bytecode_add_alu(ctx->bc, &alu); 7026 if (r) 7027 return r; 7028 } 7029 7030 /* result.z = log2(|src|);*/ 7031 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 7032 if (ctx->bc->chip_class == CAYMAN) { 7033 for (i = 0; i < 3; i++) { 7034 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7035 7036 alu.op = ALU_OP1_LOG_IEEE; 7037 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7038 r600_bytecode_src_set_abs(&alu.src[0]); 7039 7040 alu.dst.sel = ctx->temp_reg; 7041 if (i == 2) 7042 alu.dst.write = 1; 7043 alu.dst.chan = i; 7044 if (i == 2) 7045 alu.last = 1; 7046 7047 r = r600_bytecode_add_alu(ctx->bc, &alu); 7048 if (r) 7049 return r; 7050 } 7051 } else { 7052 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7053 7054 alu.op = ALU_OP1_LOG_IEEE; 7055 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7056 r600_bytecode_src_set_abs(&alu.src[0]); 7057 7058 alu.dst.sel = ctx->temp_reg; 7059 alu.dst.write = 1; 7060 alu.dst.chan = 2; 7061 alu.last = 1; 7062 
7063 r = r600_bytecode_add_alu(ctx->bc, &alu); 7064 if (r) 7065 return r; 7066 } 7067 } 7068 7069 /* result.w = 1.0; */ 7070 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 7071 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7072 7073 alu.op = ALU_OP1_MOV; 7074 alu.src[0].sel = V_SQ_ALU_SRC_1; 7075 alu.src[0].chan = 0; 7076 7077 alu.dst.sel = ctx->temp_reg; 7078 alu.dst.chan = 3; 7079 alu.dst.write = 1; 7080 alu.last = 1; 7081 7082 r = r600_bytecode_add_alu(ctx->bc, &alu); 7083 if (r) 7084 return r; 7085 } 7086 7087 return tgsi_helper_copy(ctx, inst); 7088} 7089 7090static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 7091{ 7092 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7093 struct r600_bytecode_alu alu; 7094 int r; 7095 int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7096 unsigned reg = inst->Dst[0].Register.Index > 0 ? ctx->bc->index_reg[inst->Dst[0].Register.Index - 1] : ctx->bc->ar_reg; 7097 7098 assert(inst->Dst[0].Register.Index < 3); 7099 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7100 7101 switch (inst->Instruction.Opcode) { 7102 case TGSI_OPCODE_ARL: 7103 alu.op = ALU_OP1_FLT_TO_INT_FLOOR; 7104 break; 7105 case TGSI_OPCODE_ARR: 7106 alu.op = ALU_OP1_FLT_TO_INT; 7107 break; 7108 case TGSI_OPCODE_UARL: 7109 alu.op = ALU_OP1_MOV; 7110 break; 7111 default: 7112 assert(0); 7113 return -1; 7114 } 7115 7116 for (i = 0; i <= lasti; ++i) { 7117 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7118 continue; 7119 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7120 alu.last = i == lasti; 7121 alu.dst.sel = reg; 7122 alu.dst.chan = i; 7123 alu.dst.write = 1; 7124 r = r600_bytecode_add_alu(ctx->bc, &alu); 7125 if (r) 7126 return r; 7127 } 7128 7129 if (inst->Dst[0].Register.Index > 0) 7130 ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0; 7131 else 7132 ctx->bc->ar_loaded = 0; 7133 7134 return 0; 7135} 7136static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 7137{ 7138 struct 
tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7139 struct r600_bytecode_alu alu; 7140 int r; 7141 int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7142 7143 switch (inst->Instruction.Opcode) { 7144 case TGSI_OPCODE_ARL: 7145 memset(&alu, 0, sizeof(alu)); 7146 alu.op = ALU_OP1_FLOOR; 7147 alu.dst.sel = ctx->bc->ar_reg; 7148 alu.dst.write = 1; 7149 for (i = 0; i <= lasti; ++i) { 7150 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 7151 alu.dst.chan = i; 7152 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7153 alu.last = i == lasti; 7154 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 7155 return r; 7156 } 7157 } 7158 7159 memset(&alu, 0, sizeof(alu)); 7160 alu.op = ALU_OP1_FLT_TO_INT; 7161 alu.src[0].sel = ctx->bc->ar_reg; 7162 alu.dst.sel = ctx->bc->ar_reg; 7163 alu.dst.write = 1; 7164 /* FLT_TO_INT is trans-only on r600/r700 */ 7165 alu.last = TRUE; 7166 for (i = 0; i <= lasti; ++i) { 7167 alu.dst.chan = i; 7168 alu.src[0].chan = i; 7169 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 7170 return r; 7171 } 7172 break; 7173 case TGSI_OPCODE_ARR: 7174 memset(&alu, 0, sizeof(alu)); 7175 alu.op = ALU_OP1_FLT_TO_INT; 7176 alu.dst.sel = ctx->bc->ar_reg; 7177 alu.dst.write = 1; 7178 /* FLT_TO_INT is trans-only on r600/r700 */ 7179 alu.last = TRUE; 7180 for (i = 0; i <= lasti; ++i) { 7181 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 7182 alu.dst.chan = i; 7183 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7184 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 7185 return r; 7186 } 7187 } 7188 break; 7189 case TGSI_OPCODE_UARL: 7190 memset(&alu, 0, sizeof(alu)); 7191 alu.op = ALU_OP1_MOV; 7192 alu.dst.sel = ctx->bc->ar_reg; 7193 alu.dst.write = 1; 7194 for (i = 0; i <= lasti; ++i) { 7195 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 7196 alu.dst.chan = i; 7197 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7198 alu.last = i == lasti; 7199 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 7200 return r; 7201 } 7202 } 
7203 break; 7204 default: 7205 assert(0); 7206 return -1; 7207 } 7208 7209 ctx->bc->ar_loaded = 0; 7210 return 0; 7211} 7212 7213static int tgsi_opdst(struct r600_shader_ctx *ctx) 7214{ 7215 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7216 struct r600_bytecode_alu alu; 7217 int i, r = 0; 7218 7219 for (i = 0; i < 4; i++) { 7220 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7221 7222 alu.op = ALU_OP2_MUL; 7223 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7224 7225 if (i == 0 || i == 3) { 7226 alu.src[0].sel = V_SQ_ALU_SRC_1; 7227 } else { 7228 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7229 } 7230 7231 if (i == 0 || i == 2) { 7232 alu.src[1].sel = V_SQ_ALU_SRC_1; 7233 } else { 7234 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 7235 } 7236 if (i == 3) 7237 alu.last = 1; 7238 r = r600_bytecode_add_alu(ctx->bc, &alu); 7239 if (r) 7240 return r; 7241 } 7242 return 0; 7243} 7244 7245static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type) 7246{ 7247 struct r600_bytecode_alu alu; 7248 int r; 7249 7250 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7251 alu.op = opcode; 7252 alu.execute_mask = 1; 7253 alu.update_pred = 1; 7254 7255 alu.dst.sel = ctx->temp_reg; 7256 alu.dst.write = 1; 7257 alu.dst.chan = 0; 7258 7259 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7260 alu.src[1].sel = V_SQ_ALU_SRC_0; 7261 alu.src[1].chan = 0; 7262 7263 alu.last = 1; 7264 7265 r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type); 7266 if (r) 7267 return r; 7268 return 0; 7269} 7270 7271static int pops(struct r600_shader_ctx *ctx, int pops) 7272{ 7273 unsigned force_pop = ctx->bc->force_add_cf; 7274 7275 if (!force_pop) { 7276 int alu_pop = 3; 7277 if (ctx->bc->cf_last) { 7278 if (ctx->bc->cf_last->op == CF_OP_ALU) 7279 alu_pop = 0; 7280 else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER) 7281 alu_pop = 1; 7282 } 7283 alu_pop += pops; 7284 if (alu_pop == 1) { 7285 ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER; 7286 
ctx->bc->force_add_cf = 1; 7287 } else if (alu_pop == 2) { 7288 ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER; 7289 ctx->bc->force_add_cf = 1; 7290 } else { 7291 force_pop = 1; 7292 } 7293 } 7294 7295 if (force_pop) { 7296 r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); 7297 ctx->bc->cf_last->pop_count = pops; 7298 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 7299 } 7300 7301 return 0; 7302} 7303 7304static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, 7305 unsigned reason) 7306{ 7307 struct r600_stack_info *stack = &ctx->bc->stack; 7308 unsigned elements, entries; 7309 7310 unsigned entry_size = stack->entry_size; 7311 7312 elements = (stack->loop + stack->push_wqm ) * entry_size; 7313 elements += stack->push; 7314 7315 switch (ctx->bc->chip_class) { 7316 case R600: 7317 case R700: 7318 /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on 7319 * the stack must be reserved to hold the current active/continue 7320 * masks */ 7321 if (reason == FC_PUSH_VPM) { 7322 elements += 2; 7323 } 7324 break; 7325 7326 case CAYMAN: 7327 /* r9xx: any stack operation on empty stack consumes 2 additional 7328 * elements */ 7329 elements += 2; 7330 7331 /* fallthrough */ 7332 /* FIXME: do the two elements added above cover the cases for the 7333 * r8xx+ below? */ 7334 7335 case EVERGREEN: 7336 /* r8xx+: 2 extra elements are not always required, but one extra 7337 * element must be added for each of the following cases: 7338 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest 7339 * stack usage. 7340 * (Currently we don't use ALU_ELSE_AFTER.) 7341 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM 7342 * PUSH instruction executed. 7343 * 7344 * NOTE: it seems we also need to reserve additional element in some 7345 * other cases, e.g. 
when we have 4 levels of PUSH_VPM in the shader, 7346 * then STACK_SIZE should be 2 instead of 1 */ 7347 if (reason == FC_PUSH_VPM) { 7348 elements += 1; 7349 } 7350 break; 7351 7352 default: 7353 assert(0); 7354 break; 7355 } 7356 7357 /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 7358 * for all chips, so we use 4 in the final formula, not the real entry_size 7359 * for the chip */ 7360 entry_size = 4; 7361 7362 entries = (elements + (entry_size - 1)) / entry_size; 7363 7364 if (entries > stack->max_entries) 7365 stack->max_entries = entries; 7366} 7367 7368static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 7369{ 7370 switch(reason) { 7371 case FC_PUSH_VPM: 7372 --ctx->bc->stack.push; 7373 assert(ctx->bc->stack.push >= 0); 7374 break; 7375 case FC_PUSH_WQM: 7376 --ctx->bc->stack.push_wqm; 7377 assert(ctx->bc->stack.push_wqm >= 0); 7378 break; 7379 case FC_LOOP: 7380 --ctx->bc->stack.loop; 7381 assert(ctx->bc->stack.loop >= 0); 7382 break; 7383 default: 7384 assert(0); 7385 break; 7386 } 7387} 7388 7389static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) 7390{ 7391 switch (reason) { 7392 case FC_PUSH_VPM: 7393 ++ctx->bc->stack.push; 7394 break; 7395 case FC_PUSH_WQM: 7396 ++ctx->bc->stack.push_wqm; 7397 case FC_LOOP: 7398 ++ctx->bc->stack.loop; 7399 break; 7400 default: 7401 assert(0); 7402 } 7403 7404 callstack_update_max_depth(ctx, reason); 7405} 7406 7407static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 7408{ 7409 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 7410 7411 sp->mid = realloc((void *)sp->mid, 7412 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 7413 sp->mid[sp->num_mid] = ctx->bc->cf_last; 7414 sp->num_mid++; 7415} 7416 7417static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 7418{ 7419 ctx->bc->fc_sp++; 7420 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 7421 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 7422} 
7423 7424static void fc_poplevel(struct r600_shader_ctx *ctx) 7425{ 7426 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 7427 free(sp->mid); 7428 sp->mid = NULL; 7429 sp->num_mid = 0; 7430 sp->start = NULL; 7431 sp->type = 0; 7432 ctx->bc->fc_sp--; 7433} 7434 7435#if 0 7436static int emit_return(struct r600_shader_ctx *ctx) 7437{ 7438 r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN)); 7439 return 0; 7440} 7441 7442static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 7443{ 7444 7445 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP)); 7446 ctx->bc->cf_last->pop_count = pops; 7447 /* XXX work out offset */ 7448 return 0; 7449} 7450 7451static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 7452{ 7453 return 0; 7454} 7455 7456static void emit_testflag(struct r600_shader_ctx *ctx) 7457{ 7458 7459} 7460 7461static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 7462{ 7463 emit_testflag(ctx); 7464 emit_jump_to_offset(ctx, 1, 4); 7465 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 7466 pops(ctx, ifidx + 1); 7467 emit_return(ctx); 7468} 7469 7470static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 7471{ 7472 emit_testflag(ctx); 7473 7474 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 7475 ctx->bc->cf_last->pop_count = 1; 7476 7477 fc_set_mid(ctx, fc_sp); 7478 7479 pops(ctx, 1); 7480} 7481#endif 7482 7483static int emit_if(struct r600_shader_ctx *ctx, int opcode) 7484{ 7485 int alu_type = CF_OP_ALU_PUSH_BEFORE; 7486 7487 /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by 7488 * LOOP_STARTxxx for nested loops may put the branch stack into a state 7489 * such that ALU_PUSH_BEFORE doesn't work as expected. 
Workaround this 7490 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */ 7491 if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) { 7492 r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH); 7493 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 7494 alu_type = CF_OP_ALU; 7495 } 7496 7497 emit_logic_pred(ctx, opcode, alu_type); 7498 7499 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); 7500 7501 fc_pushlevel(ctx, FC_IF); 7502 7503 callstack_push(ctx, FC_PUSH_VPM); 7504 return 0; 7505} 7506 7507static int tgsi_if(struct r600_shader_ctx *ctx) 7508{ 7509 return emit_if(ctx, ALU_OP2_PRED_SETNE); 7510} 7511 7512static int tgsi_uif(struct r600_shader_ctx *ctx) 7513{ 7514 return emit_if(ctx, ALU_OP2_PRED_SETNE_INT); 7515} 7516 7517static int tgsi_else(struct r600_shader_ctx *ctx) 7518{ 7519 r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE); 7520 ctx->bc->cf_last->pop_count = 1; 7521 7522 fc_set_mid(ctx, ctx->bc->fc_sp); 7523 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 7524 return 0; 7525} 7526 7527static int tgsi_endif(struct r600_shader_ctx *ctx) 7528{ 7529 pops(ctx, 1); 7530 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 7531 R600_ERR("if/endif unbalanced in shader\n"); 7532 return -1; 7533 } 7534 7535 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 7536 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 7537 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 7538 } else { 7539 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 7540 } 7541 fc_poplevel(ctx); 7542 7543 callstack_pop(ctx, FC_PUSH_VPM); 7544 return 0; 7545} 7546 7547static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 7548{ 7549 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not 7550 * limited to 4096 iterations, like the other LOOP_* instructions. 
*/ 7551 r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10); 7552 7553 fc_pushlevel(ctx, FC_LOOP); 7554 7555 /* check stack depth */ 7556 callstack_push(ctx, FC_LOOP); 7557 return 0; 7558} 7559 7560static int tgsi_endloop(struct r600_shader_ctx *ctx) 7561{ 7562 int i; 7563 7564 r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END); 7565 7566 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 7567 R600_ERR("loop/endloop in shader code are not paired.\n"); 7568 return -EINVAL; 7569 } 7570 7571 /* fixup loop pointers - from r600isa 7572 LOOP END points to CF after LOOP START, 7573 LOOP START point to CF after LOOP END 7574 BRK/CONT point to LOOP END CF 7575 */ 7576 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 7577 7578 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 7579 7580 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 7581 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 7582 } 7583 /* XXX add LOOPRET support */ 7584 fc_poplevel(ctx); 7585 callstack_pop(ctx, FC_LOOP); 7586 return 0; 7587} 7588 7589static int tgsi_loop_breakc(struct r600_shader_ctx *ctx) 7590{ 7591 int r; 7592 unsigned int fscp; 7593 7594 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 7595 { 7596 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 7597 break; 7598 } 7599 if (fscp == 0) { 7600 R600_ERR("BREAKC not inside loop/endloop pair\n"); 7601 return -EINVAL; 7602 } 7603 7604 if (ctx->bc->chip_class == EVERGREEN && 7605 ctx->bc->family != CHIP_CYPRESS && 7606 ctx->bc->family != CHIP_JUNIPER) { 7607 /* HW bug: ALU_BREAK does not save the active mask correctly */ 7608 r = tgsi_uif(ctx); 7609 if (r) 7610 return r; 7611 7612 r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK); 7613 if (r) 7614 return r; 7615 fc_set_mid(ctx, fscp); 7616 7617 return tgsi_endif(ctx); 7618 } else { 7619 r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK); 7620 if (r) 7621 return r; 7622 
fc_set_mid(ctx, fscp); 7623 } 7624 7625 return 0; 7626} 7627 7628static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 7629{ 7630 unsigned int fscp; 7631 7632 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 7633 { 7634 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 7635 break; 7636 } 7637 7638 if (fscp == 0) { 7639 R600_ERR("Break not inside loop/endloop pair\n"); 7640 return -EINVAL; 7641 } 7642 7643 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 7644 7645 fc_set_mid(ctx, fscp); 7646 7647 return 0; 7648} 7649 7650static int tgsi_gs_emit(struct r600_shader_ctx *ctx) 7651{ 7652 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7653 int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX]; 7654 int r; 7655 7656 if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) 7657 emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE); 7658 7659 r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 7660 if (!r) 7661 ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream 7662 return r; 7663} 7664 7665static int tgsi_umad(struct r600_shader_ctx *ctx) 7666{ 7667 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7668 struct r600_bytecode_alu alu; 7669 int i, j, k, r; 7670 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7671 7672 /* src0 * src1 */ 7673 for (i = 0; i < lasti + 1; i++) { 7674 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7675 continue; 7676 7677 if (ctx->bc->chip_class == CAYMAN) { 7678 for (j = 0 ; j < 4; j++) { 7679 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7680 7681 alu.op = ALU_OP2_MULLO_UINT; 7682 for (k = 0; k < inst->Instruction.NumSrcRegs; k++) { 7683 r600_bytecode_src(&alu.src[k], &ctx->src[k], i); 7684 } 7685 alu.dst.chan = j; 7686 alu.dst.sel = ctx->temp_reg; 7687 alu.dst.write = (j == i); 7688 if (j == 3) 7689 alu.last = 1; 7690 r = r600_bytecode_add_alu(ctx->bc, &alu); 7691 if (r) 
7692 return r; 7693 } 7694 } else { 7695 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7696 7697 alu.dst.chan = i; 7698 alu.dst.sel = ctx->temp_reg; 7699 alu.dst.write = 1; 7700 7701 alu.op = ALU_OP2_MULLO_UINT; 7702 for (j = 0; j < 2; j++) { 7703 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 7704 } 7705 7706 alu.last = 1; 7707 r = r600_bytecode_add_alu(ctx->bc, &alu); 7708 if (r) 7709 return r; 7710 } 7711 } 7712 7713 7714 for (i = 0; i < lasti + 1; i++) { 7715 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7716 continue; 7717 7718 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7719 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7720 7721 alu.op = ALU_OP2_ADD_INT; 7722 7723 alu.src[0].sel = ctx->temp_reg; 7724 alu.src[0].chan = i; 7725 7726 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 7727 if (i == lasti) { 7728 alu.last = 1; 7729 } 7730 r = r600_bytecode_add_alu(ctx->bc, &alu); 7731 if (r) 7732 return r; 7733 } 7734 return 0; 7735} 7736 7737static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 7738 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl}, 7739 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 7740 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 7741 7742 /* XXX: 7743 * For state trackers other than OpenGL, we'll want to use 7744 * _RECIP_IEEE instead. 
7745 */ 7746 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 7747 7748 [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, 7749 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 7750 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 7751 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 7752 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 7753 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 7754 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 7755 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 7756 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 7757 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 7758 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 7759 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 7760 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 7761 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 7762 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 7763 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 7764 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 7765 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 7766 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 7767 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 7768 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 7769 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 7770 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 7771 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 7772 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 7773 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 7774 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 7775 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 7776 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 7777 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 7778 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 7779 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 7780 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 7781 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 7782 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 
7783 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 7784 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 7785 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 7786 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 7787 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 7788 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 7789 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 7790 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 7791 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 7792 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 7793 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 7794 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 7795 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 7796 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 7797 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 7798 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 7799 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 7800 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 7801 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 7802 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 7803 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 7804 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 7805 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl}, 7806 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 7807 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 7808 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 7809 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 7810 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 7811 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 7812 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 7813 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 7814 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 7815 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 7816 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 7817 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 7818 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 7819 
[TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 7820 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 7821 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 7822 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 7823 [TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 7824 [TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 7825 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 7826 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 7827 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 7828 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 7829 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 7830 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 7831 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans}, 7832 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 7833 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 7834 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 7835 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 7836 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 7837 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 7838 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 7839 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 7840 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 7841 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 7842 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 7843 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 7844 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 7845 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 7846 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 7847 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 7848 [104] = { ALU_OP0_NOP, tgsi_unsupported}, 7849 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 7850 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 7851 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 7852 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 7853 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, 
tgsi_op2}, 7854 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 7855 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 7856 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 7857 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 7858 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 7859 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc}, 7860 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 7861 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 7862 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 7863 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, 7864 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 7865 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 7866 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 7867 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 7868 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 7869 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans}, 7870 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 7871 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, 7872 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 7873 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 7874 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 7875 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 7876 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 7877 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 7878 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 7879 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 7880 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 7881 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 7882 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans}, 7883 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 7884 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap}, 7885 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 7886 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 7887 
[TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 7888 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 7889 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 7890 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 7891 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 7892 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 7893 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 7894 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 7895 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 7896 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 7897 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 7898 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 7899 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 7900 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 7901 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl}, 7902 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 7903 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 7904 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 7905 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 7906 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 7907 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 7908 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 7909 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 7910 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 7911 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 7912 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 7913 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 7914 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 7915 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 7916 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 7917 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 7918 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 7919 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 7920 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 7921 
[TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 7922 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 7923 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 7924 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 7925 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 7926 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported}, 7927 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported}, 7928 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported}, 7929 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported}, 7930 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported}, 7931 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported}, 7932 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported}, 7933 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported}, 7934 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported}, 7935 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_unsupported}, 7936 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported}, 7937 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported}, 7938 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported}, 7939 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 7940}; 7941 7942static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 7943 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 7944 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 7945 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 7946 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 7947 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, 7948 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 7949 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 7950 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 7951 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 7952 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 7953 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 7954 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 7955 [TGSI_OPCODE_MIN] = { 
ALU_OP2_MIN, tgsi_op2}, 7956 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 7957 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 7958 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 7959 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 7960 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 7961 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 7962 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 7963 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 7964 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 7965 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 7966 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 7967 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 7968 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 7969 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 7970 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 7971 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 7972 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 7973 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 7974 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 7975 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 7976 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 7977 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 7978 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 7979 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 7980 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 7981 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 7982 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 7983 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 7984 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 7985 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 7986 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 7987 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 7988 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 7989 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 7990 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 7991 
[TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 7992 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 7993 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 7994 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 7995 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 7996 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 7997 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 7998 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 7999 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8000 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8001 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8002 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 8003 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 8004 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 8005 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 8006 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 8007 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 8008 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 8009 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 8010 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 8011 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8012 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 8013 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 8014 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 8015 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8016 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 8017 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 8018 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 8019 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 8020 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 8021 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 8022 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8023 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 8024 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 8025 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 8026 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 8027 
[TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 8028 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 8029 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 8030 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 8031 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 8032 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 8033 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 8034 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 8035 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 8036 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8037 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 8038 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8039 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 8040 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 8041 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 8042 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 8043 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8044 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 8045 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8046 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8047 [104] = { ALU_OP0_NOP, tgsi_unsupported}, 8048 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 8049 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 8050 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 8051 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 8052 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 8053 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 8054 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 8055 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 8056 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 8057 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 8058 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 8059 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 8060 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 8061 [118] = { 
ALU_OP0_NOP, tgsi_unsupported}, 8062 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i}, 8063 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 8064 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 8065 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 8066 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 8067 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 8068 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 8069 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 8070 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_f2i}, 8071 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 8072 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 8073 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 8074 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 8075 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 8076 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 8077 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 8078 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 8079 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 8080 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 8081 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 8082 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 8083 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 8084 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8085 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 8086 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 8087 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8088 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 8089 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 8090 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 8091 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 8092 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 8093 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 8094 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 8095 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 8096 
[TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 8097 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 8098 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 8099 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 8100 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 8101 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 8102 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 8103 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 8104 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8105 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 8106 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8107 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8108 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8109 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 8110 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 8111 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 8112 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 8113 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 8114 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8115 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8116 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8117 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8118 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8119 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8120 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 8121 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8122 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8123 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 8124 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 8125 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 8126 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 8127 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 8128 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 8129 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, 
tgsi_bfi}, 8130 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 8131 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 8132 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 8133 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 8134 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 8135 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8136 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8137 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8138 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 8139 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 8140 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 8141 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 8142 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 8143 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 8144 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 8145 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 8146 [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 8147 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 8148 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 8149 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 8150 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 8151 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 8152 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 8153 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 8154 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 8155 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 8156 [TGSI_OPCODE_D2I] = { ALU_OP0_NOP, tgsi_unsupported}, 8157 [TGSI_OPCODE_I2D] = { ALU_OP0_NOP, tgsi_unsupported}, 8158 [TGSI_OPCODE_D2U] = { ALU_OP0_NOP, tgsi_unsupported}, 8159 [TGSI_OPCODE_U2D] = { ALU_OP0_NOP, tgsi_unsupported}, 8160 [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 
8161 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 8162}; 8163 8164static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 8165 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 8166 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 8167 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 8168 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, 8169 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, 8170 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 8171 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 8172 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 8173 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 8174 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 8175 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 8176 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 8177 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 8178 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 8179 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 8180 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 8181 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 8182 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 8183 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 8184 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 8185 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr}, 8186 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 8187 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 8188 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 8189 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 8190 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 8191 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 8192 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 8193 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, 8194 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, 8195 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow}, 8196 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 8197 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 8198 
[TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 8199 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 8200 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 8201 [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig}, 8202 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8203 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 8204 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 8205 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8206 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8207 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8208 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8209 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 8210 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 8211 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 8212 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 8213 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, cayman_trig}, 8214 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 8215 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 8216 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 8217 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 8218 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 8219 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 8220 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 8221 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 8222 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 8223 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 8224 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 8225 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 8226 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 8227 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 8228 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 8229 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 8230 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 8231 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 8232 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 8233 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8234 [69] = { 
ALU_OP0_NOP, tgsi_unsupported}, 8235 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 8236 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 8237 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8238 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 8239 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 8240 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 8241 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 8242 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 8243 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 8244 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 8245 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 8246 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 8247 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 8248 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 8249 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2}, 8250 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 8251 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 8252 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 8253 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 8254 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 8255 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 8256 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 8257 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 8258 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8259 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 8260 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 8261 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 8262 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 8263 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 8264 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 8265 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8266 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 8267 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 8268 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, 
tgsi_tex}, 8269 [104] = { ALU_OP0_NOP, tgsi_unsupported}, 8270 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 8271 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 8272 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 8273 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 8274 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 8275 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 8276 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 8277 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 8278 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 8279 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 8280 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 8281 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 8282 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 8283 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 8284 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2}, 8285 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 8286 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 8287 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 8288 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 8289 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 8290 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 8291 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 8292 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2}, 8293 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2}, 8294 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 8295 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 8296 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 8297 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 8298 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 8299 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 8300 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr}, 8301 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 8302 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 8303 [TGSI_OPCODE_USHR] = { 
ALU_OP2_LSHR_INT, tgsi_op2}, 8304 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 8305 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 8306 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8307 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 8308 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 8309 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8310 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 8311 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 8312 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 8313 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 8314 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 8315 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 8316 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 8317 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 8318 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 8319 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 8320 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 8321 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 8322 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 8323 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 8324 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 8325 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 8326 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8327 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 8328 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8329 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8330 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8331 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 8332 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 8333 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 8334 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 8335 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 8336 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8337 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, 
tgsi_unsupported}, 8338 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8339 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8340 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8341 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8342 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 8343 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8344 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8345 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr}, 8346 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, 8347 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 8348 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 8349 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 8350 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 8351 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 8352 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 8353 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 8354 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 8355 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 8356 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 8357 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8358 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8359 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_interp_egcm}, 8360 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 8361 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 8362 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 8363 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 8364 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 8365 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 8366 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 8367 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 8368 [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 8369 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, 
tgsi_op2_64_single_dest}, 8370 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 8371 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 8372 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 8373 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 8374 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 8375 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 8376 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 8377 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 8378 [TGSI_OPCODE_D2I] = { ALU_OP0_NOP, tgsi_unsupported}, 8379 [TGSI_OPCODE_I2D] = { ALU_OP0_NOP, tgsi_unsupported}, 8380 [TGSI_OPCODE_D2U] = { ALU_OP0_NOP, tgsi_unsupported}, 8381 [TGSI_OPCODE_U2D] = { ALU_OP0_NOP, tgsi_unsupported}, 8382 [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 8383 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 8384}; 8385