r600_shader.c revision 0e49151dcfe042d937e1ac3c6eab86bb0a68cf04
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "r600_sq.h" 24#include "r600_llvm.h" 25#include "r600_formats.h" 26#include "r600_opcodes.h" 27#include "r600_shader.h" 28#include "r600d.h" 29 30#include "sb/sb_public.h" 31 32#include "pipe/p_shader_tokens.h" 33#include "tgsi/tgsi_info.h" 34#include "tgsi/tgsi_parse.h" 35#include "tgsi/tgsi_scan.h" 36#include "tgsi/tgsi_dump.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39#include <stdio.h> 40#include <errno.h> 41 42/* CAYMAN notes 43Why CAYMAN got loops for lots of instructions is explained here. 44 45-These 8xx t-slot only ops are implemented in all vector slots. 46MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 47These 8xx t-slot only opcodes become vector ops, with all four 48slots expecting the arguments on sources a and b. 
Result is 49broadcast to all channels. 50MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64 51These 8xx t-slot only opcodes become vector ops in the z, y, and 52x slots. 53EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 54RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 55SQRT_IEEE/_64 56SIN/COS 57The w slot may have an independent co-issued operation, or if the 58result is required to be in the w slot, the opcode above may be 59issued in the w slot as well. 60The compiler must issue the source argument to slots z, y, and x 61*/ 62 63#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) 64static int r600_shader_from_tgsi(struct r600_context *rctx, 65 struct r600_pipe_shader *pipeshader, 66 union r600_shader_key key); 67 68 69static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, 70 int size, unsigned comp_mask) { 71 72 if (!size) 73 return; 74 75 if (ps->num_arrays == ps->max_arrays) { 76 ps->max_arrays += 64; 77 ps->arrays = realloc(ps->arrays, ps->max_arrays * 78 sizeof(struct r600_shader_array)); 79 } 80 81 int n = ps->num_arrays; 82 ++ps->num_arrays; 83 84 ps->arrays[n].comp_mask = comp_mask; 85 ps->arrays[n].gpr_start = start_gpr; 86 ps->arrays[n].gpr_count = size; 87} 88 89static void r600_dump_streamout(struct pipe_stream_output_info *so) 90{ 91 unsigned i; 92 93 fprintf(stderr, "STREAMOUT\n"); 94 for (i = 0; i < so->num_outputs; i++) { 95 unsigned mask = ((1 << so->output[i].num_components) - 1) << 96 so->output[i].start_component; 97 fprintf(stderr, " %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n", 98 i, 99 so->output[i].stream, 100 so->output[i].output_buffer, 101 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, 102 so->output[i].register_index, 103 mask & 1 ? "x" : "", 104 mask & 2 ? "y" : "", 105 mask & 4 ? "z" : "", 106 mask & 8 ? "w" : "", 107 so->output[i].dst_offset < so->output[i].start_component ? 
" (will lower)" : ""); 108 } 109} 110 111static int store_shader(struct pipe_context *ctx, 112 struct r600_pipe_shader *shader) 113{ 114 struct r600_context *rctx = (struct r600_context *)ctx; 115 uint32_t *ptr, i; 116 117 if (shader->bo == NULL) { 118 shader->bo = (struct r600_resource*) 119 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); 120 if (shader->bo == NULL) { 121 return -ENOMEM; 122 } 123 ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); 124 if (R600_BIG_ENDIAN) { 125 for (i = 0; i < shader->shader.bc.ndw; ++i) { 126 ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); 127 } 128 } else { 129 memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); 130 } 131 rctx->b.ws->buffer_unmap(shader->bo->cs_buf); 132 } 133 134 return 0; 135} 136 137int r600_pipe_shader_create(struct pipe_context *ctx, 138 struct r600_pipe_shader *shader, 139 union r600_shader_key key) 140{ 141 struct r600_context *rctx = (struct r600_context *)ctx; 142 struct r600_pipe_shader_selector *sel = shader->selector; 143 int r; 144 bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens); 145 unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); 146 unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); 147 unsigned export_shader; 148 149 shader->shader.bc.isa = rctx->isa; 150 151 if (dump) { 152 fprintf(stderr, "--------------------------------------------------------------\n"); 153 tgsi_dump(sel->tokens, 0); 154 155 if (sel->so.num_outputs) { 156 r600_dump_streamout(&sel->so); 157 } 158 } 159 r = r600_shader_from_tgsi(rctx, shader, key); 160 if (r) { 161 R600_ERR("translation from TGSI failed !\n"); 162 goto error; 163 } 164 165 /* disable SB for shaders using doubles */ 166 use_sb &= !shader->shader.uses_doubles; 167 168 /* Check if the bytecode has already been built. 
When using the llvm 169 * backend, r600_shader_from_tgsi() will take care of building the 170 * bytecode. 171 */ 172 if (!shader->shader.bc.bytecode) { 173 r = r600_bytecode_build(&shader->shader.bc); 174 if (r) { 175 R600_ERR("building bytecode failed !\n"); 176 goto error; 177 } 178 } 179 180 if (dump && !sb_disasm) { 181 fprintf(stderr, "--------------------------------------------------------------\n"); 182 r600_bytecode_disasm(&shader->shader.bc); 183 fprintf(stderr, "______________________________________________________________\n"); 184 } else if ((dump && sb_disasm) || use_sb) { 185 r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, 186 dump, use_sb); 187 if (r) { 188 R600_ERR("r600_sb_bytecode_process failed !\n"); 189 goto error; 190 } 191 } 192 193 if (shader->gs_copy_shader) { 194 if (dump) { 195 // dump copy shader 196 r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc, 197 &shader->gs_copy_shader->shader, dump, 0); 198 if (r) 199 goto error; 200 } 201 202 if ((r = store_shader(ctx, shader->gs_copy_shader))) 203 goto error; 204 } 205 206 /* Store the shader in a buffer. */ 207 if ((r = store_shader(ctx, shader))) 208 goto error; 209 210 /* Build state. 
*/ 211 switch (shader->shader.processor_type) { 212 case TGSI_PROCESSOR_GEOMETRY: 213 if (rctx->b.chip_class >= EVERGREEN) { 214 evergreen_update_gs_state(ctx, shader); 215 evergreen_update_vs_state(ctx, shader->gs_copy_shader); 216 } else { 217 r600_update_gs_state(ctx, shader); 218 r600_update_vs_state(ctx, shader->gs_copy_shader); 219 } 220 break; 221 case TGSI_PROCESSOR_VERTEX: 222 export_shader = key.vs.as_es; 223 if (rctx->b.chip_class >= EVERGREEN) { 224 if (export_shader) 225 evergreen_update_es_state(ctx, shader); 226 else 227 evergreen_update_vs_state(ctx, shader); 228 } else { 229 if (export_shader) 230 r600_update_es_state(ctx, shader); 231 else 232 r600_update_vs_state(ctx, shader); 233 } 234 break; 235 case TGSI_PROCESSOR_FRAGMENT: 236 if (rctx->b.chip_class >= EVERGREEN) { 237 evergreen_update_ps_state(ctx, shader); 238 } else { 239 r600_update_ps_state(ctx, shader); 240 } 241 break; 242 default: 243 r = -EINVAL; 244 goto error; 245 } 246 return 0; 247 248error: 249 r600_pipe_shader_destroy(ctx, shader); 250 return r; 251} 252 253void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 254{ 255 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); 256 r600_bytecode_clear(&shader->shader.bc); 257 r600_release_command_buffer(&shader->command_buffer); 258} 259 260/* 261 * tgsi -> r600 shader 262 */ 263struct r600_shader_tgsi_instruction; 264 265struct r600_shader_src { 266 unsigned sel; 267 unsigned swizzle[4]; 268 unsigned neg; 269 unsigned abs; 270 unsigned rel; 271 unsigned kc_bank; 272 boolean kc_rel; /* true if cache bank is indexed */ 273 uint32_t value[4]; 274}; 275 276struct eg_interp { 277 boolean enabled; 278 unsigned ij_index; 279}; 280 281struct r600_shader_ctx { 282 struct tgsi_shader_info info; 283 struct tgsi_parse_context parse; 284 const struct tgsi_token *tokens; 285 unsigned type; 286 unsigned file_offset[TGSI_FILE_COUNT]; 287 unsigned temp_reg; 288 const struct 
r600_shader_tgsi_instruction *inst_info; 289 struct r600_bytecode *bc; 290 struct r600_shader *shader; 291 struct r600_shader_src src[4]; 292 uint32_t *literals; 293 uint32_t nliterals; 294 uint32_t max_driver_temp_used; 295 boolean use_llvm; 296 /* needed for evergreen interpolation */ 297 struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid 298 /* evergreen/cayman also store sample mask in face register */ 299 int face_gpr; 300 /* sample id is .w component stored in fixed point position register */ 301 int fixed_pt_position_gpr; 302 int colors_used; 303 boolean clip_vertex_write; 304 unsigned cv_output; 305 unsigned edgeflag_output; 306 int fragcoord_input; 307 int native_integers; 308 int next_ring_offset; 309 int gs_out_ring_offset; 310 int gs_next_vertex; 311 struct r600_shader *gs_for_vs; 312 int gs_export_gpr_tregs[4]; 313 const struct pipe_stream_output_info *gs_stream_output_info; 314 unsigned enabled_stream_buffers_mask; 315}; 316 317struct r600_shader_tgsi_instruction { 318 unsigned op; 319 int (*process)(struct r600_shader_ctx *ctx); 320}; 321 322static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind); 323static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 324static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 325static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); 326static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); 327static int tgsi_else(struct r600_shader_ctx *ctx); 328static int tgsi_endif(struct r600_shader_ctx *ctx); 329static int tgsi_bgnloop(struct r600_shader_ctx *ctx); 330static int tgsi_endloop(struct r600_shader_ctx *ctx); 331static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx); 332static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 333 unsigned int cb_idx, unsigned 
cb_rel, unsigned int offset, unsigned ar_chan, 334 unsigned int dst_reg); 335static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 336 const struct r600_shader_src *shader_src, 337 unsigned chan); 338 339static int tgsi_is_supported(struct r600_shader_ctx *ctx) 340{ 341 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 342 int j; 343 344 if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) { 345 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 346 return -EINVAL; 347 } 348 if (i->Instruction.Predicate) { 349 R600_ERR("predicate unsupported\n"); 350 return -EINVAL; 351 } 352#if 0 353 if (i->Instruction.Label) { 354 R600_ERR("label unsupported\n"); 355 return -EINVAL; 356 } 357#endif 358 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 359 if (i->Src[j].Register.Dimension) { 360 switch (i->Src[j].Register.File) { 361 case TGSI_FILE_CONSTANT: 362 break; 363 case TGSI_FILE_INPUT: 364 if (ctx->type == TGSI_PROCESSOR_GEOMETRY) 365 break; 366 default: 367 R600_ERR("unsupported src %d (dimension %d)\n", j, 368 i->Src[j].Register.Dimension); 369 return -EINVAL; 370 } 371 } 372 } 373 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 374 if (i->Dst[j].Register.Dimension) { 375 R600_ERR("unsupported dst (dimension)\n"); 376 return -EINVAL; 377 } 378 } 379 return 0; 380} 381 382int eg_get_interpolator_index(unsigned interpolate, unsigned location) 383{ 384 if (interpolate == TGSI_INTERPOLATE_COLOR || 385 interpolate == TGSI_INTERPOLATE_LINEAR || 386 interpolate == TGSI_INTERPOLATE_PERSPECTIVE) 387 { 388 int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR; 389 int loc; 390 391 switch(location) { 392 case TGSI_INTERPOLATE_LOC_CENTER: 393 loc = 1; 394 break; 395 case TGSI_INTERPOLATE_LOC_CENTROID: 396 loc = 2; 397 break; 398 case TGSI_INTERPOLATE_LOC_SAMPLE: 399 default: 400 loc = 0; break; 401 } 402 403 return is_linear * 3 + loc; 404 } 405 406 return -1; 407} 408 409static void 
evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx, 410 int input) 411{ 412 int i = eg_get_interpolator_index( 413 ctx->shader->input[input].interpolate, 414 ctx->shader->input[input].interpolate_location); 415 assert(i >= 0); 416 ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index; 417} 418 419static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 420{ 421 int i, r; 422 struct r600_bytecode_alu alu; 423 int gpr = 0, base_chan = 0; 424 int ij_index = ctx->shader->input[input].ij_index; 425 426 /* work out gpr and base_chan from index */ 427 gpr = ij_index / 2; 428 base_chan = (2 * (ij_index % 2)) + 1; 429 430 for (i = 0; i < 8; i++) { 431 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 432 433 if (i < 4) 434 alu.op = ALU_OP2_INTERP_ZW; 435 else 436 alu.op = ALU_OP2_INTERP_XY; 437 438 if ((i > 1) && (i < 6)) { 439 alu.dst.sel = ctx->shader->input[input].gpr; 440 alu.dst.write = 1; 441 } 442 443 alu.dst.chan = i % 4; 444 445 alu.src[0].sel = gpr; 446 alu.src[0].chan = (base_chan - (i % 2)); 447 448 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 449 450 alu.bank_swizzle_force = SQ_ALU_VEC_210; 451 if ((i % 4) == 3) 452 alu.last = 1; 453 r = r600_bytecode_add_alu(ctx->bc, &alu); 454 if (r) 455 return r; 456 } 457 return 0; 458} 459 460static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 461{ 462 int i, r; 463 struct r600_bytecode_alu alu; 464 465 for (i = 0; i < 4; i++) { 466 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 467 468 alu.op = ALU_OP1_INTERP_LOAD_P0; 469 470 alu.dst.sel = ctx->shader->input[input].gpr; 471 alu.dst.write = 1; 472 473 alu.dst.chan = i; 474 475 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 476 alu.src[0].chan = i; 477 478 if (i == 3) 479 alu.last = 1; 480 r = r600_bytecode_add_alu(ctx->bc, &alu); 481 if (r) 482 return r; 483 } 484 return 0; 485} 486 487/* 488 * Special export handling in shaders 489 * 490 * 
shader export ARRAY_BASE for EXPORT_POS: 491 * 60 is position 492 * 61 is misc vector 493 * 62, 63 are clip distance vectors 494 * 495 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 496 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 497 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 498 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 499 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 500 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 501 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 502 * exclusive from render target index) 503 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 504 * 505 * 506 * shader export ARRAY_BASE for EXPORT_PIXEL: 507 * 0-7 CB targets 508 * 61 computed Z vector 509 * 510 * The use of the values exported in the computed Z vector are controlled 511 * by DB_SHADER_CONTROL: 512 * Z_EXPORT_ENABLE - Z as a float in RED 513 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 514 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 515 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 516 * DB_SOURCE_FORMAT - export control restrictions 517 * 518 */ 519 520 521/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 522static int r600_spi_sid(struct r600_shader_io * io) 523{ 524 int index, name = io->name; 525 526 /* These params are handled differently, they don't need 527 * semantic indices, so we'll use 0 for them. 
528 */ 529 if (name == TGSI_SEMANTIC_POSITION || 530 name == TGSI_SEMANTIC_PSIZE || 531 name == TGSI_SEMANTIC_EDGEFLAG || 532 name == TGSI_SEMANTIC_FACE || 533 name == TGSI_SEMANTIC_SAMPLEMASK) 534 index = 0; 535 else { 536 if (name == TGSI_SEMANTIC_GENERIC) { 537 /* For generic params simply use sid from tgsi */ 538 index = io->sid; 539 } else { 540 /* For non-generic params - pack name and sid into 8 bits */ 541 index = 0x80 | (name<<3) | (io->sid); 542 } 543 544 /* Make sure that all really used indices have nonzero value, so 545 * we can just compare it to 0 later instead of comparing the name 546 * with different values to detect special cases. */ 547 index++; 548 } 549 550 return index; 551}; 552 553/* turn input into interpolate on EG */ 554static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 555{ 556 int r = 0; 557 558 if (ctx->shader->input[index].spi_sid) { 559 ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 560 if (ctx->shader->input[index].interpolate > 0) { 561 evergreen_interp_assign_ij_index(ctx, index); 562 if (!ctx->use_llvm) 563 r = evergreen_interp_alu(ctx, index); 564 } else { 565 if (!ctx->use_llvm) 566 r = evergreen_interp_flat(ctx, index); 567 } 568 } 569 return r; 570} 571 572static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 573{ 574 struct r600_bytecode_alu alu; 575 int i, r; 576 int gpr_front = ctx->shader->input[front].gpr; 577 int gpr_back = ctx->shader->input[back].gpr; 578 579 for (i = 0; i < 4; i++) { 580 memset(&alu, 0, sizeof(alu)); 581 alu.op = ALU_OP3_CNDGT; 582 alu.is_op3 = 1; 583 alu.dst.write = 1; 584 alu.dst.sel = gpr_front; 585 alu.src[0].sel = ctx->face_gpr; 586 alu.src[1].sel = gpr_front; 587 alu.src[2].sel = gpr_back; 588 589 alu.dst.chan = i; 590 alu.src[1].chan = i; 591 alu.src[2].chan = i; 592 alu.last = (i==3); 593 594 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 595 return r; 596 } 597 598 return 0; 599} 600 601static inline int 
get_address_file_reg(struct r600_shader_ctx *ctx, int index) 602{ 603 return index > 0 ? ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg; 604} 605 606static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid) 607{ 608 int i; 609 i = ctx->shader->noutput++; 610 ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID; 611 ctx->shader->output[i].sid = 0; 612 ctx->shader->output[i].gpr = 0; 613 ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT; 614 ctx->shader->output[i].write_mask = 0x4; 615 ctx->shader->output[i].spi_sid = prim_id_sid; 616 617 return 0; 618} 619 620static int tgsi_declaration(struct r600_shader_ctx *ctx) 621{ 622 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 623 int r, i, j, count = d->Range.Last - d->Range.First + 1; 624 625 switch (d->Declaration.File) { 626 case TGSI_FILE_INPUT: 627 for (j = 0; j < count; j++) { 628 i = ctx->shader->ninput + j; 629 assert(i < Elements(ctx->shader->input)); 630 ctx->shader->input[i].name = d->Semantic.Name; 631 ctx->shader->input[i].sid = d->Semantic.Index + j; 632 ctx->shader->input[i].interpolate = d->Interp.Interpolate; 633 ctx->shader->input[i].interpolate_location = d->Interp.Location; 634 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j; 635 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 636 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 637 switch (ctx->shader->input[i].name) { 638 case TGSI_SEMANTIC_FACE: 639 if (ctx->face_gpr != -1) 640 ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */ 641 else 642 ctx->face_gpr = ctx->shader->input[i].gpr; 643 break; 644 case TGSI_SEMANTIC_COLOR: 645 ctx->colors_used++; 646 break; 647 case TGSI_SEMANTIC_POSITION: 648 ctx->fragcoord_input = i; 649 break; 650 case TGSI_SEMANTIC_PRIMID: 651 /* set this for now */ 652 ctx->shader->gs_prim_id_input = true; 653 ctx->shader->ps_prim_id_input = i; 654 break; 655 } 656 if 
(ctx->bc->chip_class >= EVERGREEN) { 657 if ((r = evergreen_interp_input(ctx, i))) 658 return r; 659 } 660 } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { 661 /* FIXME probably skip inputs if they aren't passed in the ring */ 662 ctx->shader->input[i].ring_offset = ctx->next_ring_offset; 663 ctx->next_ring_offset += 16; 664 if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID) 665 ctx->shader->gs_prim_id_input = true; 666 } 667 } 668 ctx->shader->ninput += count; 669 break; 670 case TGSI_FILE_OUTPUT: 671 for (j = 0; j < count; j++) { 672 i = ctx->shader->noutput + j; 673 assert(i < Elements(ctx->shader->output)); 674 ctx->shader->output[i].name = d->Semantic.Name; 675 ctx->shader->output[i].sid = d->Semantic.Index + j; 676 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j; 677 ctx->shader->output[i].interpolate = d->Interp.Interpolate; 678 ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 679 if (ctx->type == TGSI_PROCESSOR_VERTEX || 680 ctx->type == TGSI_PROCESSOR_GEOMETRY) { 681 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 682 switch (d->Semantic.Name) { 683 case TGSI_SEMANTIC_CLIPDIST: 684 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << 685 ((d->Semantic.Index + j) << 2); 686 break; 687 case TGSI_SEMANTIC_PSIZE: 688 ctx->shader->vs_out_misc_write = 1; 689 ctx->shader->vs_out_point_size = 1; 690 break; 691 case TGSI_SEMANTIC_EDGEFLAG: 692 ctx->shader->vs_out_misc_write = 1; 693 ctx->shader->vs_out_edgeflag = 1; 694 ctx->edgeflag_output = i; 695 break; 696 case TGSI_SEMANTIC_VIEWPORT_INDEX: 697 ctx->shader->vs_out_misc_write = 1; 698 ctx->shader->vs_out_viewport = 1; 699 break; 700 case TGSI_SEMANTIC_LAYER: 701 ctx->shader->vs_out_misc_write = 1; 702 ctx->shader->vs_out_layer = 1; 703 break; 704 case TGSI_SEMANTIC_CLIPVERTEX: 705 ctx->clip_vertex_write = TRUE; 706 ctx->cv_output = i; 707 break; 708 } 709 if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { 710 ctx->gs_out_ring_offset 
+= 16; 711 } 712 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 713 switch (d->Semantic.Name) { 714 case TGSI_SEMANTIC_COLOR: 715 ctx->shader->nr_ps_max_color_exports++; 716 break; 717 } 718 } 719 } 720 ctx->shader->noutput += count; 721 break; 722 case TGSI_FILE_TEMPORARY: 723 if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 724 if (d->Array.ArrayID) { 725 r600_add_gpr_array(ctx->shader, 726 ctx->file_offset[TGSI_FILE_TEMPORARY] + 727 d->Range.First, 728 d->Range.Last - d->Range.First + 1, 0x0F); 729 } 730 } 731 break; 732 733 case TGSI_FILE_CONSTANT: 734 case TGSI_FILE_SAMPLER: 735 case TGSI_FILE_SAMPLER_VIEW: 736 case TGSI_FILE_ADDRESS: 737 break; 738 739 case TGSI_FILE_SYSTEM_VALUE: 740 if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK || 741 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID || 742 d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) { 743 break; /* Already handled from allocate_system_value_inputs */ 744 } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 745 if (!ctx->native_integers) { 746 struct r600_bytecode_alu alu; 747 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 748 749 alu.op = ALU_OP1_INT_TO_FLT; 750 alu.src[0].sel = 0; 751 alu.src[0].chan = 3; 752 753 alu.dst.sel = 0; 754 alu.dst.chan = 3; 755 alu.dst.write = 1; 756 alu.last = 1; 757 758 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 759 return r; 760 } 761 break; 762 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 763 break; 764 else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID) 765 break; 766 default: 767 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 768 return -EINVAL; 769 } 770 return 0; 771} 772 773static int r600_get_temp(struct r600_shader_ctx *ctx) 774{ 775 return ctx->temp_reg + ctx->max_driver_temp_used++; 776} 777 778static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset) 779{ 780 struct tgsi_parse_context parse; 781 struct { 782 boolean enabled; 783 int *reg; 784 unsigned name, alternate_name; 
785 } inputs[2] = { 786 { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */ 787 788 { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */ 789 }; 790 int i, k, num_regs = 0; 791 792 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 793 return 0; 794 } 795 796 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 797 while (!tgsi_parse_end_of_tokens(&parse)) { 798 tgsi_parse_token(&parse); 799 800 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 801 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 802 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 803 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 804 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 805 { 806 int interpolate, location, k; 807 808 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 809 location = TGSI_INTERPOLATE_LOC_CENTER; 810 inputs[1].enabled = true; /* needs SAMPLEID */ 811 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 812 location = TGSI_INTERPOLATE_LOC_CENTER; 813 /* Needs sample positions, currently those are always available */ 814 } else { 815 location = TGSI_INTERPOLATE_LOC_CENTROID; 816 } 817 818 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 819 k = eg_get_interpolator_index(interpolate, location); 820 ctx->eg_interpolators[k].enabled = true; 821 } 822 } else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { 823 struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration; 824 if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 825 for (k = 0; k < Elements(inputs); k++) { 826 if (d->Semantic.Name == inputs[k].name || 827 d->Semantic.Name == inputs[k].alternate_name) { 828 inputs[k].enabled = true; 829 } 830 } 831 } 832 } 833 } 834 835 tgsi_parse_free(&parse); 836 837 for (i = 0; 
i < Elements(inputs); i++) { 838 boolean enabled = inputs[i].enabled; 839 int *reg = inputs[i].reg; 840 unsigned name = inputs[i].name; 841 842 if (enabled) { 843 int gpr = gpr_offset + num_regs++; 844 845 // add to inputs, allocate a gpr 846 k = ctx->shader->ninput ++; 847 ctx->shader->input[k].name = name; 848 ctx->shader->input[k].sid = 0; 849 ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT; 850 ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER; 851 *reg = ctx->shader->input[k].gpr = gpr; 852 } 853 } 854 855 return gpr_offset + num_regs; 856} 857 858/* 859 * for evergreen we need to scan the shader to find the number of GPRs we need to 860 * reserve for interpolation and system values 861 * 862 * we need to know if we are going to emit 863 * any sample or centroid inputs 864 * if perspective and linear are required 865*/ 866static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 867{ 868 int i; 869 int num_baryc; 870 struct tgsi_parse_context parse; 871 872 memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators)); 873 874 for (i = 0; i < ctx->info.num_inputs; i++) { 875 int k; 876 /* skip position/face/mask/sampleid */ 877 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 878 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE || 879 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK || 880 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID) 881 continue; 882 883 k = eg_get_interpolator_index( 884 ctx->info.input_interpolate[i], 885 ctx->info.input_interpolate_loc[i]); 886 if (k >= 0) 887 ctx->eg_interpolators[k].enabled = TRUE; 888 } 889 890 if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) { 891 return 0; 892 } 893 894 /* need to scan shader for system values and interpolateAtSample/Offset/Centroid */ 895 while (!tgsi_parse_end_of_tokens(&parse)) { 896 tgsi_parse_token(&parse); 897 898 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { 899 const struct 
tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; 900 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE || 901 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 902 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) 903 { 904 int interpolate, location, k; 905 906 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 907 location = TGSI_INTERPOLATE_LOC_CENTER; 908 } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { 909 location = TGSI_INTERPOLATE_LOC_CENTER; 910 } else { 911 location = TGSI_INTERPOLATE_LOC_CENTROID; 912 } 913 914 interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; 915 k = eg_get_interpolator_index(interpolate, location); 916 ctx->eg_interpolators[k].enabled = true; 917 } 918 } 919 } 920 921 tgsi_parse_free(&parse); 922 923 /* assign gpr to each interpolator according to priority */ 924 num_baryc = 0; 925 for (i = 0; i < Elements(ctx->eg_interpolators); i++) { 926 if (ctx->eg_interpolators[i].enabled) { 927 ctx->eg_interpolators[i].ij_index = num_baryc; 928 num_baryc ++; 929 } 930 } 931 932 /* XXX PULL MODEL and LINE STIPPLE */ 933 934 num_baryc = (num_baryc + 1) >> 1; 935 return allocate_system_value_inputs(ctx, num_baryc); 936} 937 938/* sample_id_sel == NULL means fetch for current sample */ 939static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel) 940{ 941 struct r600_bytecode_vtx vtx; 942 int r, t1; 943 944 assert(ctx->fixed_pt_position_gpr != -1); 945 946 t1 = r600_get_temp(ctx); 947 948 memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); 949 vtx.op = FETCH_OP_VFETCH; 950 vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; 951 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 952 if (sample_id == NULL) { 953 vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; 954 vtx.src_sel_x = 3; 955 } 956 else { 957 struct r600_bytecode_alu alu; 958 959 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 960 alu.op = 
ALU_OP1_MOV; 961 r600_bytecode_src(&alu.src[0], sample_id, chan_sel); 962 alu.dst.sel = t1; 963 alu.dst.write = 1; 964 alu.last = 1; 965 r = r600_bytecode_add_alu(ctx->bc, &alu); 966 if (r) 967 return r; 968 969 vtx.src_gpr = t1; 970 vtx.src_sel_x = 0; 971 } 972 vtx.mega_fetch_count = 16; 973 vtx.dst_gpr = t1; 974 vtx.dst_sel_x = 0; 975 vtx.dst_sel_y = 1; 976 vtx.dst_sel_z = 2; 977 vtx.dst_sel_w = 3; 978 vtx.data_format = FMT_32_32_32_32_FLOAT; 979 vtx.num_format_all = 2; 980 vtx.format_comp_all = 1; 981 vtx.use_const_fields = 0; 982 vtx.offset = 1; // first element is size of buffer 983 vtx.endian = r600_endian_swap(32); 984 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 985 986 r = r600_bytecode_add_vtx(ctx->bc, &vtx); 987 if (r) 988 return r; 989 990 return t1; 991} 992 993static void tgsi_src(struct r600_shader_ctx *ctx, 994 const struct tgsi_full_src_register *tgsi_src, 995 struct r600_shader_src *r600_src) 996{ 997 memset(r600_src, 0, sizeof(*r600_src)); 998 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 999 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 1000 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 1001 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 1002 r600_src->neg = tgsi_src->Register.Negate; 1003 r600_src->abs = tgsi_src->Register.Absolute; 1004 1005 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 1006 int index; 1007 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 1008 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 1009 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 1010 1011 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 1012 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs); 1013 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 1014 return; 1015 } 1016 index = tgsi_src->Register.Index; 1017 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 1018 memcpy(r600_src->value, ctx->literals + index * 4, 
sizeof(r600_src->value)); 1019 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 1020 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) { 1021 r600_src->swizzle[0] = 2; // Z value 1022 r600_src->swizzle[1] = 2; 1023 r600_src->swizzle[2] = 2; 1024 r600_src->swizzle[3] = 2; 1025 r600_src->sel = ctx->face_gpr; 1026 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) { 1027 r600_src->swizzle[0] = 3; // W value 1028 r600_src->swizzle[1] = 3; 1029 r600_src->swizzle[2] = 3; 1030 r600_src->swizzle[3] = 3; 1031 r600_src->sel = ctx->fixed_pt_position_gpr; 1032 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) { 1033 r600_src->swizzle[0] = 0; 1034 r600_src->swizzle[1] = 1; 1035 r600_src->swizzle[2] = 4; 1036 r600_src->swizzle[3] = 4; 1037 r600_src->sel = load_sample_position(ctx, NULL, -1); 1038 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 1039 r600_src->swizzle[0] = 3; 1040 r600_src->swizzle[1] = 3; 1041 r600_src->swizzle[2] = 3; 1042 r600_src->swizzle[3] = 3; 1043 r600_src->sel = 0; 1044 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 1045 r600_src->swizzle[0] = 0; 1046 r600_src->swizzle[1] = 0; 1047 r600_src->swizzle[2] = 0; 1048 r600_src->swizzle[3] = 0; 1049 r600_src->sel = 0; 1050 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { 1051 r600_src->swizzle[0] = 3; 1052 r600_src->swizzle[1] = 3; 1053 r600_src->swizzle[2] = 3; 1054 r600_src->swizzle[3] = 3; 1055 r600_src->sel = 1; 1056 } 1057 } else { 1058 if (tgsi_src->Register.Indirect) 1059 r600_src->rel = V_SQ_REL_RELATIVE; 1060 r600_src->sel = tgsi_src->Register.Index; 1061 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 1062 } 1063 if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) 
{ 1064 if (tgsi_src->Register.Dimension) { 1065 r600_src->kc_bank = tgsi_src->Dimension.Index; 1066 if (tgsi_src->Dimension.Indirect) { 1067 r600_src->kc_rel = 1; 1068 } 1069 } 1070 } 1071} 1072 1073static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 1074 unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan, 1075 unsigned int dst_reg) 1076{ 1077 struct r600_bytecode_vtx vtx; 1078 unsigned int ar_reg; 1079 int r; 1080 1081 if (offset) { 1082 struct r600_bytecode_alu alu; 1083 1084 memset(&alu, 0, sizeof(alu)); 1085 1086 alu.op = ALU_OP2_ADD_INT; 1087 alu.src[0].sel = ctx->bc->ar_reg; 1088 alu.src[0].chan = ar_chan; 1089 1090 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1091 alu.src[1].value = offset; 1092 1093 alu.dst.sel = dst_reg; 1094 alu.dst.chan = ar_chan; 1095 alu.dst.write = 1; 1096 alu.last = 1; 1097 1098 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 1099 return r; 1100 1101 ar_reg = dst_reg; 1102 } else { 1103 ar_reg = ctx->bc->ar_reg; 1104 } 1105 1106 memset(&vtx, 0, sizeof(vtx)); 1107 vtx.buffer_id = cb_idx; 1108 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1109 vtx.src_gpr = ar_reg; 1110 vtx.src_sel_x = ar_chan; 1111 vtx.mega_fetch_count = 16; 1112 vtx.dst_gpr = dst_reg; 1113 vtx.dst_sel_x = 0; /* SEL_X */ 1114 vtx.dst_sel_y = 1; /* SEL_Y */ 1115 vtx.dst_sel_z = 2; /* SEL_Z */ 1116 vtx.dst_sel_w = 3; /* SEL_W */ 1117 vtx.data_format = FMT_32_32_32_32_FLOAT; 1118 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 1119 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 1120 vtx.endian = r600_endian_swap(32); 1121 vtx.buffer_index_mode = cb_rel; // cb_rel ? 
V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE; 1122 1123 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1124 return r; 1125 1126 return 0; 1127} 1128 1129static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 1130{ 1131 struct r600_bytecode_vtx vtx; 1132 int r; 1133 unsigned index = src->Register.Index; 1134 unsigned vtx_id = src->Dimension.Index; 1135 int offset_reg = vtx_id / 3; 1136 int offset_chan = vtx_id % 3; 1137 1138 /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, 1139 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ 1140 1141 if (offset_reg == 0 && offset_chan == 2) 1142 offset_chan = 3; 1143 1144 if (src->Dimension.Indirect) { 1145 int treg[3]; 1146 int t2; 1147 struct r600_bytecode_alu alu; 1148 int r, i; 1149 1150 /* you have got to be shitting me - 1151 we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt. 1152 at least this is what fglrx seems to do. */ 1153 for (i = 0; i < 3; i++) { 1154 treg[i] = r600_get_temp(ctx); 1155 } 1156 r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F); 1157 1158 t2 = r600_get_temp(ctx); 1159 for (i = 0; i < 3; i++) { 1160 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1161 alu.op = ALU_OP1_MOV; 1162 alu.src[0].sel = 0; 1163 alu.src[0].chan = i == 2 ? 
3 : i; 1164 alu.dst.sel = treg[i]; 1165 alu.dst.chan = 0; 1166 alu.dst.write = 1; 1167 alu.last = 1; 1168 r = r600_bytecode_add_alu(ctx->bc, &alu); 1169 if (r) 1170 return r; 1171 } 1172 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1173 alu.op = ALU_OP1_MOV; 1174 alu.src[0].sel = treg[0]; 1175 alu.src[0].rel = 1; 1176 alu.dst.sel = t2; 1177 alu.dst.write = 1; 1178 alu.last = 1; 1179 r = r600_bytecode_add_alu(ctx->bc, &alu); 1180 if (r) 1181 return r; 1182 offset_reg = t2; 1183 } 1184 1185 1186 memset(&vtx, 0, sizeof(vtx)); 1187 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 1188 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1189 vtx.src_gpr = offset_reg; 1190 vtx.src_sel_x = offset_chan; 1191 vtx.offset = index * 16; /*bytes*/ 1192 vtx.mega_fetch_count = 16; 1193 vtx.dst_gpr = dst_reg; 1194 vtx.dst_sel_x = 0; /* SEL_X */ 1195 vtx.dst_sel_y = 1; /* SEL_Y */ 1196 vtx.dst_sel_z = 2; /* SEL_Z */ 1197 vtx.dst_sel_w = 3; /* SEL_W */ 1198 if (ctx->bc->chip_class >= EVERGREEN) { 1199 vtx.use_const_fields = 1; 1200 } else { 1201 vtx.data_format = FMT_32_32_32_32_FLOAT; 1202 } 1203 1204 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 1205 return r; 1206 1207 return 0; 1208} 1209 1210static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) 1211{ 1212 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1213 int i; 1214 1215 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1216 struct tgsi_full_src_register *src = &inst->Src[i]; 1217 1218 if (src->Register.File == TGSI_FILE_INPUT) { 1219 if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) { 1220 /* primitive id is in R0.z */ 1221 ctx->src[i].sel = 0; 1222 ctx->src[i].swizzle[0] = 2; 1223 } 1224 } 1225 if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { 1226 int treg = r600_get_temp(ctx); 1227 1228 fetch_gs_input(ctx, src, treg); 1229 ctx->src[i].sel = treg; 1230 } 1231 } 1232 return 0; 1233} 1234 1235static int tgsi_split_constant(struct 
r600_shader_ctx *ctx) 1236{ 1237 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1238 struct r600_bytecode_alu alu; 1239 int i, j, k, nconst, r; 1240 1241 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 1242 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 1243 nconst++; 1244 } 1245 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 1246 } 1247 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 1248 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 1249 continue; 1250 } 1251 1252 if (ctx->src[i].rel) { 1253 int chan = inst->Src[i].Indirect.Swizzle; 1254 int treg = r600_get_temp(ctx); 1255 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg))) 1256 return r; 1257 1258 ctx->src[i].kc_bank = 0; 1259 ctx->src[i].kc_rel = 0; 1260 ctx->src[i].sel = treg; 1261 ctx->src[i].rel = 0; 1262 j--; 1263 } else if (j > 0) { 1264 int treg = r600_get_temp(ctx); 1265 for (k = 0; k < 4; k++) { 1266 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1267 alu.op = ALU_OP1_MOV; 1268 alu.src[0].sel = ctx->src[i].sel; 1269 alu.src[0].chan = k; 1270 alu.src[0].rel = ctx->src[i].rel; 1271 alu.src[0].kc_bank = ctx->src[i].kc_bank; 1272 alu.src[0].kc_rel = ctx->src[i].kc_rel; 1273 alu.dst.sel = treg; 1274 alu.dst.chan = k; 1275 alu.dst.write = 1; 1276 if (k == 3) 1277 alu.last = 1; 1278 r = r600_bytecode_add_alu(ctx->bc, &alu); 1279 if (r) 1280 return r; 1281 } 1282 ctx->src[i].sel = treg; 1283 ctx->src[i].rel =0; 1284 j--; 1285 } 1286 } 1287 return 0; 1288} 1289 1290/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 1291static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 1292{ 1293 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1294 struct r600_bytecode_alu alu; 1295 int i, j, k, nliteral, r; 1296 1297 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 1298 if 
(ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1299 nliteral++; 1300 } 1301 } 1302 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 1303 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1304 int treg = r600_get_temp(ctx); 1305 for (k = 0; k < 4; k++) { 1306 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1307 alu.op = ALU_OP1_MOV; 1308 alu.src[0].sel = ctx->src[i].sel; 1309 alu.src[0].chan = k; 1310 alu.src[0].value = ctx->src[i].value[k]; 1311 alu.dst.sel = treg; 1312 alu.dst.chan = k; 1313 alu.dst.write = 1; 1314 if (k == 3) 1315 alu.last = 1; 1316 r = r600_bytecode_add_alu(ctx->bc, &alu); 1317 if (r) 1318 return r; 1319 } 1320 ctx->src[i].sel = treg; 1321 j--; 1322 } 1323 } 1324 return 0; 1325} 1326 1327static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 1328{ 1329 int i, r, count = ctx->shader->ninput; 1330 1331 for (i = 0; i < count; i++) { 1332 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 1333 r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input); 1334 if (r) 1335 return r; 1336 } 1337 } 1338 return 0; 1339} 1340 1341static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so, 1342 int stream, unsigned *stream_item_size) 1343{ 1344 unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; 1345 unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS]; 1346 int i, j, r; 1347 1348 /* Sanity checking. */ 1349 if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) { 1350 R600_ERR("Too many stream outputs: %d\n", so->num_outputs); 1351 r = -EINVAL; 1352 goto out_err; 1353 } 1354 for (i = 0; i < so->num_outputs; i++) { 1355 if (so->output[i].output_buffer >= 4) { 1356 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", 1357 so->output[i].output_buffer); 1358 r = -EINVAL; 1359 goto out_err; 1360 } 1361 } 1362 1363 /* Initialize locations where the outputs are stored. 
*/ 1364 for (i = 0; i < so->num_outputs; i++) { 1365 1366 so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr; 1367 start_comp[i] = so->output[i].start_component; 1368 /* Lower outputs with dst_offset < start_component. 1369 * 1370 * We can only output 4D vectors with a write mask, e.g. we can 1371 * only output the W component at offset 3, etc. If we want 1372 * to store Y, Z, or W at buffer offset 0, we need to use MOV 1373 * to move it to X and output X. */ 1374 if (so->output[i].dst_offset < so->output[i].start_component) { 1375 unsigned tmp = r600_get_temp(ctx); 1376 1377 for (j = 0; j < so->output[i].num_components; j++) { 1378 struct r600_bytecode_alu alu; 1379 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1380 alu.op = ALU_OP1_MOV; 1381 alu.src[0].sel = so_gpr[i]; 1382 alu.src[0].chan = so->output[i].start_component + j; 1383 1384 alu.dst.sel = tmp; 1385 alu.dst.chan = j; 1386 alu.dst.write = 1; 1387 if (j == so->output[i].num_components - 1) 1388 alu.last = 1; 1389 r = r600_bytecode_add_alu(ctx->bc, &alu); 1390 if (r) 1391 return r; 1392 } 1393 start_comp[i] = 0; 1394 so_gpr[i] = tmp; 1395 } 1396 } 1397 1398 /* Write outputs to buffers. 
*/ 1399 for (i = 0; i < so->num_outputs; i++) { 1400 struct r600_bytecode_output output; 1401 1402 if (stream != -1 && stream != so->output[i].output_buffer) 1403 continue; 1404 1405 memset(&output, 0, sizeof(struct r600_bytecode_output)); 1406 output.gpr = so_gpr[i]; 1407 output.elem_size = so->output[i].num_components - 1; 1408 if (output.elem_size == 2) 1409 output.elem_size = 3; // 3 not supported, write 4 with junk at end 1410 output.array_base = so->output[i].dst_offset - start_comp[i]; 1411 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 1412 output.burst_count = 1; 1413 /* array_size is an upper limit for the burst_count 1414 * with MEM_STREAM instructions */ 1415 output.array_size = 0xFFF; 1416 output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i]; 1417 1418 if (ctx->bc->chip_class >= EVERGREEN) { 1419 switch (so->output[i].output_buffer) { 1420 case 0: 1421 output.op = CF_OP_MEM_STREAM0_BUF0; 1422 break; 1423 case 1: 1424 output.op = CF_OP_MEM_STREAM0_BUF1; 1425 break; 1426 case 2: 1427 output.op = CF_OP_MEM_STREAM0_BUF2; 1428 break; 1429 case 3: 1430 output.op = CF_OP_MEM_STREAM0_BUF3; 1431 break; 1432 } 1433 output.op += so->output[i].stream * 4; 1434 assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3); 1435 ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4; 1436 } else { 1437 switch (so->output[i].output_buffer) { 1438 case 0: 1439 output.op = CF_OP_MEM_STREAM0; 1440 break; 1441 case 1: 1442 output.op = CF_OP_MEM_STREAM1; 1443 break; 1444 case 2: 1445 output.op = CF_OP_MEM_STREAM2; 1446 break; 1447 case 3: 1448 output.op = CF_OP_MEM_STREAM3; 1449 break; 1450 } 1451 ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer; 1452 } 1453 r = r600_bytecode_add_output(ctx->bc, &output); 1454 if (r) 1455 goto out_err; 1456 } 1457 return 0; 1458out_err: 1459 return r; 1460} 1461 1462static void convert_edgeflag_to_int(struct 
r600_shader_ctx *ctx) 1463{ 1464 struct r600_bytecode_alu alu; 1465 unsigned reg; 1466 1467 if (!ctx->shader->vs_out_edgeflag) 1468 return; 1469 1470 reg = ctx->shader->output[ctx->edgeflag_output].gpr; 1471 1472 /* clamp(x, 0, 1) */ 1473 memset(&alu, 0, sizeof(alu)); 1474 alu.op = ALU_OP1_MOV; 1475 alu.src[0].sel = reg; 1476 alu.dst.sel = reg; 1477 alu.dst.write = 1; 1478 alu.dst.clamp = 1; 1479 alu.last = 1; 1480 r600_bytecode_add_alu(ctx->bc, &alu); 1481 1482 memset(&alu, 0, sizeof(alu)); 1483 alu.op = ALU_OP1_FLT_TO_INT; 1484 alu.src[0].sel = reg; 1485 alu.dst.sel = reg; 1486 alu.dst.write = 1; 1487 alu.last = 1; 1488 r600_bytecode_add_alu(ctx->bc, &alu); 1489} 1490 1491static int generate_gs_copy_shader(struct r600_context *rctx, 1492 struct r600_pipe_shader *gs, 1493 struct pipe_stream_output_info *so) 1494{ 1495 struct r600_shader_ctx ctx = {}; 1496 struct r600_shader *gs_shader = &gs->shader; 1497 struct r600_pipe_shader *cshader; 1498 int ocnt = gs_shader->noutput; 1499 struct r600_bytecode_alu alu; 1500 struct r600_bytecode_vtx vtx; 1501 struct r600_bytecode_output output; 1502 struct r600_bytecode_cf *cf_jump, *cf_pop, 1503 *last_exp_pos = NULL, *last_exp_param = NULL; 1504 int i, j, next_clip_pos = 61, next_param = 0; 1505 int ring; 1506 1507 cshader = calloc(1, sizeof(struct r600_pipe_shader)); 1508 if (!cshader) 1509 return 0; 1510 1511 memcpy(cshader->shader.output, gs_shader->output, ocnt * 1512 sizeof(struct r600_shader_io)); 1513 1514 cshader->shader.noutput = ocnt; 1515 1516 ctx.shader = &cshader->shader; 1517 ctx.bc = &ctx.shader->bc; 1518 ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX; 1519 1520 r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family, 1521 rctx->screen->has_compressed_msaa_texturing); 1522 1523 ctx.bc->isa = rctx->isa; 1524 1525 cf_jump = NULL; 1526 memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes)); 1527 1528 /* R0.x = R0.x & 0x3fffffff */ 1529 memset(&alu, 0, sizeof(alu)); 1530 alu.op = 
ALU_OP2_AND_INT; 1531 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1532 alu.src[1].value = 0x3fffffff; 1533 alu.dst.write = 1; 1534 r600_bytecode_add_alu(ctx.bc, &alu); 1535 1536 /* R0.y = R0.x >> 30 */ 1537 memset(&alu, 0, sizeof(alu)); 1538 alu.op = ALU_OP2_LSHR_INT; 1539 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1540 alu.src[1].value = 0x1e; 1541 alu.dst.chan = 1; 1542 alu.dst.write = 1; 1543 alu.last = 1; 1544 r600_bytecode_add_alu(ctx.bc, &alu); 1545 1546 /* fetch vertex data from GSVS ring */ 1547 for (i = 0; i < ocnt; ++i) { 1548 struct r600_shader_io *out = &ctx.shader->output[i]; 1549 1550 out->gpr = i + 1; 1551 out->ring_offset = i * 16; 1552 1553 memset(&vtx, 0, sizeof(vtx)); 1554 vtx.op = FETCH_OP_VFETCH; 1555 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 1556 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 1557 vtx.mega_fetch_count = 16; 1558 vtx.offset = out->ring_offset; 1559 vtx.dst_gpr = out->gpr; 1560 vtx.src_gpr = 0; 1561 vtx.dst_sel_x = 0; 1562 vtx.dst_sel_y = 1; 1563 vtx.dst_sel_z = 2; 1564 vtx.dst_sel_w = 3; 1565 if (rctx->b.chip_class >= EVERGREEN) { 1566 vtx.use_const_fields = 1; 1567 } else { 1568 vtx.data_format = FMT_32_32_32_32_FLOAT; 1569 } 1570 1571 r600_bytecode_add_vtx(ctx.bc, &vtx); 1572 } 1573 ctx.temp_reg = i + 1; 1574 for (ring = 3; ring >= 0; --ring) { 1575 bool enabled = false; 1576 for (i = 0; i < so->num_outputs; i++) { 1577 if (so->output[i].stream == ring) { 1578 enabled = true; 1579 break; 1580 } 1581 } 1582 if (ring != 0 && !enabled) { 1583 cshader->shader.ring_item_sizes[ring] = 0; 1584 continue; 1585 } 1586 1587 if (cf_jump) { 1588 // Patch up jump label 1589 r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 1590 cf_pop = ctx.bc->cf_last; 1591 1592 cf_jump->cf_addr = cf_pop->id + 2; 1593 cf_jump->pop_count = 1; 1594 cf_pop->cf_addr = cf_pop->id + 2; 1595 cf_pop->pop_count = 1; 1596 } 1597 1598 /* PRED_SETE_INT __, R0.y, ring */ 1599 memset(&alu, 0, sizeof(alu)); 1600 alu.op = ALU_OP2_PRED_SETE_INT; 1601 alu.src[0].chan = 1; 1602 
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1603 alu.src[1].value = ring; 1604 alu.execute_mask = 1; 1605 alu.update_pred = 1; 1606 alu.last = 1; 1607 r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE); 1608 1609 r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP); 1610 cf_jump = ctx.bc->cf_last; 1611 1612 if (enabled) 1613 emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]); 1614 cshader->shader.ring_item_sizes[ring] = ocnt * 16; 1615 } 1616 1617 /* bc adds nops - copy it */ 1618 if (ctx.bc->chip_class == R600) { 1619 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1620 alu.op = ALU_OP0_NOP; 1621 alu.last = 1; 1622 r600_bytecode_add_alu(ctx.bc, &alu); 1623 1624 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 1625 } 1626 1627 /* export vertex data */ 1628 /* XXX factor out common code with r600_shader_from_tgsi ? */ 1629 for (i = 0; i < ocnt; ++i) { 1630 struct r600_shader_io *out = &ctx.shader->output[i]; 1631 bool instream0 = true; 1632 if (out->name == TGSI_SEMANTIC_CLIPVERTEX) 1633 continue; 1634 1635 for (j = 0; j < so->num_outputs; j++) { 1636 if (so->output[j].register_index == i) { 1637 if (so->output[j].stream == 0) 1638 break; 1639 if (so->output[j].stream > 0) 1640 instream0 = false; 1641 } 1642 } 1643 if (!instream0) 1644 continue; 1645 memset(&output, 0, sizeof(output)); 1646 output.gpr = out->gpr; 1647 output.elem_size = 3; 1648 output.swizzle_x = 0; 1649 output.swizzle_y = 1; 1650 output.swizzle_z = 2; 1651 output.swizzle_w = 3; 1652 output.burst_count = 1; 1653 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1654 output.op = CF_OP_EXPORT; 1655 switch (out->name) { 1656 case TGSI_SEMANTIC_POSITION: 1657 output.array_base = 60; 1658 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1659 break; 1660 1661 case TGSI_SEMANTIC_PSIZE: 1662 output.array_base = 61; 1663 if (next_clip_pos == 61) 1664 next_clip_pos = 62; 1665 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1666 output.swizzle_y = 7; 1667 
output.swizzle_z = 7; 1668 output.swizzle_w = 7; 1669 ctx.shader->vs_out_misc_write = 1; 1670 ctx.shader->vs_out_point_size = 1; 1671 break; 1672 case TGSI_SEMANTIC_LAYER: 1673 if (out->spi_sid) { 1674 /* duplicate it as PARAM to pass to the pixel shader */ 1675 output.array_base = next_param++; 1676 r600_bytecode_add_output(ctx.bc, &output); 1677 last_exp_param = ctx.bc->cf_last; 1678 } 1679 output.array_base = 61; 1680 if (next_clip_pos == 61) 1681 next_clip_pos = 62; 1682 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1683 output.swizzle_x = 7; 1684 output.swizzle_y = 7; 1685 output.swizzle_z = 0; 1686 output.swizzle_w = 7; 1687 ctx.shader->vs_out_misc_write = 1; 1688 ctx.shader->vs_out_layer = 1; 1689 break; 1690 case TGSI_SEMANTIC_VIEWPORT_INDEX: 1691 if (out->spi_sid) { 1692 /* duplicate it as PARAM to pass to the pixel shader */ 1693 output.array_base = next_param++; 1694 r600_bytecode_add_output(ctx.bc, &output); 1695 last_exp_param = ctx.bc->cf_last; 1696 } 1697 output.array_base = 61; 1698 if (next_clip_pos == 61) 1699 next_clip_pos = 62; 1700 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1701 ctx.shader->vs_out_misc_write = 1; 1702 ctx.shader->vs_out_viewport = 1; 1703 output.swizzle_x = 7; 1704 output.swizzle_y = 7; 1705 output.swizzle_z = 7; 1706 output.swizzle_w = 0; 1707 break; 1708 case TGSI_SEMANTIC_CLIPDIST: 1709 /* spi_sid is 0 for clipdistance outputs that were generated 1710 * for clipvertex - we don't need to pass them to PS */ 1711 ctx.shader->clip_dist_write = gs->shader.clip_dist_write; 1712 if (out->spi_sid) { 1713 /* duplicate it as PARAM to pass to the pixel shader */ 1714 output.array_base = next_param++; 1715 r600_bytecode_add_output(ctx.bc, &output); 1716 last_exp_param = ctx.bc->cf_last; 1717 } 1718 output.array_base = next_clip_pos++; 1719 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1720 break; 1721 case TGSI_SEMANTIC_FOG: 1722 output.swizzle_y = 4; /* 0 */ 1723 output.swizzle_z = 4; /* 0 */ 1724 
output.swizzle_w = 5; /* 1 */ 1725 break; 1726 default: 1727 output.array_base = next_param++; 1728 break; 1729 } 1730 r600_bytecode_add_output(ctx.bc, &output); 1731 if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) 1732 last_exp_param = ctx.bc->cf_last; 1733 else 1734 last_exp_pos = ctx.bc->cf_last; 1735 } 1736 1737 if (!last_exp_pos) { 1738 memset(&output, 0, sizeof(output)); 1739 output.gpr = 0; 1740 output.elem_size = 3; 1741 output.swizzle_x = 7; 1742 output.swizzle_y = 7; 1743 output.swizzle_z = 7; 1744 output.swizzle_w = 7; 1745 output.burst_count = 1; 1746 output.type = 2; 1747 output.op = CF_OP_EXPORT; 1748 output.array_base = 60; 1749 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1750 r600_bytecode_add_output(ctx.bc, &output); 1751 last_exp_pos = ctx.bc->cf_last; 1752 } 1753 1754 if (!last_exp_param) { 1755 memset(&output, 0, sizeof(output)); 1756 output.gpr = 0; 1757 output.elem_size = 3; 1758 output.swizzle_x = 7; 1759 output.swizzle_y = 7; 1760 output.swizzle_z = 7; 1761 output.swizzle_w = 7; 1762 output.burst_count = 1; 1763 output.type = 2; 1764 output.op = CF_OP_EXPORT; 1765 output.array_base = next_param++; 1766 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1767 r600_bytecode_add_output(ctx.bc, &output); 1768 last_exp_param = ctx.bc->cf_last; 1769 } 1770 1771 last_exp_pos->op = CF_OP_EXPORT_DONE; 1772 last_exp_param->op = CF_OP_EXPORT_DONE; 1773 1774 r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 1775 cf_pop = ctx.bc->cf_last; 1776 1777 cf_jump->cf_addr = cf_pop->id + 2; 1778 cf_jump->pop_count = 1; 1779 cf_pop->cf_addr = cf_pop->id + 2; 1780 cf_pop->pop_count = 1; 1781 1782 if (ctx.bc->chip_class == CAYMAN) 1783 cm_bytecode_add_cf_end(ctx.bc); 1784 else { 1785 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 1786 ctx.bc->cf_last->end_of_program = 1; 1787 } 1788 1789 gs->gs_copy_shader = cshader; 1790 cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 1791 1792 ctx.bc->nstack = 1; 1793 1794 return 
r600_bytecode_build(ctx.bc); 1795} 1796 1797static int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind) 1798{ 1799 if (ind) { 1800 struct r600_bytecode_alu alu; 1801 int r; 1802 1803 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1804 alu.op = ALU_OP2_ADD_INT; 1805 alu.src[0].sel = ctx->gs_export_gpr_tregs[idx]; 1806 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1807 alu.src[1].value = ctx->gs_out_ring_offset >> 4; 1808 alu.dst.sel = ctx->gs_export_gpr_tregs[idx]; 1809 alu.dst.write = 1; 1810 alu.last = 1; 1811 r = r600_bytecode_add_alu(ctx->bc, &alu); 1812 if (r) 1813 return r; 1814 } 1815 return 0; 1816} 1817 1818static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind) 1819{ 1820 struct r600_bytecode_output output; 1821 int i, k, ring_offset; 1822 int effective_stream = stream == -1 ? 0 : stream; 1823 int idx = 0; 1824 1825 for (i = 0; i < ctx->shader->noutput; i++) { 1826 if (ctx->gs_for_vs) { 1827 /* for ES we need to lookup corresponding ring offset expected by GS 1828 * (map this output to GS input by name and sid) */ 1829 /* FIXME precompute offsets */ 1830 ring_offset = -1; 1831 for(k = 0; k < ctx->gs_for_vs->ninput; ++k) { 1832 struct r600_shader_io *in = &ctx->gs_for_vs->input[k]; 1833 struct r600_shader_io *out = &ctx->shader->output[i]; 1834 if (in->name == out->name && in->sid == out->sid) 1835 ring_offset = in->ring_offset; 1836 } 1837 1838 if (ring_offset == -1) 1839 continue; 1840 } else { 1841 ring_offset = idx * 16; 1842 idx++; 1843 } 1844 1845 if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) 1846 continue; 1847 /* next_ring_offset after parsing input decls contains total size of 1848 * single vertex data, gs_next_vertex - current vertex index */ 1849 if (!ind) 1850 ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex; 1851 1852 memset(&output, 0, sizeof(struct r600_bytecode_output)); 1853 output.gpr = ctx->shader->output[i].gpr; 
1854 output.elem_size = 3; 1855 output.comp_mask = 0xF; 1856 output.burst_count = 1; 1857 1858 if (ind) 1859 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 1860 else 1861 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 1862 1863 switch (stream) { 1864 default: 1865 case 0: 1866 output.op = CF_OP_MEM_RING; break; 1867 case 1: 1868 output.op = CF_OP_MEM_RING1; break; 1869 case 2: 1870 output.op = CF_OP_MEM_RING2; break; 1871 case 3: 1872 output.op = CF_OP_MEM_RING3; break; 1873 } 1874 1875 if (ind) { 1876 output.array_base = ring_offset >> 2; /* in dwords */ 1877 output.array_size = 0xfff; 1878 output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream]; 1879 } else 1880 output.array_base = ring_offset >> 2; /* in dwords */ 1881 r600_bytecode_add_output(ctx->bc, &output); 1882 } 1883 1884 ++ctx->gs_next_vertex; 1885 return 0; 1886} 1887 1888static int r600_shader_from_tgsi(struct r600_context *rctx, 1889 struct r600_pipe_shader *pipeshader, 1890 union r600_shader_key key) 1891{ 1892 struct r600_screen *rscreen = rctx->screen; 1893 struct r600_shader *shader = &pipeshader->shader; 1894 struct tgsi_token *tokens = pipeshader->selector->tokens; 1895 struct pipe_stream_output_info so = pipeshader->selector->so; 1896 struct tgsi_full_immediate *immediate; 1897 struct r600_shader_ctx ctx; 1898 struct r600_bytecode_output output[32]; 1899 unsigned output_done, noutput; 1900 unsigned opcode; 1901 int i, j, k, r = 0; 1902 int next_param_base = 0, next_clip_base; 1903 int max_color_exports = MAX2(key.ps.nr_cbufs, 1); 1904 /* Declarations used by llvm code */ 1905 bool use_llvm = false; 1906 bool indirect_gprs; 1907 bool ring_outputs = false; 1908 bool pos_emitted = false; 1909 1910#ifdef R600_USE_LLVM 1911 use_llvm = rscreen->b.debug_flags & DBG_LLVM; 1912#endif 1913 ctx.bc = &shader->bc; 1914 ctx.shader = shader; 1915 ctx.native_integers = true; 1916 1917 1918 r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, 1919 
rscreen->has_compressed_msaa_texturing); 1920 ctx.tokens = tokens; 1921 tgsi_scan_shader(tokens, &ctx.info); 1922 shader->indirect_files = ctx.info.indirect_files; 1923 1924 shader->uses_doubles = ctx.info.uses_doubles; 1925 1926 indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER)); 1927 tgsi_parse_init(&ctx.parse, tokens); 1928 ctx.type = ctx.info.processor; 1929 shader->processor_type = ctx.type; 1930 ctx.bc->type = shader->processor_type; 1931 1932 switch (ctx.type) { 1933 case TGSI_PROCESSOR_VERTEX: 1934 shader->vs_as_gs_a = key.vs.as_gs_a; 1935 shader->vs_as_es = key.vs.as_es; 1936 if (shader->vs_as_es) 1937 ring_outputs = true; 1938 break; 1939 case TGSI_PROCESSOR_GEOMETRY: 1940 ring_outputs = true; 1941 break; 1942 case TGSI_PROCESSOR_FRAGMENT: 1943 shader->two_side = key.ps.color_two_side; 1944 break; 1945 default: 1946 break; 1947 } 1948 1949 if (shader->vs_as_es) { 1950 ctx.gs_for_vs = &rctx->gs_shader->current->shader; 1951 } else { 1952 ctx.gs_for_vs = NULL; 1953 } 1954 1955 ctx.next_ring_offset = 0; 1956 ctx.gs_out_ring_offset = 0; 1957 ctx.gs_next_vertex = 0; 1958 ctx.gs_stream_output_info = &so; 1959 1960 ctx.face_gpr = -1; 1961 ctx.fixed_pt_position_gpr = -1; 1962 ctx.fragcoord_input = -1; 1963 ctx.colors_used = 0; 1964 ctx.clip_vertex_write = 0; 1965 1966 shader->nr_ps_color_exports = 0; 1967 shader->nr_ps_max_color_exports = 0; 1968 1969 1970 /* register allocations */ 1971 /* Values [0,127] correspond to GPR[0..127]. 1972 * Values [128,159] correspond to constant buffer bank 0 1973 * Values [160,191] correspond to constant buffer bank 1 1974 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 1975 * Values [256,287] correspond to constant buffer bank 2 (EG) 1976 * Values [288,319] correspond to constant buffer bank 3 (EG) 1977 * Other special values are shown in the list below. 1978 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. 
(RV670+) 1979 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 1980 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 1981 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 1982 * 248 SQ_ALU_SRC_0: special constant 0.0. 1983 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 1984 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 1985 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 1986 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 1987 * 253 SQ_ALU_SRC_LITERAL: literal constant. 1988 * 254 SQ_ALU_SRC_PV: previous vector result. 1989 * 255 SQ_ALU_SRC_PS: previous scalar result. 1990 */ 1991 for (i = 0; i < TGSI_FILE_COUNT; i++) { 1992 ctx.file_offset[i] = 0; 1993 } 1994 1995#ifdef R600_USE_LLVM 1996 if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) { 1997 fprintf(stderr, "Warning: R600 LLVM backend does not support " 1998 "indirect adressing. 
Falling back to TGSI " 1999 "backend.\n"); 2000 use_llvm = 0; 2001 } 2002#endif 2003 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 2004 ctx.file_offset[TGSI_FILE_INPUT] = 1; 2005 if (!use_llvm) { 2006 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); 2007 } 2008 } 2009 if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { 2010 if (ctx.bc->chip_class >= EVERGREEN) 2011 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 2012 else 2013 ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); 2014 } 2015 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 2016 /* FIXME 1 would be enough in some cases (3 or less input vertices) */ 2017 ctx.file_offset[TGSI_FILE_INPUT] = 2; 2018 } 2019 ctx.use_llvm = use_llvm; 2020 2021 if (use_llvm) { 2022 ctx.file_offset[TGSI_FILE_OUTPUT] = 2023 ctx.file_offset[TGSI_FILE_INPUT]; 2024 } else { 2025 ctx.file_offset[TGSI_FILE_OUTPUT] = 2026 ctx.file_offset[TGSI_FILE_INPUT] + 2027 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 2028 } 2029 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 2030 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 2031 2032 /* Outside the GPR range. This will be translated to one of the 2033 * kcache banks later. 
*/ 2034 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 2035 2036 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 2037 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 2038 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 2039 ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1; 2040 ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2; 2041 2042 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 2043 ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3; 2044 ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4; 2045 ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5; 2046 ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6; 2047 ctx.temp_reg = ctx.bc->ar_reg + 7; 2048 } else { 2049 ctx.temp_reg = ctx.bc->ar_reg + 3; 2050 } 2051 2052 shader->max_arrays = 0; 2053 shader->num_arrays = 0; 2054 if (indirect_gprs) { 2055 2056 if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { 2057 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], 2058 ctx.file_offset[TGSI_FILE_OUTPUT] - 2059 ctx.file_offset[TGSI_FILE_INPUT], 2060 0x0F); 2061 } 2062 if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 2063 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], 2064 ctx.file_offset[TGSI_FILE_TEMPORARY] - 2065 ctx.file_offset[TGSI_FILE_OUTPUT], 2066 0x0F); 2067 } 2068 } 2069 2070 ctx.nliterals = 0; 2071 ctx.literals = NULL; 2072 2073 shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]; 2074 shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 2075 shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]; 2076 2077 if (shader->vs_as_gs_a) 2078 vs_add_primid_output(&ctx, key.vs.prim_id_out); 2079 2080 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 2081 tgsi_parse_token(&ctx.parse); 2082 switch (ctx.parse.FullToken.Token.Type) { 2083 case TGSI_TOKEN_TYPE_IMMEDIATE: 2084 immediate = &ctx.parse.FullToken.FullImmediate; 2085 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 2086 
if(ctx.literals == NULL) { 2087 r = -ENOMEM; 2088 goto out_err; 2089 } 2090 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 2091 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 2092 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 2093 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 2094 ctx.nliterals++; 2095 break; 2096 case TGSI_TOKEN_TYPE_DECLARATION: 2097 r = tgsi_declaration(&ctx); 2098 if (r) 2099 goto out_err; 2100 break; 2101 case TGSI_TOKEN_TYPE_INSTRUCTION: 2102 case TGSI_TOKEN_TYPE_PROPERTY: 2103 break; 2104 default: 2105 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 2106 r = -EINVAL; 2107 goto out_err; 2108 } 2109 } 2110 2111 shader->ring_item_sizes[0] = ctx.next_ring_offset; 2112 shader->ring_item_sizes[1] = 0; 2113 shader->ring_item_sizes[2] = 0; 2114 shader->ring_item_sizes[3] = 0; 2115 2116 /* Process two side if needed */ 2117 if (shader->two_side && ctx.colors_used) { 2118 int i, count = ctx.shader->ninput; 2119 unsigned next_lds_loc = ctx.shader->nlds; 2120 2121 /* additional inputs will be allocated right after the existing inputs, 2122 * we won't need them after the color selection, so we don't need to 2123 * reserve these gprs for the rest of the shader code and to adjust 2124 * output offsets etc. 
*/ 2125 int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 2126 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 2127 2128 /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */ 2129 if (ctx.face_gpr == -1) { 2130 i = ctx.shader->ninput++; 2131 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 2132 ctx.shader->input[i].spi_sid = 0; 2133 ctx.shader->input[i].gpr = gpr++; 2134 ctx.face_gpr = ctx.shader->input[i].gpr; 2135 } 2136 2137 for (i = 0; i < count; i++) { 2138 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 2139 int ni = ctx.shader->ninput++; 2140 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 2141 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 2142 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 2143 ctx.shader->input[ni].gpr = gpr++; 2144 // TGSI to LLVM needs to know the lds position of inputs. 2145 // Non LLVM path computes it later (in process_twoside_color) 2146 ctx.shader->input[ni].lds_pos = next_lds_loc++; 2147 ctx.shader->input[i].back_color_input = ni; 2148 if (ctx.bc->chip_class >= EVERGREEN) { 2149 if ((r = evergreen_interp_input(&ctx, ni))) 2150 return r; 2151 } 2152 } 2153 } 2154 } 2155 2156/* LLVM backend setup */ 2157#ifdef R600_USE_LLVM 2158 if (use_llvm) { 2159 struct radeon_llvm_context radeon_llvm_ctx; 2160 LLVMModuleRef mod; 2161 bool dump = r600_can_dump_shader(&rscreen->b, tokens); 2162 boolean use_kill = false; 2163 2164 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); 2165 radeon_llvm_ctx.type = ctx.type; 2166 radeon_llvm_ctx.two_side = shader->two_side; 2167 radeon_llvm_ctx.face_gpr = ctx.face_gpr; 2168 radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1; 2169 radeon_llvm_ctx.r600_inputs = ctx.shader->input; 2170 radeon_llvm_ctx.r600_outputs = ctx.shader->output; 2171 radeon_llvm_ctx.color_buffer_count = max_color_exports; 2172 radeon_llvm_ctx.chip_class = ctx.bc->chip_class; 2173 radeon_llvm_ctx.fs_color_all = 
shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN); 2174 radeon_llvm_ctx.stream_outputs = &so; 2175 radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one; 2176 radeon_llvm_ctx.has_compressed_msaa_texturing = 2177 ctx.bc->has_compressed_msaa_texturing; 2178 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); 2179 ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp; 2180 ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers; 2181 2182 if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) { 2183 radeon_llvm_dispose(&radeon_llvm_ctx); 2184 use_llvm = 0; 2185 fprintf(stderr, "R600 LLVM backend failed to compile " 2186 "shader. Falling back to TGSI\n"); 2187 } else { 2188 ctx.file_offset[TGSI_FILE_OUTPUT] = 2189 ctx.file_offset[TGSI_FILE_INPUT]; 2190 } 2191 if (use_kill) 2192 ctx.shader->uses_kill = use_kill; 2193 radeon_llvm_dispose(&radeon_llvm_ctx); 2194 } 2195#endif 2196/* End of LLVM backend setup */ 2197 2198 if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) 2199 shader->nr_ps_max_color_exports = 8; 2200 2201 if (!use_llvm) { 2202 if (ctx.fragcoord_input >= 0) { 2203 if (ctx.bc->chip_class == CAYMAN) { 2204 for (j = 0 ; j < 4; j++) { 2205 struct r600_bytecode_alu alu; 2206 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2207 alu.op = ALU_OP1_RECIP_IEEE; 2208 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 2209 alu.src[0].chan = 3; 2210 2211 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 2212 alu.dst.chan = j; 2213 alu.dst.write = (j == 3); 2214 alu.last = 1; 2215 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 2216 return r; 2217 } 2218 } else { 2219 struct r600_bytecode_alu alu; 2220 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2221 alu.op = ALU_OP1_RECIP_IEEE; 2222 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 2223 alu.src[0].chan = 3; 2224 2225 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 2226 alu.dst.chan = 3; 2227 alu.dst.write = 1; 2228 
alu.last = 1; 2229 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 2230 return r; 2231 } 2232 } 2233 2234 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 2235 struct r600_bytecode_alu alu; 2236 int r; 2237 2238 /* GS thread with no output workaround - emit a cut at start of GS */ 2239 if (ctx.bc->chip_class == R600) 2240 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); 2241 2242 for (j = 0; j < 4; j++) { 2243 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2244 alu.op = ALU_OP1_MOV; 2245 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 2246 alu.src[0].value = 0; 2247 alu.dst.sel = ctx.gs_export_gpr_tregs[j]; 2248 alu.dst.write = 1; 2249 alu.last = 1; 2250 r = r600_bytecode_add_alu(ctx.bc, &alu); 2251 if (r) 2252 return r; 2253 } 2254 } 2255 if (shader->two_side && ctx.colors_used) { 2256 if ((r = process_twoside_color_inputs(&ctx))) 2257 return r; 2258 } 2259 2260 tgsi_parse_init(&ctx.parse, tokens); 2261 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 2262 tgsi_parse_token(&ctx.parse); 2263 switch (ctx.parse.FullToken.Token.Type) { 2264 case TGSI_TOKEN_TYPE_INSTRUCTION: 2265 r = tgsi_is_supported(&ctx); 2266 if (r) 2267 goto out_err; 2268 ctx.max_driver_temp_used = 0; 2269 /* reserve first tmp for everyone */ 2270 r600_get_temp(&ctx); 2271 2272 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 2273 if ((r = tgsi_split_constant(&ctx))) 2274 goto out_err; 2275 if ((r = tgsi_split_literal_constant(&ctx))) 2276 goto out_err; 2277 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) 2278 if ((r = tgsi_split_gs_inputs(&ctx))) 2279 goto out_err; 2280 if (ctx.bc->chip_class == CAYMAN) 2281 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 2282 else if (ctx.bc->chip_class >= EVERGREEN) 2283 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 2284 else 2285 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 2286 r = ctx.inst_info->process(&ctx); 2287 if (r) 2288 goto out_err; 2289 break; 2290 default: 2291 break; 2292 } 2293 } 2294 } 2295 2296 /* Reset the temporary 
register counter. */ 2297 ctx.max_driver_temp_used = 0; 2298 2299 noutput = shader->noutput; 2300 2301 if (!ring_outputs && ctx.clip_vertex_write) { 2302 unsigned clipdist_temp[2]; 2303 2304 clipdist_temp[0] = r600_get_temp(&ctx); 2305 clipdist_temp[1] = r600_get_temp(&ctx); 2306 2307 /* need to convert a clipvertex write into clipdistance writes and not export 2308 the clip vertex anymore */ 2309 2310 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 2311 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 2312 shader->output[noutput].gpr = clipdist_temp[0]; 2313 noutput++; 2314 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 2315 shader->output[noutput].gpr = clipdist_temp[1]; 2316 noutput++; 2317 2318 /* reset spi_sid for clipvertex output to avoid confusing spi */ 2319 shader->output[ctx.cv_output].spi_sid = 0; 2320 2321 shader->clip_dist_write = 0xFF; 2322 2323 for (i = 0; i < 8; i++) { 2324 int oreg = i >> 2; 2325 int ochan = i & 3; 2326 2327 for (j = 0; j < 4; j++) { 2328 struct r600_bytecode_alu alu; 2329 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2330 alu.op = ALU_OP2_DOT4; 2331 alu.src[0].sel = shader->output[ctx.cv_output].gpr; 2332 alu.src[0].chan = j; 2333 2334 alu.src[1].sel = 512 + i; 2335 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 2336 alu.src[1].chan = j; 2337 2338 alu.dst.sel = clipdist_temp[oreg]; 2339 alu.dst.chan = j; 2340 alu.dst.write = (j == ochan); 2341 if (j == 3) 2342 alu.last = 1; 2343 if (!use_llvm) 2344 r = r600_bytecode_add_alu(ctx.bc, &alu); 2345 if (r) 2346 return r; 2347 } 2348 } 2349 } 2350 2351 /* Add stream outputs. 
*/ 2352 if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX && 2353 so.num_outputs && !use_llvm) 2354 emit_streamout(&ctx, &so, -1, NULL); 2355 2356 pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 2357 convert_edgeflag_to_int(&ctx); 2358 2359 if (ring_outputs) { 2360 if (shader->vs_as_es) { 2361 ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx); 2362 ctx.gs_export_gpr_tregs[1] = -1; 2363 ctx.gs_export_gpr_tregs[2] = -1; 2364 ctx.gs_export_gpr_tregs[3] = -1; 2365 2366 emit_gs_ring_writes(&ctx, &so, -1, FALSE); 2367 } 2368 } else { 2369 /* Export output */ 2370 next_clip_base = shader->vs_out_misc_write ? 62 : 61; 2371 2372 for (i = 0, j = 0; i < noutput; i++, j++) { 2373 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2374 output[j].gpr = shader->output[i].gpr; 2375 output[j].elem_size = 3; 2376 output[j].swizzle_x = 0; 2377 output[j].swizzle_y = 1; 2378 output[j].swizzle_z = 2; 2379 output[j].swizzle_w = 3; 2380 output[j].burst_count = 1; 2381 output[j].type = -1; 2382 output[j].op = CF_OP_EXPORT; 2383 switch (ctx.type) { 2384 case TGSI_PROCESSOR_VERTEX: 2385 switch (shader->output[i].name) { 2386 case TGSI_SEMANTIC_POSITION: 2387 output[j].array_base = 60; 2388 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2389 pos_emitted = true; 2390 break; 2391 2392 case TGSI_SEMANTIC_PSIZE: 2393 output[j].array_base = 61; 2394 output[j].swizzle_y = 7; 2395 output[j].swizzle_z = 7; 2396 output[j].swizzle_w = 7; 2397 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2398 pos_emitted = true; 2399 break; 2400 case TGSI_SEMANTIC_EDGEFLAG: 2401 output[j].array_base = 61; 2402 output[j].swizzle_x = 7; 2403 output[j].swizzle_y = 0; 2404 output[j].swizzle_z = 7; 2405 output[j].swizzle_w = 7; 2406 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2407 pos_emitted = true; 2408 break; 2409 case TGSI_SEMANTIC_LAYER: 2410 /* spi_sid is 0 for outputs that are 2411 * not consumed by PS */ 2412 if 
(shader->output[i].spi_sid) { 2413 output[j].array_base = next_param_base++; 2414 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2415 j++; 2416 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 2417 } 2418 output[j].array_base = 61; 2419 output[j].swizzle_x = 7; 2420 output[j].swizzle_y = 7; 2421 output[j].swizzle_z = 0; 2422 output[j].swizzle_w = 7; 2423 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2424 pos_emitted = true; 2425 break; 2426 case TGSI_SEMANTIC_VIEWPORT_INDEX: 2427 /* spi_sid is 0 for outputs that are 2428 * not consumed by PS */ 2429 if (shader->output[i].spi_sid) { 2430 output[j].array_base = next_param_base++; 2431 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2432 j++; 2433 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 2434 } 2435 output[j].array_base = 61; 2436 output[j].swizzle_x = 7; 2437 output[j].swizzle_y = 7; 2438 output[j].swizzle_z = 7; 2439 output[j].swizzle_w = 0; 2440 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2441 pos_emitted = true; 2442 break; 2443 case TGSI_SEMANTIC_CLIPVERTEX: 2444 j--; 2445 break; 2446 case TGSI_SEMANTIC_CLIPDIST: 2447 output[j].array_base = next_clip_base++; 2448 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2449 pos_emitted = true; 2450 /* spi_sid is 0 for clipdistance outputs that were generated 2451 * for clipvertex - we don't need to pass them to PS */ 2452 if (shader->output[i].spi_sid) { 2453 j++; 2454 /* duplicate it as PARAM to pass to the pixel shader */ 2455 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 2456 output[j].array_base = next_param_base++; 2457 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2458 } 2459 break; 2460 case TGSI_SEMANTIC_FOG: 2461 output[j].swizzle_y = 4; /* 0 */ 2462 output[j].swizzle_z = 4; /* 0 */ 2463 output[j].swizzle_w = 5; /* 1 */ 2464 break; 2465 case TGSI_SEMANTIC_PRIMID: 2466 output[j].swizzle_x = 2; 2467 
output[j].swizzle_y = 4; /* 0 */ 2468 output[j].swizzle_z = 4; /* 0 */ 2469 output[j].swizzle_w = 4; /* 0 */ 2470 break; 2471 } 2472 2473 break; 2474 case TGSI_PROCESSOR_FRAGMENT: 2475 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 2476 /* never export more colors than the number of CBs */ 2477 if (shader->output[i].sid >= max_color_exports) { 2478 /* skip export */ 2479 j--; 2480 continue; 2481 } 2482 output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 2483 output[j].array_base = shader->output[i].sid; 2484 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2485 shader->nr_ps_color_exports++; 2486 if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { 2487 for (k = 1; k < max_color_exports; k++) { 2488 j++; 2489 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2490 output[j].gpr = shader->output[i].gpr; 2491 output[j].elem_size = 3; 2492 output[j].swizzle_x = 0; 2493 output[j].swizzle_y = 1; 2494 output[j].swizzle_z = 2; 2495 output[j].swizzle_w = key.ps.alpha_to_one ? 
5 : 3; 2496 output[j].burst_count = 1; 2497 output[j].array_base = k; 2498 output[j].op = CF_OP_EXPORT; 2499 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2500 shader->nr_ps_color_exports++; 2501 } 2502 } 2503 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 2504 output[j].array_base = 61; 2505 output[j].swizzle_x = 2; 2506 output[j].swizzle_y = 7; 2507 output[j].swizzle_z = output[j].swizzle_w = 7; 2508 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2509 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 2510 output[j].array_base = 61; 2511 output[j].swizzle_x = 7; 2512 output[j].swizzle_y = 1; 2513 output[j].swizzle_z = output[j].swizzle_w = 7; 2514 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2515 } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 2516 output[j].array_base = 61; 2517 output[j].swizzle_x = 7; 2518 output[j].swizzle_y = 7; 2519 output[j].swizzle_z = 0; 2520 output[j].swizzle_w = 7; 2521 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2522 } else { 2523 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 2524 r = -EINVAL; 2525 goto out_err; 2526 } 2527 break; 2528 default: 2529 R600_ERR("unsupported processor type %d\n", ctx.type); 2530 r = -EINVAL; 2531 goto out_err; 2532 } 2533 2534 if (output[j].type==-1) { 2535 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2536 output[j].array_base = next_param_base++; 2537 } 2538 } 2539 2540 /* add fake position export */ 2541 if (ctx.type == TGSI_PROCESSOR_VERTEX && pos_emitted == false) { 2542 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2543 output[j].gpr = 0; 2544 output[j].elem_size = 3; 2545 output[j].swizzle_x = 7; 2546 output[j].swizzle_y = 7; 2547 output[j].swizzle_z = 7; 2548 output[j].swizzle_w = 7; 2549 output[j].burst_count = 1; 2550 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2551 output[j].array_base = 60; 2552 output[j].op = 
CF_OP_EXPORT; 2553 j++; 2554 } 2555 2556 /* add fake param output for vertex shader if no param is exported */ 2557 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { 2558 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2559 output[j].gpr = 0; 2560 output[j].elem_size = 3; 2561 output[j].swizzle_x = 7; 2562 output[j].swizzle_y = 7; 2563 output[j].swizzle_z = 7; 2564 output[j].swizzle_w = 7; 2565 output[j].burst_count = 1; 2566 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2567 output[j].array_base = 0; 2568 output[j].op = CF_OP_EXPORT; 2569 j++; 2570 } 2571 2572 /* add fake pixel export */ 2573 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { 2574 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2575 output[j].gpr = 0; 2576 output[j].elem_size = 3; 2577 output[j].swizzle_x = 7; 2578 output[j].swizzle_y = 7; 2579 output[j].swizzle_z = 7; 2580 output[j].swizzle_w = 7; 2581 output[j].burst_count = 1; 2582 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2583 output[j].array_base = 0; 2584 output[j].op = CF_OP_EXPORT; 2585 j++; 2586 shader->nr_ps_color_exports++; 2587 } 2588 2589 noutput = j; 2590 2591 /* set export done on last export of each type */ 2592 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 2593 if (!(output_done & (1 << output[i].type))) { 2594 output_done |= (1 << output[i].type); 2595 output[i].op = CF_OP_EXPORT_DONE; 2596 } 2597 } 2598 /* add output to bytecode */ 2599 if (!use_llvm) { 2600 for (i = 0; i < noutput; i++) { 2601 r = r600_bytecode_add_output(ctx.bc, &output[i]); 2602 if (r) 2603 goto out_err; 2604 } 2605 } 2606 } 2607 2608 /* add program end */ 2609 if (!use_llvm) { 2610 if (ctx.bc->chip_class == CAYMAN) 2611 cm_bytecode_add_cf_end(ctx.bc); 2612 else { 2613 const struct cf_op_info *last = NULL; 2614 2615 if (ctx.bc->cf_last) 2616 last = r600_isa_cf(ctx.bc->cf_last->op); 2617 2618 /* alu clause instructions don't have EOP bit, so add NOP */ 
2619 if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS) 2620 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 2621 2622 ctx.bc->cf_last->end_of_program = 1; 2623 } 2624 } 2625 2626 /* check GPR limit - we have 124 = 128 - 4 2627 * (4 are reserved as alu clause temporary registers) */ 2628 if (ctx.bc->ngpr > 124) { 2629 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 2630 r = -ENOMEM; 2631 goto out_err; 2632 } 2633 2634 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 2635 if ((r = generate_gs_copy_shader(rctx, pipeshader, &so))) 2636 return r; 2637 } 2638 2639 free(ctx.literals); 2640 tgsi_parse_free(&ctx.parse); 2641 return 0; 2642out_err: 2643 free(ctx.literals); 2644 tgsi_parse_free(&ctx.parse); 2645 return r; 2646} 2647 2648static int tgsi_unsupported(struct r600_shader_ctx *ctx) 2649{ 2650 const unsigned tgsi_opcode = 2651 ctx->parse.FullToken.FullInstruction.Instruction.Opcode; 2652 R600_ERR("%s tgsi opcode unsupported\n", 2653 tgsi_get_opcode_name(tgsi_opcode)); 2654 return -EINVAL; 2655} 2656 2657static int tgsi_end(struct r600_shader_ctx *ctx) 2658{ 2659 return 0; 2660} 2661 2662static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 2663 const struct r600_shader_src *shader_src, 2664 unsigned chan) 2665{ 2666 bc_src->sel = shader_src->sel; 2667 bc_src->chan = shader_src->swizzle[chan]; 2668 bc_src->neg = shader_src->neg; 2669 bc_src->abs = shader_src->abs; 2670 bc_src->rel = shader_src->rel; 2671 bc_src->value = shader_src->value[bc_src->chan]; 2672 bc_src->kc_bank = shader_src->kc_bank; 2673 bc_src->kc_rel = shader_src->kc_rel; 2674} 2675 2676static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 2677{ 2678 bc_src->abs = 1; 2679 bc_src->neg = 0; 2680} 2681 2682static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 2683{ 2684 bc_src->neg = !bc_src->neg; 2685} 2686 2687static void tgsi_dst(struct r600_shader_ctx 
*ctx, 2688 const struct tgsi_full_dst_register *tgsi_dst, 2689 unsigned swizzle, 2690 struct r600_bytecode_alu_dst *r600_dst) 2691{ 2692 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2693 2694 r600_dst->sel = tgsi_dst->Register.Index; 2695 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 2696 r600_dst->chan = swizzle; 2697 r600_dst->write = 1; 2698 if (tgsi_dst->Register.Indirect) 2699 r600_dst->rel = V_SQ_REL_RELATIVE; 2700 if (inst->Instruction.Saturate) { 2701 r600_dst->clamp = 1; 2702 } 2703} 2704 2705static int tgsi_last_instruction(unsigned writemask) 2706{ 2707 int i, lasti = 0; 2708 2709 for (i = 0; i < 4; i++) { 2710 if (writemask & (1 << i)) { 2711 lasti = i; 2712 } 2713 } 2714 return lasti; 2715} 2716 2717 2718 2719static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap) 2720{ 2721 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2722 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2723 struct r600_bytecode_alu alu; 2724 int i, j, r, lasti = tgsi_last_instruction(write_mask); 2725 int use_tmp = 0; 2726 2727 if (singledest) { 2728 switch (write_mask) { 2729 case 0x1: 2730 write_mask = 0x3; 2731 break; 2732 case 0x2: 2733 use_tmp = 1; 2734 write_mask = 0x3; 2735 break; 2736 case 0x4: 2737 write_mask = 0xc; 2738 break; 2739 case 0x8: 2740 write_mask = 0xc; 2741 use_tmp = 3; 2742 break; 2743 } 2744 } 2745 2746 lasti = tgsi_last_instruction(write_mask); 2747 for (i = 0; i <= lasti; i++) { 2748 2749 if (!(write_mask & (1 << i))) 2750 continue; 2751 2752 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2753 2754 if (singledest) { 2755 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2756 if (use_tmp) { 2757 alu.dst.sel = ctx->temp_reg; 2758 alu.dst.chan = i; 2759 alu.dst.write = 1; 2760 } 2761 if (i == 1 || i == 3) 2762 alu.dst.write = 0; 2763 } else 2764 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2765 2766 alu.op = ctx->inst_info->op; 2767 if 
(ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) { 2768 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2769 } else if (!swap) { 2770 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2771 r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 2772 } 2773 } else { 2774 r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i)); 2775 r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i)); 2776 } 2777 2778 /* handle some special cases */ 2779 if (i == 1 || i == 3) { 2780 switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) { 2781 case TGSI_OPCODE_SUB: 2782 r600_bytecode_src_toggle_neg(&alu.src[1]); 2783 break; 2784 case TGSI_OPCODE_DABS: 2785 r600_bytecode_src_set_abs(&alu.src[0]); 2786 break; 2787 default: 2788 break; 2789 } 2790 } 2791 if (i == lasti) { 2792 alu.last = 1; 2793 } 2794 r = r600_bytecode_add_alu(ctx->bc, &alu); 2795 if (r) 2796 return r; 2797 } 2798 2799 if (use_tmp) { 2800 write_mask = inst->Dst[0].Register.WriteMask; 2801 2802 /* move result from temp to dst */ 2803 for (i = 0; i <= lasti; i++) { 2804 if (!(write_mask & (1 << i))) 2805 continue; 2806 2807 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2808 alu.op = ALU_OP1_MOV; 2809 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2810 alu.src[0].sel = ctx->temp_reg; 2811 alu.src[0].chan = use_tmp - 1; 2812 alu.last = (i == lasti); 2813 2814 r = r600_bytecode_add_alu(ctx->bc, &alu); 2815 if (r) 2816 return r; 2817 } 2818 } 2819 return 0; 2820} 2821 2822static int tgsi_op2_64(struct r600_shader_ctx *ctx) 2823{ 2824 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2825 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2826 /* confirm writemasking */ 2827 if ((write_mask & 0x3) != 0x3 && 2828 (write_mask & 0xc) != 0xc) { 2829 fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask); 2830 return -1; 2831 } 2832 return tgsi_op2_64_params(ctx, false, false); 2833} 2834 2835static int 
tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx) 2836{ 2837 return tgsi_op2_64_params(ctx, true, false); 2838} 2839 2840static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx) 2841{ 2842 return tgsi_op2_64_params(ctx, true, true); 2843} 2844 2845static int tgsi_op3_64(struct r600_shader_ctx *ctx) 2846{ 2847 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2848 struct r600_bytecode_alu alu; 2849 int i, j, r; 2850 int lasti = 3; 2851 int tmp = r600_get_temp(ctx); 2852 2853 for (i = 0; i < lasti + 1; i++) { 2854 2855 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2856 alu.op = ctx->inst_info->op; 2857 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2858 r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 0 : 1); 2859 } 2860 2861 if (inst->Dst[0].Register.WriteMask & (1 << i)) 2862 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2863 else 2864 alu.dst.sel = tmp; 2865 2866 alu.dst.chan = i; 2867 alu.is_op3 = 1; 2868 if (i == lasti) { 2869 alu.last = 1; 2870 } 2871 r = r600_bytecode_add_alu(ctx->bc, &alu); 2872 if (r) 2873 return r; 2874 } 2875 return 0; 2876} 2877 2878static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 2879{ 2880 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2881 struct r600_bytecode_alu alu; 2882 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2883 int i, j, r, lasti = tgsi_last_instruction(write_mask); 2884 /* use temp register if trans_only and more than one dst component */ 2885 int use_tmp = trans_only && (write_mask ^ (1 << lasti)); 2886 2887 for (i = 0; i <= lasti; i++) { 2888 if (!(write_mask & (1 << i))) 2889 continue; 2890 2891 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2892 if (use_tmp) { 2893 alu.dst.sel = ctx->temp_reg; 2894 alu.dst.chan = i; 2895 alu.dst.write = 1; 2896 } else 2897 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2898 2899 alu.op = ctx->inst_info->op; 2900 if (!swap) { 2901 for (j = 0; j < 
inst->Instruction.NumSrcRegs; j++) { 2902 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 2903 } 2904 } else { 2905 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2906 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2907 } 2908 /* handle some special cases */ 2909 switch (inst->Instruction.Opcode) { 2910 case TGSI_OPCODE_SUB: 2911 r600_bytecode_src_toggle_neg(&alu.src[1]); 2912 break; 2913 case TGSI_OPCODE_ABS: 2914 r600_bytecode_src_set_abs(&alu.src[0]); 2915 break; 2916 default: 2917 break; 2918 } 2919 if (i == lasti || trans_only) { 2920 alu.last = 1; 2921 } 2922 r = r600_bytecode_add_alu(ctx->bc, &alu); 2923 if (r) 2924 return r; 2925 } 2926 2927 if (use_tmp) { 2928 /* move result from temp to dst */ 2929 for (i = 0; i <= lasti; i++) { 2930 if (!(write_mask & (1 << i))) 2931 continue; 2932 2933 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2934 alu.op = ALU_OP1_MOV; 2935 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2936 alu.src[0].sel = ctx->temp_reg; 2937 alu.src[0].chan = i; 2938 alu.last = (i == lasti); 2939 2940 r = r600_bytecode_add_alu(ctx->bc, &alu); 2941 if (r) 2942 return r; 2943 } 2944 } 2945 return 0; 2946} 2947 2948static int tgsi_op2(struct r600_shader_ctx *ctx) 2949{ 2950 return tgsi_op2_s(ctx, 0, 0); 2951} 2952 2953static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 2954{ 2955 return tgsi_op2_s(ctx, 1, 0); 2956} 2957 2958static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 2959{ 2960 return tgsi_op2_s(ctx, 0, 1); 2961} 2962 2963static int tgsi_ineg(struct r600_shader_ctx *ctx) 2964{ 2965 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2966 struct r600_bytecode_alu alu; 2967 int i, r; 2968 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2969 2970 for (i = 0; i < lasti + 1; i++) { 2971 2972 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2973 continue; 2974 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2975 alu.op = ctx->inst_info->op; 2976 2977 alu.src[0].sel = V_SQ_ALU_SRC_0; 2978 
2979 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2980 2981 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2982 2983 if (i == lasti) { 2984 alu.last = 1; 2985 } 2986 r = r600_bytecode_add_alu(ctx->bc, &alu); 2987 if (r) 2988 return r; 2989 } 2990 return 0; 2991 2992} 2993 2994static int tgsi_dneg(struct r600_shader_ctx *ctx) 2995{ 2996 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2997 struct r600_bytecode_alu alu; 2998 int i, r; 2999 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3000 3001 for (i = 0; i < lasti + 1; i++) { 3002 3003 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3004 continue; 3005 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3006 alu.op = ALU_OP1_MOV; 3007 3008 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3009 3010 if (i == 1 || i == 3) 3011 r600_bytecode_src_toggle_neg(&alu.src[0]); 3012 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3013 3014 if (i == lasti) { 3015 alu.last = 1; 3016 } 3017 r = r600_bytecode_add_alu(ctx->bc, &alu); 3018 if (r) 3019 return r; 3020 } 3021 return 0; 3022 3023} 3024 3025static int tgsi_dfracexp(struct r600_shader_ctx *ctx) 3026{ 3027 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3028 struct r600_bytecode_alu alu; 3029 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3030 int i, j, r; 3031 int firsti = write_mask == 0xc ? 
2 : 0; 3032 3033 for (i = 0; i <= 3; i++) { 3034 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3035 alu.op = ctx->inst_info->op; 3036 3037 alu.dst.sel = ctx->temp_reg; 3038 alu.dst.chan = i; 3039 alu.dst.write = 1; 3040 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3041 r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 3042 } 3043 3044 if (i == 3) 3045 alu.last = 1; 3046 3047 r = r600_bytecode_add_alu(ctx->bc, &alu); 3048 if (r) 3049 return r; 3050 } 3051 3052 /* MOV first two channels to writemask dst0 */ 3053 for (i = 0; i <= 1; i++) { 3054 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3055 alu.op = ALU_OP1_MOV; 3056 alu.src[0].chan = i + 2; 3057 alu.src[0].sel = ctx->temp_reg; 3058 3059 tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst); 3060 alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1; 3061 alu.last = 1; 3062 r = r600_bytecode_add_alu(ctx->bc, &alu); 3063 if (r) 3064 return r; 3065 } 3066 3067 for (i = 0; i <= 3; i++) { 3068 if (inst->Dst[1].Register.WriteMask & (1 << i)) { 3069 /* MOV third channels to writemask dst1 */ 3070 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3071 alu.op = ALU_OP1_MOV; 3072 alu.src[0].chan = 1; 3073 alu.src[0].sel = ctx->temp_reg; 3074 3075 tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst); 3076 alu.last = 1; 3077 r = r600_bytecode_add_alu(ctx->bc, &alu); 3078 if (r) 3079 return r; 3080 break; 3081 } 3082 } 3083 return 0; 3084} 3085 3086 3087static int egcm_int_to_double(struct r600_shader_ctx *ctx) 3088{ 3089 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3090 struct r600_bytecode_alu alu; 3091 int i, r; 3092 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3093 3094 assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D || 3095 inst->Instruction.Opcode == TGSI_OPCODE_U2D); 3096 3097 for (i = 0; i <= (lasti+1)/2; i++) { 3098 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3099 alu.op = ctx->inst_info->op; 3100 3101 
r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3102 alu.dst.sel = ctx->temp_reg; 3103 alu.dst.chan = i; 3104 alu.dst.write = 1; 3105 alu.last = 1; 3106 3107 r = r600_bytecode_add_alu(ctx->bc, &alu); 3108 if (r) 3109 return r; 3110 } 3111 3112 for (i = 0; i <= lasti; i++) { 3113 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3114 alu.op = ALU_OP1_FLT32_TO_FLT64; 3115 3116 alu.src[0].chan = i/2; 3117 if (i%2 == 0) 3118 alu.src[0].sel = ctx->temp_reg; 3119 else { 3120 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3121 alu.src[0].value = 0x0; 3122 } 3123 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3124 alu.last = i == lasti; 3125 3126 r = r600_bytecode_add_alu(ctx->bc, &alu); 3127 if (r) 3128 return r; 3129 } 3130 3131 return 0; 3132} 3133 3134static int egcm_double_to_int(struct r600_shader_ctx *ctx) 3135{ 3136 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3137 struct r600_bytecode_alu alu; 3138 int i, r; 3139 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3140 3141 assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I || 3142 inst->Instruction.Opcode == TGSI_OPCODE_D2U); 3143 3144 for (i = 0; i <= lasti; i++) { 3145 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3146 alu.op = ALU_OP1_FLT64_TO_FLT32; 3147 3148 r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i)); 3149 alu.dst.chan = i; 3150 alu.dst.sel = ctx->temp_reg; 3151 alu.dst.write = i%2 == 0; 3152 alu.last = i == lasti; 3153 3154 r = r600_bytecode_add_alu(ctx->bc, &alu); 3155 if (r) 3156 return r; 3157 } 3158 3159 for (i = 0; i <= (lasti+1)/2; i++) { 3160 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3161 alu.op = ctx->inst_info->op; 3162 3163 alu.src[0].chan = i*2; 3164 alu.src[0].sel = ctx->temp_reg; 3165 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 3166 alu.last = 1; 3167 3168 r = r600_bytecode_add_alu(ctx->bc, &alu); 3169 if (r) 3170 return r; 3171 } 3172 3173 return 0; 3174} 3175 3176static int cayman_emit_double_instr(struct r600_shader_ctx 
*ctx)
{
	/* Cayman single-source double-precision transcendental ops
	 * (DRSQ/DSQRT handling below suggests this services those and
	 * similar one-operand fp64 instructions).  The op is issued in
	 * three vector slots; the 64-bit result lands in t1.xy and is
	 * then replicated to the channels the write mask requests.
	 */
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int i, r;
	struct r600_bytecode_alu alu;
	int last_slot = 3;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int t1 = ctx->temp_reg;

	/* these have to write the result to X/Y by the looks of it */
	for (i = 0 ; i < last_slot; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		/* should only be one src regs */
		assert (inst->Instruction.NumSrcRegs == 1);

		/* the 64-bit operand is fed as a channel pair, high word
		 * (chan 1) in src0 and low word (chan 0) in src1 —
		 * NOTE(review): confirm this operand order against the ISA */
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);

		/* RSQ should take the absolute value of src */
		if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
		    ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) {
			r600_bytecode_src_set_abs(&alu.src[1]);
		}
		alu.dst.sel = t1;
		alu.dst.chan = i;
		/* only t1.xy hold the usable 64-bit result */
		alu.dst.write = (i == 0 || i == 1);

		if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* replicate t1.xy into every enabled destination channel:
	 * even channels take the low word (t1.x), odd the high (t1.y) */
	for (i = 0 ; i <= lasti; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = t1;
		alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

static int cayman_emit_float_instr(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int i, j, r;
	struct r600_bytecode_alu alu;
	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ?
4 : 3; 3236 3237 for (i = 0 ; i < last_slot; i++) { 3238 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3239 alu.op = ctx->inst_info->op; 3240 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3241 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 3242 3243 /* RSQ should take the absolute value of src */ 3244 if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) { 3245 r600_bytecode_src_set_abs(&alu.src[j]); 3246 } 3247 } 3248 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3249 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 3250 3251 if (i == last_slot - 1) 3252 alu.last = 1; 3253 r = r600_bytecode_add_alu(ctx->bc, &alu); 3254 if (r) 3255 return r; 3256 } 3257 return 0; 3258} 3259 3260static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 3261{ 3262 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3263 int i, j, k, r; 3264 struct r600_bytecode_alu alu; 3265 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3266 int t1 = ctx->temp_reg; 3267 3268 for (k = 0; k <= lasti; k++) { 3269 if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 3270 continue; 3271 3272 for (i = 0 ; i < 4; i++) { 3273 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3274 alu.op = ctx->inst_info->op; 3275 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3276 r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 3277 } 3278 alu.dst.sel = t1; 3279 alu.dst.chan = i; 3280 alu.dst.write = (i == k); 3281 if (i == 3) 3282 alu.last = 1; 3283 r = r600_bytecode_add_alu(ctx->bc, &alu); 3284 if (r) 3285 return r; 3286 } 3287 } 3288 3289 for (i = 0 ; i <= lasti; i++) { 3290 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3291 continue; 3292 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3293 alu.op = ALU_OP1_MOV; 3294 alu.src[0].sel = t1; 3295 alu.src[0].chan = i; 3296 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3297 alu.dst.write = 1; 3298 if (i == lasti) 3299 alu.last = 1; 3300 r = r600_bytecode_add_alu(ctx->bc, &alu); 3301 if (r) 3302 return 
r; 3303 } 3304 3305 return 0; 3306} 3307 3308 3309static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) 3310{ 3311 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3312 int i, j, k, r; 3313 struct r600_bytecode_alu alu; 3314 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3315 int t1 = ctx->temp_reg; 3316 3317 for (k = 0; k < 2; k++) { 3318 if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2)))) 3319 continue; 3320 3321 for (i = 0; i < 4; i++) { 3322 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3323 alu.op = ctx->inst_info->op; 3324 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3325 r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));; 3326 } 3327 alu.dst.sel = t1; 3328 alu.dst.chan = i; 3329 alu.dst.write = 1; 3330 if (i == 3) 3331 alu.last = 1; 3332 r = r600_bytecode_add_alu(ctx->bc, &alu); 3333 if (r) 3334 return r; 3335 } 3336 } 3337 3338 for (i = 0; i <= lasti; i++) { 3339 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3340 continue; 3341 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3342 alu.op = ALU_OP1_MOV; 3343 alu.src[0].sel = t1; 3344 alu.src[0].chan = i; 3345 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3346 alu.dst.write = 1; 3347 if (i == lasti) 3348 alu.last = 1; 3349 r = r600_bytecode_add_alu(ctx->bc, &alu); 3350 if (r) 3351 return r; 3352 } 3353 3354 return 0; 3355} 3356 3357/* 3358 * r600 - trunc to -PI..PI range 3359 * r700 - normalize by dividing by 2PI 3360 * see fdo bug 27901 3361 */ 3362static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 3363{ 3364 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 3365 static float double_pi = 3.1415926535 * 2; 3366 static float neg_pi = -3.1415926535; 3367 3368 int r; 3369 struct r600_bytecode_alu alu; 3370 3371 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3372 alu.op = ALU_OP3_MULADD; 3373 alu.is_op3 = 1; 3374 3375 alu.dst.chan = 0; 3376 alu.dst.sel = ctx->temp_reg; 3377 alu.dst.write = 1; 3378 
3379 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3380 3381 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 3382 alu.src[1].chan = 0; 3383 alu.src[1].value = *(uint32_t *)&half_inv_pi; 3384 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 3385 alu.src[2].chan = 0; 3386 alu.last = 1; 3387 r = r600_bytecode_add_alu(ctx->bc, &alu); 3388 if (r) 3389 return r; 3390 3391 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3392 alu.op = ALU_OP1_FRACT; 3393 3394 alu.dst.chan = 0; 3395 alu.dst.sel = ctx->temp_reg; 3396 alu.dst.write = 1; 3397 3398 alu.src[0].sel = ctx->temp_reg; 3399 alu.src[0].chan = 0; 3400 alu.last = 1; 3401 r = r600_bytecode_add_alu(ctx->bc, &alu); 3402 if (r) 3403 return r; 3404 3405 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3406 alu.op = ALU_OP3_MULADD; 3407 alu.is_op3 = 1; 3408 3409 alu.dst.chan = 0; 3410 alu.dst.sel = ctx->temp_reg; 3411 alu.dst.write = 1; 3412 3413 alu.src[0].sel = ctx->temp_reg; 3414 alu.src[0].chan = 0; 3415 3416 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 3417 alu.src[1].chan = 0; 3418 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 3419 alu.src[2].chan = 0; 3420 3421 if (ctx->bc->chip_class == R600) { 3422 alu.src[1].value = *(uint32_t *)&double_pi; 3423 alu.src[2].value = *(uint32_t *)&neg_pi; 3424 } else { 3425 alu.src[1].sel = V_SQ_ALU_SRC_1; 3426 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 3427 alu.src[2].neg = 1; 3428 } 3429 3430 alu.last = 1; 3431 r = r600_bytecode_add_alu(ctx->bc, &alu); 3432 if (r) 3433 return r; 3434 return 0; 3435} 3436 3437static int cayman_trig(struct r600_shader_ctx *ctx) 3438{ 3439 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3440 struct r600_bytecode_alu alu; 3441 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3;
	int i, r;

	/* normalize the angle into temp_reg.x first */
	r = tgsi_setup_trig(ctx);
	if (r)
		return r;


	/* Cayman: issue the scalar trig op in every used slot, each lane
	 * reading the prepared angle from temp_reg.x */
	for (i = 0; i < last_slot; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		alu.dst.chan = i;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 0;
		if (i == last_slot - 1)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Non-Cayman SIN/COS (and friends): run tgsi_setup_trig to get the
 * adjusted angle in temp_reg.x, execute the trig op once into
 * temp_reg.x, then replicate that scalar into every channel enabled
 * in the destination write mask.
 */
static int tgsi_trig(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	r = tgsi_setup_trig(ctx);
	if (r)
		return r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ctx->inst_info->op;
	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;

	alu.src[0].sel = ctx->temp_reg;
	alu.src[0].chan = 0;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* replicate result */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;

		/* src chan stays 0 (from memset): always read temp_reg.x */
		alu.src[0].sel = ctx->temp_reg;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

static int tgsi_scs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	/* We'll only need the trig stuff if we are going to write to the
	 * X or Y
components of the destination vector. 3519 */ 3520 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 3521 r = tgsi_setup_trig(ctx); 3522 if (r) 3523 return r; 3524 } 3525 3526 /* dst.x = COS */ 3527 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3528 if (ctx->bc->chip_class == CAYMAN) { 3529 for (i = 0 ; i < 3; i++) { 3530 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3531 alu.op = ALU_OP1_COS; 3532 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3533 3534 if (i == 0) 3535 alu.dst.write = 1; 3536 else 3537 alu.dst.write = 0; 3538 alu.src[0].sel = ctx->temp_reg; 3539 alu.src[0].chan = 0; 3540 if (i == 2) 3541 alu.last = 1; 3542 r = r600_bytecode_add_alu(ctx->bc, &alu); 3543 if (r) 3544 return r; 3545 } 3546 } else { 3547 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3548 alu.op = ALU_OP1_COS; 3549 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 3550 3551 alu.src[0].sel = ctx->temp_reg; 3552 alu.src[0].chan = 0; 3553 alu.last = 1; 3554 r = r600_bytecode_add_alu(ctx->bc, &alu); 3555 if (r) 3556 return r; 3557 } 3558 } 3559 3560 /* dst.y = SIN */ 3561 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3562 if (ctx->bc->chip_class == CAYMAN) { 3563 for (i = 0 ; i < 3; i++) { 3564 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3565 alu.op = ALU_OP1_SIN; 3566 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3567 if (i == 1) 3568 alu.dst.write = 1; 3569 else 3570 alu.dst.write = 0; 3571 alu.src[0].sel = ctx->temp_reg; 3572 alu.src[0].chan = 0; 3573 if (i == 2) 3574 alu.last = 1; 3575 r = r600_bytecode_add_alu(ctx->bc, &alu); 3576 if (r) 3577 return r; 3578 } 3579 } else { 3580 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3581 alu.op = ALU_OP1_SIN; 3582 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 3583 3584 alu.src[0].sel = ctx->temp_reg; 3585 alu.src[0].chan = 0; 3586 alu.last = 1; 3587 r = r600_bytecode_add_alu(ctx->bc, &alu); 3588 if (r) 3589 return r; 3590 } 3591 } 3592 3593 /* dst.z = 0.0; */ 3594 if (inst->Dst[0].Register.WriteMask 
& TGSI_WRITEMASK_Z) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;

		tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);

		alu.src[0].sel = V_SQ_ALU_SRC_0;
		alu.src[0].chan = 0;

		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst.w = 1.0; */
	if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;

		tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst);

		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* Emit TGSI KILL / KILL_IF.
 *
 * One kill op (ctx->inst_info->op, set up by the opcode table) is
 * emitted per channel.  Unconditional KILL compares against the
 * constant -1.0 (V_SQ_ALU_SRC_1 with neg set) so the test always
 * triggers; KILL_IF compares against the per-channel source value.
 */
static int tgsi_kill(struct r600_shader_ctx *ctx)
{
	const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		alu.dst.chan = i;

		alu.src[0].sel = V_SQ_ALU_SRC_0;

		if (inst->Instruction.Opcode == TGSI_OPCODE_KILL) {
			/* always-true predicate: compare 0.0 against -1.0 */
			alu.src[1].sel = V_SQ_ALU_SRC_1;
			alu.src[1].neg = 1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* kill must be last in ALU */
	ctx->bc->force_add_cf = 1;
	ctx->shader->uses_kill = TRUE;
	return 0;
}

static int tgsi_lit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;

	/* tmp.x = max(src.y, 0.0) */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP2_MAX;
	r600_bytecode_src(&alu.src[0], &ctx->src[0],
1); 3676 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 3677 alu.src[1].chan = 1; 3678 3679 alu.dst.sel = ctx->temp_reg; 3680 alu.dst.chan = 0; 3681 alu.dst.write = 1; 3682 3683 alu.last = 1; 3684 r = r600_bytecode_add_alu(ctx->bc, &alu); 3685 if (r) 3686 return r; 3687 3688 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 3689 { 3690 int chan; 3691 int sel; 3692 int i; 3693 3694 if (ctx->bc->chip_class == CAYMAN) { 3695 for (i = 0; i < 3; i++) { 3696 /* tmp.z = log(tmp.x) */ 3697 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3698 alu.op = ALU_OP1_LOG_CLAMPED; 3699 alu.src[0].sel = ctx->temp_reg; 3700 alu.src[0].chan = 0; 3701 alu.dst.sel = ctx->temp_reg; 3702 alu.dst.chan = i; 3703 if (i == 2) { 3704 alu.dst.write = 1; 3705 alu.last = 1; 3706 } else 3707 alu.dst.write = 0; 3708 3709 r = r600_bytecode_add_alu(ctx->bc, &alu); 3710 if (r) 3711 return r; 3712 } 3713 } else { 3714 /* tmp.z = log(tmp.x) */ 3715 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3716 alu.op = ALU_OP1_LOG_CLAMPED; 3717 alu.src[0].sel = ctx->temp_reg; 3718 alu.src[0].chan = 0; 3719 alu.dst.sel = ctx->temp_reg; 3720 alu.dst.chan = 2; 3721 alu.dst.write = 1; 3722 alu.last = 1; 3723 r = r600_bytecode_add_alu(ctx->bc, &alu); 3724 if (r) 3725 return r; 3726 } 3727 3728 chan = alu.dst.chan; 3729 sel = alu.dst.sel; 3730 3731 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 3732 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3733 alu.op = ALU_OP3_MUL_LIT; 3734 alu.src[0].sel = sel; 3735 alu.src[0].chan = chan; 3736 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 3737 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 3738 alu.dst.sel = ctx->temp_reg; 3739 alu.dst.chan = 0; 3740 alu.dst.write = 1; 3741 alu.is_op3 = 1; 3742 alu.last = 1; 3743 r = r600_bytecode_add_alu(ctx->bc, &alu); 3744 if (r) 3745 return r; 3746 3747 if (ctx->bc->chip_class == CAYMAN) { 3748 for (i = 0; i < 3; i++) { 3749 /* dst.z = exp(tmp.x) */ 3750 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3751 alu.op = 
ALU_OP1_EXP_IEEE; 3752 alu.src[0].sel = ctx->temp_reg; 3753 alu.src[0].chan = 0; 3754 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3755 if (i == 2) { 3756 alu.dst.write = 1; 3757 alu.last = 1; 3758 } else 3759 alu.dst.write = 0; 3760 r = r600_bytecode_add_alu(ctx->bc, &alu); 3761 if (r) 3762 return r; 3763 } 3764 } else { 3765 /* dst.z = exp(tmp.x) */ 3766 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3767 alu.op = ALU_OP1_EXP_IEEE; 3768 alu.src[0].sel = ctx->temp_reg; 3769 alu.src[0].chan = 0; 3770 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 3771 alu.last = 1; 3772 r = r600_bytecode_add_alu(ctx->bc, &alu); 3773 if (r) 3774 return r; 3775 } 3776 } 3777 3778 /* dst.x, <- 1.0 */ 3779 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3780 alu.op = ALU_OP1_MOV; 3781 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 3782 alu.src[0].chan = 0; 3783 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 3784 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 3785 r = r600_bytecode_add_alu(ctx->bc, &alu); 3786 if (r) 3787 return r; 3788 3789 /* dst.y = max(src.x, 0.0) */ 3790 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3791 alu.op = ALU_OP2_MAX; 3792 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3793 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 3794 alu.src[1].chan = 0; 3795 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 3796 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 3797 r = r600_bytecode_add_alu(ctx->bc, &alu); 3798 if (r) 3799 return r; 3800 3801 /* dst.w, <- 1.0 */ 3802 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3803 alu.op = ALU_OP1_MOV; 3804 alu.src[0].sel = V_SQ_ALU_SRC_1; 3805 alu.src[0].chan = 0; 3806 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 3807 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 3808 alu.last = 1; 3809 r = r600_bytecode_add_alu(ctx->bc, &alu); 3810 if (r) 3811 return r; 3812 3813 return 0; 3814} 3815 3816static int tgsi_rsq(struct r600_shader_ctx *ctx) 3817{ 3818 struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));

	/* XXX:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIPSQRT_IEEE instead.
	 */
	alu.op = ALU_OP1_RECIPSQRT_CLAMPED;

	/* RSQ takes |src| per GL semantics */
	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
		r600_bytecode_src_set_abs(&alu.src[i]);
	}
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	/* replicate result */
	return tgsi_helper_tempx_replicate(ctx);
}

/* Broadcast temp_reg.x into every destination channel enabled in the
 * write mask.  Used after scalar (trans-unit) ops that leave their
 * result in temp_reg.x; the source channel stays 0 via the memset.
 */
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.src[0].sel = ctx->temp_reg;
		alu.op = ALU_OP1_MOV;
		alu.dst.chan = i;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Scalar transcendental helper: run ctx->inst_info->op on src.x into
 * temp_reg.x, then fan the result out to the written dst channels.
 */
static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ctx->inst_info->op;
	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
		r600_bytecode_src(&alu.src[i], &ctx->src[i], 0);
	}
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	/* replicate result */
	return
tgsi_helper_tempx_replicate(ctx);
}

/* POW on Cayman: POW(a,b) = EXP2(b * LOG2(a)).
 *
 * Cayman has no trans slot, so the scalar LOG is issued in three
 * vector slots (i < 3); only temp_reg.x is consumed afterwards — the
 * MUL reads src[1].chan 0 (left at 0 by the memset).  The final EXP is
 * issued per used lane with the lane's write-mask bit deciding the
 * write.
 */
static int cayman_pow(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int i, r;
	struct r600_bytecode_alu alu;
	int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3;

	for (i = 0; i < 3; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_LOG_IEEE;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		if (i == 2)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* b * LOG2(a) */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP2_MUL;
	r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
	alu.src[1].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	for (i = 0; i < last_slot; i++) {
		/* POW(a,b) = EXP2(b * LOG2(a))*/
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_EXP_IEEE;
		alu.src[0].sel = ctx->temp_reg;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		if (i == last_slot - 1)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* POW on pre-Cayman chips: same LOG/MUL/EXP sequence through
 * temp_reg.x, replicated afterwards by tgsi_helper_tempx_replicate. */
static int tgsi_pow(struct r600_shader_ctx *ctx)
{
	struct r600_bytecode_alu alu;
	int r;

	/* LOG2(a) */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_LOG_IEEE;
	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	/* b * LOG2(a) */
	memset(&alu, 0, sizeof(struct
r600_bytecode_alu)); 3954 alu.op = ALU_OP2_MUL; 3955 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 3956 alu.src[1].sel = ctx->temp_reg; 3957 alu.dst.sel = ctx->temp_reg; 3958 alu.dst.write = 1; 3959 alu.last = 1; 3960 r = r600_bytecode_add_alu(ctx->bc, &alu); 3961 if (r) 3962 return r; 3963 /* POW(a,b) = EXP2(b * LOG2(a))*/ 3964 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3965 alu.op = ALU_OP1_EXP_IEEE; 3966 alu.src[0].sel = ctx->temp_reg; 3967 alu.dst.sel = ctx->temp_reg; 3968 alu.dst.write = 1; 3969 alu.last = 1; 3970 r = r600_bytecode_add_alu(ctx->bc, &alu); 3971 if (r) 3972 return r; 3973 return tgsi_helper_tempx_replicate(ctx); 3974} 3975 3976static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 3977{ 3978 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3979 struct r600_bytecode_alu alu; 3980 int i, r, j; 3981 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3982 int tmp0 = ctx->temp_reg; 3983 int tmp1 = r600_get_temp(ctx); 3984 int tmp2 = r600_get_temp(ctx); 3985 int tmp3 = r600_get_temp(ctx); 3986 /* Unsigned path: 3987 * 3988 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 3989 * 3990 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 3991 * 2. tmp0.z = lo (tmp0.x * src2) 3992 * 3. tmp0.w = -tmp0.z 3993 * 4. tmp0.y = hi (tmp0.x * src2) 3994 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 3995 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 3996 * 7. tmp1.x = tmp0.x - tmp0.w 3997 * 8. tmp1.y = tmp0.x + tmp0.w 3998 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 3999 * 10. tmp0.z = hi(tmp0.x * src1) = q 4000 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 4001 * 4002 * 12. tmp0.w = src1 - tmp0.y = r 4003 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 4004 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 4005 * 4006 * if DIV 4007 * 4008 * 15. tmp1.z = tmp0.z + 1 = q + 1 4009 * 16. 
tmp1.w = tmp0.z - 1 = q - 1 4010 * 4011 * else MOD 4012 * 4013 * 15. tmp1.z = tmp0.w - src2 = r - src2 4014 * 16. tmp1.w = tmp0.w + src2 = r + src2 4015 * 4016 * endif 4017 * 4018 * 17. tmp1.x = tmp1.x & tmp1.y 4019 * 4020 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 4021 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 4022 * 4023 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 4024 * 20. dst = src2==0 ? MAX_UINT : tmp0.z 4025 * 4026 * Signed path: 4027 * 4028 * Same as unsigned, using abs values of the operands, 4029 * and fixing the sign of the result in the end. 4030 */ 4031 4032 for (i = 0; i < 4; i++) { 4033 if (!(write_mask & (1<<i))) 4034 continue; 4035 4036 if (signed_op) { 4037 4038 /* tmp2.x = -src0 */ 4039 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4040 alu.op = ALU_OP2_SUB_INT; 4041 4042 alu.dst.sel = tmp2; 4043 alu.dst.chan = 0; 4044 alu.dst.write = 1; 4045 4046 alu.src[0].sel = V_SQ_ALU_SRC_0; 4047 4048 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4049 4050 alu.last = 1; 4051 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4052 return r; 4053 4054 /* tmp2.y = -src1 */ 4055 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4056 alu.op = ALU_OP2_SUB_INT; 4057 4058 alu.dst.sel = tmp2; 4059 alu.dst.chan = 1; 4060 alu.dst.write = 1; 4061 4062 alu.src[0].sel = V_SQ_ALU_SRC_0; 4063 4064 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4065 4066 alu.last = 1; 4067 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4068 return r; 4069 4070 /* tmp2.z sign bit is set if src0 and src2 signs are different */ 4071 /* it will be a sign of the quotient */ 4072 if (!mod) { 4073 4074 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4075 alu.op = ALU_OP2_XOR_INT; 4076 4077 alu.dst.sel = tmp2; 4078 alu.dst.chan = 2; 4079 alu.dst.write = 1; 4080 4081 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4082 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4083 4084 alu.last = 1; 4085 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4086 return r; 4087 } 4088 4089 /* 
tmp2.x = |src0| */ 4090 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4091 alu.op = ALU_OP3_CNDGE_INT; 4092 alu.is_op3 = 1; 4093 4094 alu.dst.sel = tmp2; 4095 alu.dst.chan = 0; 4096 alu.dst.write = 1; 4097 4098 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4099 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4100 alu.src[2].sel = tmp2; 4101 alu.src[2].chan = 0; 4102 4103 alu.last = 1; 4104 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4105 return r; 4106 4107 /* tmp2.y = |src1| */ 4108 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4109 alu.op = ALU_OP3_CNDGE_INT; 4110 alu.is_op3 = 1; 4111 4112 alu.dst.sel = tmp2; 4113 alu.dst.chan = 1; 4114 alu.dst.write = 1; 4115 4116 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4117 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4118 alu.src[2].sel = tmp2; 4119 alu.src[2].chan = 1; 4120 4121 alu.last = 1; 4122 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4123 return r; 4124 4125 } 4126 4127 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 4128 if (ctx->bc->chip_class == CAYMAN) { 4129 /* tmp3.x = u2f(src2) */ 4130 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4131 alu.op = ALU_OP1_UINT_TO_FLT; 4132 4133 alu.dst.sel = tmp3; 4134 alu.dst.chan = 0; 4135 alu.dst.write = 1; 4136 4137 if (signed_op) { 4138 alu.src[0].sel = tmp2; 4139 alu.src[0].chan = 1; 4140 } else { 4141 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4142 } 4143 4144 alu.last = 1; 4145 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4146 return r; 4147 4148 /* tmp0.x = recip(tmp3.x) */ 4149 for (j = 0 ; j < 3; j++) { 4150 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4151 alu.op = ALU_OP1_RECIP_IEEE; 4152 4153 alu.dst.sel = tmp0; 4154 alu.dst.chan = j; 4155 alu.dst.write = (j == 0); 4156 4157 alu.src[0].sel = tmp3; 4158 alu.src[0].chan = 0; 4159 4160 if (j == 2) 4161 alu.last = 1; 4162 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4163 return r; 4164 } 4165 4166 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 4167 alu.op = ALU_OP2_MUL; 4168 4169 alu.src[0].sel = tmp0; 4170 alu.src[0].chan = 0; 4171 4172 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4173 alu.src[1].value = 0x4f800000; 4174 4175 alu.dst.sel = tmp3; 4176 alu.dst.write = 1; 4177 alu.last = 1; 4178 r = r600_bytecode_add_alu(ctx->bc, &alu); 4179 if (r) 4180 return r; 4181 4182 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4183 alu.op = ALU_OP1_FLT_TO_UINT; 4184 4185 alu.dst.sel = tmp0; 4186 alu.dst.chan = 0; 4187 alu.dst.write = 1; 4188 4189 alu.src[0].sel = tmp3; 4190 alu.src[0].chan = 0; 4191 4192 alu.last = 1; 4193 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4194 return r; 4195 4196 } else { 4197 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4198 alu.op = ALU_OP1_RECIP_UINT; 4199 4200 alu.dst.sel = tmp0; 4201 alu.dst.chan = 0; 4202 alu.dst.write = 1; 4203 4204 if (signed_op) { 4205 alu.src[0].sel = tmp2; 4206 alu.src[0].chan = 1; 4207 } else { 4208 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4209 } 4210 4211 alu.last = 1; 4212 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4213 return r; 4214 } 4215 4216 /* 2. 
tmp0.z = lo (tmp0.x * src2) */ 4217 if (ctx->bc->chip_class == CAYMAN) { 4218 for (j = 0 ; j < 4; j++) { 4219 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4220 alu.op = ALU_OP2_MULLO_UINT; 4221 4222 alu.dst.sel = tmp0; 4223 alu.dst.chan = j; 4224 alu.dst.write = (j == 2); 4225 4226 alu.src[0].sel = tmp0; 4227 alu.src[0].chan = 0; 4228 if (signed_op) { 4229 alu.src[1].sel = tmp2; 4230 alu.src[1].chan = 1; 4231 } else { 4232 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4233 } 4234 4235 alu.last = (j == 3); 4236 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4237 return r; 4238 } 4239 } else { 4240 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4241 alu.op = ALU_OP2_MULLO_UINT; 4242 4243 alu.dst.sel = tmp0; 4244 alu.dst.chan = 2; 4245 alu.dst.write = 1; 4246 4247 alu.src[0].sel = tmp0; 4248 alu.src[0].chan = 0; 4249 if (signed_op) { 4250 alu.src[1].sel = tmp2; 4251 alu.src[1].chan = 1; 4252 } else { 4253 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4254 } 4255 4256 alu.last = 1; 4257 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4258 return r; 4259 } 4260 4261 /* 3. tmp0.w = -tmp0.z */ 4262 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4263 alu.op = ALU_OP2_SUB_INT; 4264 4265 alu.dst.sel = tmp0; 4266 alu.dst.chan = 3; 4267 alu.dst.write = 1; 4268 4269 alu.src[0].sel = V_SQ_ALU_SRC_0; 4270 alu.src[1].sel = tmp0; 4271 alu.src[1].chan = 2; 4272 4273 alu.last = 1; 4274 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4275 return r; 4276 4277 /* 4. 
tmp0.y = hi (tmp0.x * src2) */ 4278 if (ctx->bc->chip_class == CAYMAN) { 4279 for (j = 0 ; j < 4; j++) { 4280 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4281 alu.op = ALU_OP2_MULHI_UINT; 4282 4283 alu.dst.sel = tmp0; 4284 alu.dst.chan = j; 4285 alu.dst.write = (j == 1); 4286 4287 alu.src[0].sel = tmp0; 4288 alu.src[0].chan = 0; 4289 4290 if (signed_op) { 4291 alu.src[1].sel = tmp2; 4292 alu.src[1].chan = 1; 4293 } else { 4294 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4295 } 4296 alu.last = (j == 3); 4297 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4298 return r; 4299 } 4300 } else { 4301 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4302 alu.op = ALU_OP2_MULHI_UINT; 4303 4304 alu.dst.sel = tmp0; 4305 alu.dst.chan = 1; 4306 alu.dst.write = 1; 4307 4308 alu.src[0].sel = tmp0; 4309 alu.src[0].chan = 0; 4310 4311 if (signed_op) { 4312 alu.src[1].sel = tmp2; 4313 alu.src[1].chan = 1; 4314 } else { 4315 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4316 } 4317 4318 alu.last = 1; 4319 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4320 return r; 4321 } 4322 4323 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 4324 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4325 alu.op = ALU_OP3_CNDE_INT; 4326 alu.is_op3 = 1; 4327 4328 alu.dst.sel = tmp0; 4329 alu.dst.chan = 2; 4330 alu.dst.write = 1; 4331 4332 alu.src[0].sel = tmp0; 4333 alu.src[0].chan = 1; 4334 alu.src[1].sel = tmp0; 4335 alu.src[1].chan = 3; 4336 alu.src[2].sel = tmp0; 4337 alu.src[2].chan = 2; 4338 4339 alu.last = 1; 4340 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4341 return r; 4342 4343 /* 6. 
tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 4344 if (ctx->bc->chip_class == CAYMAN) { 4345 for (j = 0 ; j < 4; j++) { 4346 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4347 alu.op = ALU_OP2_MULHI_UINT; 4348 4349 alu.dst.sel = tmp0; 4350 alu.dst.chan = j; 4351 alu.dst.write = (j == 3); 4352 4353 alu.src[0].sel = tmp0; 4354 alu.src[0].chan = 2; 4355 4356 alu.src[1].sel = tmp0; 4357 alu.src[1].chan = 0; 4358 4359 alu.last = (j == 3); 4360 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4361 return r; 4362 } 4363 } else { 4364 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4365 alu.op = ALU_OP2_MULHI_UINT; 4366 4367 alu.dst.sel = tmp0; 4368 alu.dst.chan = 3; 4369 alu.dst.write = 1; 4370 4371 alu.src[0].sel = tmp0; 4372 alu.src[0].chan = 2; 4373 4374 alu.src[1].sel = tmp0; 4375 alu.src[1].chan = 0; 4376 4377 alu.last = 1; 4378 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4379 return r; 4380 } 4381 4382 /* 7. tmp1.x = tmp0.x - tmp0.w */ 4383 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4384 alu.op = ALU_OP2_SUB_INT; 4385 4386 alu.dst.sel = tmp1; 4387 alu.dst.chan = 0; 4388 alu.dst.write = 1; 4389 4390 alu.src[0].sel = tmp0; 4391 alu.src[0].chan = 0; 4392 alu.src[1].sel = tmp0; 4393 alu.src[1].chan = 3; 4394 4395 alu.last = 1; 4396 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4397 return r; 4398 4399 /* 8. tmp1.y = tmp0.x + tmp0.w */ 4400 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4401 alu.op = ALU_OP2_ADD_INT; 4402 4403 alu.dst.sel = tmp1; 4404 alu.dst.chan = 1; 4405 alu.dst.write = 1; 4406 4407 alu.src[0].sel = tmp0; 4408 alu.src[0].chan = 0; 4409 alu.src[1].sel = tmp0; 4410 alu.src[1].chan = 3; 4411 4412 alu.last = 1; 4413 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4414 return r; 4415 4416 /* 9. tmp0.x = (tmp0.y == 0 ? 
tmp1.y : tmp1.x) */ 4417 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4418 alu.op = ALU_OP3_CNDE_INT; 4419 alu.is_op3 = 1; 4420 4421 alu.dst.sel = tmp0; 4422 alu.dst.chan = 0; 4423 alu.dst.write = 1; 4424 4425 alu.src[0].sel = tmp0; 4426 alu.src[0].chan = 1; 4427 alu.src[1].sel = tmp1; 4428 alu.src[1].chan = 1; 4429 alu.src[2].sel = tmp1; 4430 alu.src[2].chan = 0; 4431 4432 alu.last = 1; 4433 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4434 return r; 4435 4436 /* 10. tmp0.z = hi(tmp0.x * src1) = q */ 4437 if (ctx->bc->chip_class == CAYMAN) { 4438 for (j = 0 ; j < 4; j++) { 4439 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4440 alu.op = ALU_OP2_MULHI_UINT; 4441 4442 alu.dst.sel = tmp0; 4443 alu.dst.chan = j; 4444 alu.dst.write = (j == 2); 4445 4446 alu.src[0].sel = tmp0; 4447 alu.src[0].chan = 0; 4448 4449 if (signed_op) { 4450 alu.src[1].sel = tmp2; 4451 alu.src[1].chan = 0; 4452 } else { 4453 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4454 } 4455 4456 alu.last = (j == 3); 4457 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4458 return r; 4459 } 4460 } else { 4461 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4462 alu.op = ALU_OP2_MULHI_UINT; 4463 4464 alu.dst.sel = tmp0; 4465 alu.dst.chan = 2; 4466 alu.dst.write = 1; 4467 4468 alu.src[0].sel = tmp0; 4469 alu.src[0].chan = 0; 4470 4471 if (signed_op) { 4472 alu.src[1].sel = tmp2; 4473 alu.src[1].chan = 0; 4474 } else { 4475 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4476 } 4477 4478 alu.last = 1; 4479 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4480 return r; 4481 } 4482 4483 /* 11. 
tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 4484 if (ctx->bc->chip_class == CAYMAN) { 4485 for (j = 0 ; j < 4; j++) { 4486 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4487 alu.op = ALU_OP2_MULLO_UINT; 4488 4489 alu.dst.sel = tmp0; 4490 alu.dst.chan = j; 4491 alu.dst.write = (j == 1); 4492 4493 if (signed_op) { 4494 alu.src[0].sel = tmp2; 4495 alu.src[0].chan = 1; 4496 } else { 4497 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4498 } 4499 4500 alu.src[1].sel = tmp0; 4501 alu.src[1].chan = 2; 4502 4503 alu.last = (j == 3); 4504 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4505 return r; 4506 } 4507 } else { 4508 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4509 alu.op = ALU_OP2_MULLO_UINT; 4510 4511 alu.dst.sel = tmp0; 4512 alu.dst.chan = 1; 4513 alu.dst.write = 1; 4514 4515 if (signed_op) { 4516 alu.src[0].sel = tmp2; 4517 alu.src[0].chan = 1; 4518 } else { 4519 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 4520 } 4521 4522 alu.src[1].sel = tmp0; 4523 alu.src[1].chan = 2; 4524 4525 alu.last = 1; 4526 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4527 return r; 4528 } 4529 4530 /* 12. tmp0.w = src1 - tmp0.y = r */ 4531 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4532 alu.op = ALU_OP2_SUB_INT; 4533 4534 alu.dst.sel = tmp0; 4535 alu.dst.chan = 3; 4536 alu.dst.write = 1; 4537 4538 if (signed_op) { 4539 alu.src[0].sel = tmp2; 4540 alu.src[0].chan = 0; 4541 } else { 4542 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4543 } 4544 4545 alu.src[1].sel = tmp0; 4546 alu.src[1].chan = 1; 4547 4548 alu.last = 1; 4549 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4550 return r; 4551 4552 /* 13. 
tmp1.x = tmp0.w >= src2 = r >= src2 */ 4553 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4554 alu.op = ALU_OP2_SETGE_UINT; 4555 4556 alu.dst.sel = tmp1; 4557 alu.dst.chan = 0; 4558 alu.dst.write = 1; 4559 4560 alu.src[0].sel = tmp0; 4561 alu.src[0].chan = 3; 4562 if (signed_op) { 4563 alu.src[1].sel = tmp2; 4564 alu.src[1].chan = 1; 4565 } else { 4566 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4567 } 4568 4569 alu.last = 1; 4570 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4571 return r; 4572 4573 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 4574 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4575 alu.op = ALU_OP2_SETGE_UINT; 4576 4577 alu.dst.sel = tmp1; 4578 alu.dst.chan = 1; 4579 alu.dst.write = 1; 4580 4581 if (signed_op) { 4582 alu.src[0].sel = tmp2; 4583 alu.src[0].chan = 0; 4584 } else { 4585 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4586 } 4587 4588 alu.src[1].sel = tmp0; 4589 alu.src[1].chan = 1; 4590 4591 alu.last = 1; 4592 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4593 return r; 4594 4595 if (mod) { /* UMOD */ 4596 4597 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 4598 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4599 alu.op = ALU_OP2_SUB_INT; 4600 4601 alu.dst.sel = tmp1; 4602 alu.dst.chan = 2; 4603 alu.dst.write = 1; 4604 4605 alu.src[0].sel = tmp0; 4606 alu.src[0].chan = 3; 4607 4608 if (signed_op) { 4609 alu.src[1].sel = tmp2; 4610 alu.src[1].chan = 1; 4611 } else { 4612 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4613 } 4614 4615 alu.last = 1; 4616 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4617 return r; 4618 4619 /* 16. 
tmp1.w = tmp0.w + src2 = r + src2 */ 4620 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4621 alu.op = ALU_OP2_ADD_INT; 4622 4623 alu.dst.sel = tmp1; 4624 alu.dst.chan = 3; 4625 alu.dst.write = 1; 4626 4627 alu.src[0].sel = tmp0; 4628 alu.src[0].chan = 3; 4629 if (signed_op) { 4630 alu.src[1].sel = tmp2; 4631 alu.src[1].chan = 1; 4632 } else { 4633 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4634 } 4635 4636 alu.last = 1; 4637 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4638 return r; 4639 4640 } else { /* UDIV */ 4641 4642 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 4643 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4644 alu.op = ALU_OP2_ADD_INT; 4645 4646 alu.dst.sel = tmp1; 4647 alu.dst.chan = 2; 4648 alu.dst.write = 1; 4649 4650 alu.src[0].sel = tmp0; 4651 alu.src[0].chan = 2; 4652 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 4653 4654 alu.last = 1; 4655 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4656 return r; 4657 4658 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 4659 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4660 alu.op = ALU_OP2_ADD_INT; 4661 4662 alu.dst.sel = tmp1; 4663 alu.dst.chan = 3; 4664 alu.dst.write = 1; 4665 4666 alu.src[0].sel = tmp0; 4667 alu.src[0].chan = 2; 4668 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 4669 4670 alu.last = 1; 4671 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4672 return r; 4673 4674 } 4675 4676 /* 17. tmp1.x = tmp1.x & tmp1.y */ 4677 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4678 alu.op = ALU_OP2_AND_INT; 4679 4680 alu.dst.sel = tmp1; 4681 alu.dst.chan = 0; 4682 alu.dst.write = 1; 4683 4684 alu.src[0].sel = tmp1; 4685 alu.src[0].chan = 0; 4686 alu.src[1].sel = tmp1; 4687 alu.src[1].chan = 1; 4688 4689 alu.last = 1; 4690 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4691 return r; 4692 4693 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 4694 /* 18. tmp0.z = tmp1.x==0 ? 
tmp0.w : tmp1.z MOD */ 4695 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4696 alu.op = ALU_OP3_CNDE_INT; 4697 alu.is_op3 = 1; 4698 4699 alu.dst.sel = tmp0; 4700 alu.dst.chan = 2; 4701 alu.dst.write = 1; 4702 4703 alu.src[0].sel = tmp1; 4704 alu.src[0].chan = 0; 4705 alu.src[1].sel = tmp0; 4706 alu.src[1].chan = mod ? 3 : 2; 4707 alu.src[2].sel = tmp1; 4708 alu.src[2].chan = 2; 4709 4710 alu.last = 1; 4711 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4712 return r; 4713 4714 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 4715 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4716 alu.op = ALU_OP3_CNDE_INT; 4717 alu.is_op3 = 1; 4718 4719 if (signed_op) { 4720 alu.dst.sel = tmp0; 4721 alu.dst.chan = 2; 4722 alu.dst.write = 1; 4723 } else { 4724 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4725 } 4726 4727 alu.src[0].sel = tmp1; 4728 alu.src[0].chan = 1; 4729 alu.src[1].sel = tmp1; 4730 alu.src[1].chan = 3; 4731 alu.src[2].sel = tmp0; 4732 alu.src[2].chan = 2; 4733 4734 alu.last = 1; 4735 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4736 return r; 4737 4738 if (signed_op) { 4739 4740 /* fix the sign of the result */ 4741 4742 if (mod) { 4743 4744 /* tmp0.x = -tmp0.z */ 4745 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4746 alu.op = ALU_OP2_SUB_INT; 4747 4748 alu.dst.sel = tmp0; 4749 alu.dst.chan = 0; 4750 alu.dst.write = 1; 4751 4752 alu.src[0].sel = V_SQ_ALU_SRC_0; 4753 alu.src[1].sel = tmp0; 4754 alu.src[1].chan = 2; 4755 4756 alu.last = 1; 4757 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4758 return r; 4759 4760 /* sign of the remainder is the same as the sign of src0 */ 4761 /* tmp0.x = src0>=0 ? 
tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

			} else {

				/* tmp0.x = -tmp0.z */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_SUB_INT;

				alu.dst.sel = tmp0;
				alu.dst.chan = 0;
				alu.dst.write = 1;

				alu.src[0].sel = V_SQ_ALU_SRC_0;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

				/* fix the quotient sign (same as the sign of src0*src1) */
				/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				alu.src[0].sel = tmp2;
				alu.src[0].chan = 2;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
	}
	return 0;
}

/* TGSI UDIV: unsigned integer divide (tgsi_divmod with mod=0, signed_op=0). */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 0);
}

/* TGSI UMOD: unsigned integer modulo (tgsi_divmod with mod=1, signed_op=0). */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 0);
}

/* TGSI IDIV: signed integer divide (tgsi_divmod with mod=0, signed_op=1). */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 1);
}

/* TGSI IMOD: signed integer modulo (tgsi_divmod with mod=1, signed_op=1). */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 1);
}


/* Float-to-integer conversion: pass 1 TRUNCs each written channel of src0
 * into temp_reg, pass 2 applies the conversion opcode from inst_info
 * (e.g. FLT_TO_UINT) into the real destination. */
static int tgsi_f2i(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst =
&ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* pass 1: truncate the float source into temp_reg */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_TRUNC;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* pass 2: convert temp_reg to integer with the instruction's opcode */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		/* FLT_TO_UINT always ends the ALU group here —
		 * NOTE(review): presumably a slot restriction; not visible in this file */
		if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* Integer absolute value: tmp = 0 - src, then dst = (src >= 0 ? src : tmp). */
static int tgsi_iabs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* tmp = -src */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[0].sel = V_SQ_ALU_SRC_0;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (src >= 0 ?
src : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		alu.dst.write = 1;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Integer sign: tmp = (src >= 0 ? src : -1), then dst = (tmp > 0 ? 1 : tmp),
 * yielding -1 / 0 / 1. */
static int tgsi_issg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	/* tmp = (src >= 0 ? src : -1) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (tmp > 0 ?
1 : tmp) */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT_INT;
		alu.is_op3 = 1;
		alu.dst.write = 1;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		alu.src[1].sel = V_SQ_ALU_SRC_1_INT;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == last_inst)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}



/* Float sign: tmp = (src > 0 ? 1 : src), then dst = (-tmp > 0 ? -1 : tmp),
 * yielding -1.0 / 0.0 / 1.0.  Operates on all four channels unconditionally. */
static int tgsi_ssg(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;

	/* tmp = (src > 0 ? 1 : src) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.src[1].sel = V_SQ_ALU_SRC_1;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = (-tmp > 0 ?
-1 : tmp) */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		alu.src[0].neg = 1;

		alu.src[1].sel = V_SQ_ALU_SRC_1;
		alu.src[1].neg = 1;

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI BFI (bitfield insert): build the bit mask with BFM from src3/src2,
 * shift the insert value (src1) left by the offset (src2), then combine with
 * the base (src0) using the hardware BFI op. */
static int tgsi_bfi(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	t1 = ctx->temp_reg;

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* create mask tmp */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_BFM_INT;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[3], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* shift insert left */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_LSHL_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4;
i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* actual bitfield insert */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_BFI_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		r600_bytecode_src(&alu.src[2], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* IMSB/UMSB: find the most significant bit with FFBH, then convert the
 * hardware's from-msb index to TGSI's from-lsb index via (31 - t1); the
 * raw FFBH result is kept when it is negative (see CNDGE below). */
static int tgsi_msb(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, t1, t2;

	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int last_inst = tgsi_last_instruction(write_mask);

	assert(ctx->inst_info->op == ALU_OP1_FFBH_INT ||
	       ctx->inst_info->op == ALU_OP1_FFBH_UINT);

	t1 = ctx->temp_reg;

	/* bit position is indexed from lsb by TGSI, and from msb by the hardware */
	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t1 = FFBH_INT / FFBH_UINT */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		alu.dst.sel = t1;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	t2 = r600_get_temp(ctx);

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* t2 = 31 - t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;
		alu.dst.sel = t2;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[0].value = 31;
		alu.src[1].sel = t1;
		alu.src[1].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		/* result = t1 >= 0 ? t2 : t1 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE_INT;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = i == last_inst;

		alu.src[0].sel = t1;
		alu.src[0].chan = i;
		alu.src[1].sel = t2;
		alu.src[1].chan = i;
		alu.src[2].sel = t1;
		alu.src[2].chan = i;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* Evergreen/Cayman INTERP_CENTROID / INTERP_OFFSET / INTERP_SAMPLE:
 * pick the interpolator (i,j) pair for the requested location, optionally
 * adjust it by screen-space gradients for OFFSET/SAMPLE, run the
 * INTERP_ZW/INTERP_XY pairs, and copy the result to the destination. */
static int tgsi_interp_egcm(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti;
	unsigned location;
	int input;

	assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);

	input = inst->Src[0].Register.Index;

	/* Interpolators have been marked for use already by allocate_system_value_inputs */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */
	}
	else {
		location = TGSI_INTERPOLATE_LOC_CENTROID;
	}

	k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location);
	if (k < 0)
		k = 0;
	interp_gpr = ctx->eg_interpolators[k].ij_index / 2;
	interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2);

	/* NOTE: currently offset is not perspective correct */
	if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
	    inst->Instruction.Opcode ==
TGSI_OPCODE_INTERP_SAMPLE) {
		int sample_gpr = -1;
		int gradientsH, gradientsV;
		struct r600_bytecode_tex tex;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]);
		}

		/* fetch horizontal and vertical gradients of the (i,j) pair */
		gradientsH = r600_get_temp(ctx);
		gradientsV = r600_get_temp(ctx);
		for (i = 0; i < 2; i++) {
			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
			tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V;
			tex.src_gpr = interp_gpr;
			tex.src_sel_x = interp_base_chan + 0;
			tex.src_sel_y = interp_base_chan + 1;
			tex.src_sel_z = 0;
			tex.src_sel_w = 0;
			tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
			tex.dst_sel_x = 0;
			tex.dst_sel_y = 1;
			tex.dst_sel_z = 7;
			tex.dst_sel_w = 7;
			tex.inst_mod = 1; // Use per pixel gradient calculation
			tex.sampler_id = 0;
			tex.resource_id = tex.sampler_id;
			r = r600_bytecode_add_tex(ctx->bc, &tex);
			if (r)
				return r;
		}

		/* temp = ij + gradH * offset.x (or sample position x) */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsH;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 2;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
			}
			alu.src[2].sel = interp_gpr;
			alu.src[2].chan = interp_base_chan + i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp += gradV * offset.y (or sample position y) */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsV;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel =
sample_gpr;
				alu.src[1].chan = 3;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
			}
			alu.src[2].sel = ctx->temp_reg;
			alu.src[2].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* run the interpolation: ZW pair first, then XY */
	tmp = r600_get_temp(ctx);
	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;

		alu.dst.sel = tmp;
		if ((i > 1 && i < 6)) {
			alu.dst.write = 1;
		}
		else {
			alu.dst.write = 0;
		}
		alu.dst.chan = i % 4;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1 - (i % 2);
		} else {
			alu.src[0].sel = interp_gpr;
			alu.src[0].chan = interp_base_chan + 1 - (i % 2);
		}
		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[1].chan = 0;

		alu.last = i % 4 == 3;
		alu.bank_swizzle_force = SQ_ALU_VEC_210;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	// INTERP can't swizzle dst
	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	for (i = 0; i <= lasti; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = tmp;
		alu.src[0].chan = ctx->src[0].swizzle[i];
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		alu.last = i == lasti;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}


/* Copy temp_reg channels selected by Dst[0]'s writemask into Dst[0];
 * unwritten channels become NOPs so the ALU group stays full. */
static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst)
{
	struct
r600_bytecode_alu alu;
	int i, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
			alu.op = ALU_OP0_NOP;
			alu.dst.chan = i;
		} else {
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = i;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Prepare one op3 source operand from a TGSI source.  If the operand carries
 * the abs modifier (unsupported on op3), copy it through a MOV into the
 * caller-provided temp and point bc_src at that temp instead. */
static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx,
			unsigned temp, int chan,
			struct r600_bytecode_alu_src *bc_src,
			const struct r600_shader_src *shader_src)
{
	struct r600_bytecode_alu alu;
	int r;

	r600_bytecode_src(bc_src, shader_src, chan);

	/* op3 operands don't support abs modifier */
	if (bc_src->abs) {
		assert(temp!=0); /* we actually need the extra register, make sure it is allocated. */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = temp;
		alu.dst.chan = chan;
		alu.dst.write = 1;

		alu.src[0] = *bc_src;
		alu.last = true; // sufficient?
r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		memset(bc_src, 0, sizeof(*bc_src));
		bc_src->sel = temp;
		bc_src->chan = chan;
	}
	return 0;
}

/* Generic 3-source ALU instruction: emit inst_info->op per written channel,
 * routing each source through tgsi_make_src_for_op3 to strip abs modifiers. */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int temp_regs[4];

	/* reserve a temp per source that needs its abs modifier lowered */
	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
		temp_regs[j] = 0;
		if (ctx->src[j].abs)
			temp_regs[j] = r600_get_temp(ctx);
	}
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
			if (r)
				return r;
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Dot-product family (DP2/DP3/DP4/DPH): emit the dot op on all four channels,
 * padding unused channels with 0 (DP2/DP3) or forcing src0.w to 1 (DPH). */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		/* handle some special cases */
		switch (inst->Instruction.Opcode) {
		case TGSI_OPCODE_DP2:
			if
(i > 1) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DP3:
			if (i > 2) {
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DPH:
			if (i == 3) {
				alu.src[0].sel = V_SQ_ALU_SRC_1;
				alu.src[0].chan = 0;
				alu.src[0].neg = 0;
			}
			break;
		default:
			break;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* True when a texture source operand must be copied into a temp GPR first:
 * it lives outside TEMP/INPUT/OUTPUT, carries neg/abs modifiers, or is a
 * geometry-shader input. */
static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
						    unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
		inst->Src[index].Register.File != TGSI_FILE_INPUT &&
		inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
		ctx->src[index].neg || ctx->src[index].abs ||
		(inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == TGSI_PROCESSOR_GEOMETRY);
}

/* GPR index backing the given texture source register (file offset + index). */
static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
					    unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
}

/* Emit a VTX fetch for a buffer-texture TXF.  Optionally copies the address
 * source into temp_reg first; on pre-Evergreen chips the fetched result is
 * additionally masked/patched via the buffer-info constant buffer. */
static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
{
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_alu alu;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int src_gpr, r, i;
	int id = tgsi_tex_get_src_gpr(ctx, 1);

	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
	if (src_requires_loading) {
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5571 alu.dst.sel = ctx->temp_reg; 5572 alu.dst.chan = i; 5573 if (i == 3) 5574 alu.last = 1; 5575 alu.dst.write = 1; 5576 r = r600_bytecode_add_alu(ctx->bc, &alu); 5577 if (r) 5578 return r; 5579 } 5580 src_gpr = ctx->temp_reg; 5581 } 5582 5583 memset(&vtx, 0, sizeof(vtx)); 5584 vtx.op = FETCH_OP_VFETCH; 5585 vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; 5586 vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; 5587 vtx.src_gpr = src_gpr; 5588 vtx.mega_fetch_count = 16; 5589 vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 5590 vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 5591 vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ 5592 vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ 5593 vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */ 5594 vtx.use_const_fields = 1; 5595 5596 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 5597 return r; 5598 5599 if (ctx->bc->chip_class >= EVERGREEN) 5600 return 0; 5601 5602 for (i = 0; i < 4; i++) { 5603 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 5604 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 5605 continue; 5606 5607 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5608 alu.op = ALU_OP2_AND_INT; 5609 5610 alu.dst.chan = i; 5611 alu.dst.sel = vtx.dst_gpr; 5612 alu.dst.write = 1; 5613 5614 alu.src[0].sel = vtx.dst_gpr; 5615 alu.src[0].chan = i; 5616 5617 alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL; 5618 alu.src[1].sel += (id * 2); 5619 alu.src[1].chan = i % 4; 5620 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 5621 5622 if (i == lasti) 5623 alu.last = 1; 5624 r = r600_bytecode_add_alu(ctx->bc, &alu); 5625 if (r) 5626 return r; 5627 } 5628 5629 if (inst->Dst[0].Register.WriteMask & 3) { 5630 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5631 alu.op = ALU_OP2_OR_INT; 5632 5633 alu.dst.chan = 3; 5634 
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = 3;

		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
		alu.src[1].chan = 0;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Implement TXQ on a buffer texture: the buffer size is not queryable from
 * the texture unit, so it is read with a single MOV from the driver-supplied
 * buffer-info constant buffer.  The constant layout differs per generation
 * (see the chip_class branches below). */
static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int id = tgsi_tex_get_src_gpr(ctx, 1);

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_MOV;
	alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
	if (ctx->bc->chip_class >= EVERGREEN) {
		/* channel 0 or 2 of each word */
		alu.src[0].sel += (id / 2);
		alu.src[0].chan = (id % 2) * 2;
	} else {
		/* r600 we have them at channel 2 of the second dword */
		alu.src[0].sel += (id * 2) + 1;
		alu.src[0].chan = 1;
	}
	alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* Translate one TGSI texture instruction (TEX/TXB/TXL/TXD/TXF/TXQ/TG4/...)
 * into r600 TEX (or VTX, for buffer TXF) bytecode, including all coordinate
 * preprocessing: perspective divide for TXP, cube-face math, gradient setup
 * for TXD, texel-offset folding for TXF, and FMASK lookup for compressed
 * MSAA reads. */
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
	static float one_point_five = 1.5f;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_tex tex;
	struct r600_bytecode_alu alu;
	unsigned src_gpr;
	int r, i, j;
	int opcode;
	bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing &&
				    inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
				    (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
				     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);

	bool txf_add_offsets = inst->Texture.NumOffsets &&
			       inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
			       inst->Texture.Texture !=
TGSI_TEXTURE_BUFFER; 5697 5698 /* Texture fetch instructions can only use gprs as source. 5699 * Also they cannot negate the source or take the absolute value */ 5700 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 5701 inst->Instruction.Opcode != TGSI_OPCODE_TXQS && 5702 tgsi_tex_src_requires_loading(ctx, 0)) || 5703 read_compressed_msaa || txf_add_offsets; 5704 5705 boolean src_loaded = FALSE; 5706 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; 5707 int8_t offset_x = 0, offset_y = 0, offset_z = 0; 5708 boolean has_txq_cube_array_z = false; 5709 unsigned sampler_index_mode; 5710 5711 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 5712 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 5713 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 5714 if (inst->Dst[0].Register.WriteMask & 4) { 5715 ctx->shader->has_txq_cube_array_z_comp = true; 5716 has_txq_cube_array_z = true; 5717 } 5718 5719 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 5720 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 5721 inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 5722 inst->Instruction.Opcode == TGSI_OPCODE_TG4) 5723 sampler_src_reg = 2; 5724 5725 /* TGSI moves the sampler to src reg 3 for TXD */ 5726 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 5727 sampler_src_reg = 3; 5728 5729 sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 5730 5731 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 5732 5733 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 5734 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 5735 ctx->shader->uses_tex_buffers = true; 5736 return r600_do_buffer_txq(ctx); 5737 } 5738 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 5739 if (ctx->bc->chip_class < EVERGREEN) 5740 ctx->shader->uses_tex_buffers = true; 5741 return do_vtx_fetch_inst(ctx, src_requires_loading); 5742 } 5743 } 5744 5745 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 5746 int out_chan; 5747 /* Add perspective divide */ 5748 if (ctx->bc->chip_class == CAYMAN) { 5749 out_chan = 2; 5750 for (i = 0; i < 3; i++) { 5751 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5752 alu.op = ALU_OP1_RECIP_IEEE; 5753 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5754 5755 alu.dst.sel = ctx->temp_reg; 5756 alu.dst.chan = i; 5757 if (i == 2) 5758 alu.last = 1; 5759 if (out_chan == i) 5760 alu.dst.write = 1; 5761 r = r600_bytecode_add_alu(ctx->bc, &alu); 5762 if (r) 5763 return r; 5764 } 5765 5766 } else { 5767 out_chan = 3; 5768 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5769 alu.op = ALU_OP1_RECIP_IEEE; 5770 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5771 5772 alu.dst.sel = ctx->temp_reg; 5773 alu.dst.chan = out_chan; 5774 alu.last = 1; 5775 alu.dst.write = 1; 5776 r = r600_bytecode_add_alu(ctx->bc, &alu); 5777 if (r) 5778 return r; 5779 } 5780 5781 for (i = 0; i < 3; i++) { 5782 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5783 alu.op = ALU_OP2_MUL; 5784 alu.src[0].sel = ctx->temp_reg; 5785 alu.src[0].chan = out_chan; 5786 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 5787 alu.dst.sel = ctx->temp_reg; 5788 alu.dst.chan = i; 5789 alu.dst.write = 1; 5790 r = r600_bytecode_add_alu(ctx->bc, &alu); 5791 if (r) 5792 return r; 5793 } 5794 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5795 alu.op = ALU_OP1_MOV; 5796 alu.src[0].sel = V_SQ_ALU_SRC_1; 5797 
alu.src[0].chan = 0; 5798 alu.dst.sel = ctx->temp_reg; 5799 alu.dst.chan = 3; 5800 alu.last = 1; 5801 alu.dst.write = 1; 5802 r = r600_bytecode_add_alu(ctx->bc, &alu); 5803 if (r) 5804 return r; 5805 src_loaded = TRUE; 5806 src_gpr = ctx->temp_reg; 5807 } 5808 5809 5810 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 5811 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 5812 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 5813 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 5814 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 5815 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 5816 5817 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 5818 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 5819 5820 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 5821 for (i = 0; i < 4; i++) { 5822 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5823 alu.op = ALU_OP2_CUBE; 5824 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 5825 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 5826 alu.dst.sel = ctx->temp_reg; 5827 alu.dst.chan = i; 5828 if (i == 3) 5829 alu.last = 1; 5830 alu.dst.write = 1; 5831 r = r600_bytecode_add_alu(ctx->bc, &alu); 5832 if (r) 5833 return r; 5834 } 5835 5836 /* tmp1.z = RCP_e(|tmp1.z|) */ 5837 if (ctx->bc->chip_class == CAYMAN) { 5838 for (i = 0; i < 3; i++) { 5839 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5840 alu.op = ALU_OP1_RECIP_IEEE; 5841 alu.src[0].sel = ctx->temp_reg; 5842 alu.src[0].chan = 2; 5843 alu.src[0].abs = 1; 5844 alu.dst.sel = ctx->temp_reg; 5845 alu.dst.chan = i; 5846 if (i == 2) 5847 alu.dst.write = 1; 5848 if (i == 2) 5849 alu.last = 1; 5850 r = r600_bytecode_add_alu(ctx->bc, &alu); 5851 if (r) 5852 return r; 5853 } 5854 } else { 5855 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5856 alu.op = ALU_OP1_RECIP_IEEE; 5857 alu.src[0].sel = ctx->temp_reg; 5858 alu.src[0].chan = 2; 5859 alu.src[0].abs = 1; 5860 alu.dst.sel = ctx->temp_reg; 5861 alu.dst.chan = 2; 5862 
alu.dst.write = 1; 5863 alu.last = 1; 5864 r = r600_bytecode_add_alu(ctx->bc, &alu); 5865 if (r) 5866 return r; 5867 } 5868 5869 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 5870 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 5871 * muladd has no writemask, have to use another temp 5872 */ 5873 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5874 alu.op = ALU_OP3_MULADD; 5875 alu.is_op3 = 1; 5876 5877 alu.src[0].sel = ctx->temp_reg; 5878 alu.src[0].chan = 0; 5879 alu.src[1].sel = ctx->temp_reg; 5880 alu.src[1].chan = 2; 5881 5882 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 5883 alu.src[2].chan = 0; 5884 alu.src[2].value = *(uint32_t *)&one_point_five; 5885 5886 alu.dst.sel = ctx->temp_reg; 5887 alu.dst.chan = 0; 5888 alu.dst.write = 1; 5889 5890 r = r600_bytecode_add_alu(ctx->bc, &alu); 5891 if (r) 5892 return r; 5893 5894 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5895 alu.op = ALU_OP3_MULADD; 5896 alu.is_op3 = 1; 5897 5898 alu.src[0].sel = ctx->temp_reg; 5899 alu.src[0].chan = 1; 5900 alu.src[1].sel = ctx->temp_reg; 5901 alu.src[1].chan = 2; 5902 5903 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 5904 alu.src[2].chan = 0; 5905 alu.src[2].value = *(uint32_t *)&one_point_five; 5906 5907 alu.dst.sel = ctx->temp_reg; 5908 alu.dst.chan = 1; 5909 alu.dst.write = 1; 5910 5911 alu.last = 1; 5912 r = r600_bytecode_add_alu(ctx->bc, &alu); 5913 if (r) 5914 return r; 5915 /* write initial compare value into Z component 5916 - W src 0 for shadow cube 5917 - X src 1 for shadow cube array */ 5918 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 5919 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 5920 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5921 alu.op = ALU_OP1_MOV; 5922 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 5923 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 5924 else 5925 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5926 alu.dst.sel = ctx->temp_reg; 5927 alu.dst.chan = 2; 5928 alu.dst.write = 1; 5929 alu.last = 1; 
5930 r = r600_bytecode_add_alu(ctx->bc, &alu); 5931 if (r) 5932 return r; 5933 } 5934 5935 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 5936 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 5937 if (ctx->bc->chip_class >= EVERGREEN) { 5938 int mytmp = r600_get_temp(ctx); 5939 static const float eight = 8.0f; 5940 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5941 alu.op = ALU_OP1_MOV; 5942 alu.src[0].sel = ctx->temp_reg; 5943 alu.src[0].chan = 3; 5944 alu.dst.sel = mytmp; 5945 alu.dst.chan = 0; 5946 alu.dst.write = 1; 5947 alu.last = 1; 5948 r = r600_bytecode_add_alu(ctx->bc, &alu); 5949 if (r) 5950 return r; 5951 5952 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 5953 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5954 alu.op = ALU_OP3_MULADD; 5955 alu.is_op3 = 1; 5956 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5957 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 5958 alu.src[1].chan = 0; 5959 alu.src[1].value = *(uint32_t *)&eight; 5960 alu.src[2].sel = mytmp; 5961 alu.src[2].chan = 0; 5962 alu.dst.sel = ctx->temp_reg; 5963 alu.dst.chan = 3; 5964 alu.dst.write = 1; 5965 alu.last = 1; 5966 r = r600_bytecode_add_alu(ctx->bc, &alu); 5967 if (r) 5968 return r; 5969 } else if (ctx->bc->chip_class < EVERGREEN) { 5970 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 5971 tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 5972 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 5973 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 5974 tex.src_gpr = r600_get_temp(ctx); 5975 tex.src_sel_x = 0; 5976 tex.src_sel_y = 0; 5977 tex.src_sel_z = 0; 5978 tex.src_sel_w = 0; 5979 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 5980 tex.coord_type_x = 1; 5981 tex.coord_type_y = 1; 5982 tex.coord_type_z = 1; 5983 tex.coord_type_w = 1; 5984 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5985 alu.op = ALU_OP1_MOV; 5986 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 5987 alu.dst.sel = tex.src_gpr; 
5988 alu.dst.chan = 0; 5989 alu.last = 1; 5990 alu.dst.write = 1; 5991 r = r600_bytecode_add_alu(ctx->bc, &alu); 5992 if (r) 5993 return r; 5994 5995 r = r600_bytecode_add_tex(ctx->bc, &tex); 5996 if (r) 5997 return r; 5998 } 5999 6000 } 6001 6002 /* for cube forms of lod and bias we need to route things */ 6003 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 6004 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 6005 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6006 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 6007 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6008 alu.op = ALU_OP1_MOV; 6009 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6010 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 6011 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 6012 else 6013 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6014 alu.dst.sel = ctx->temp_reg; 6015 alu.dst.chan = 2; 6016 alu.last = 1; 6017 alu.dst.write = 1; 6018 r = r600_bytecode_add_alu(ctx->bc, &alu); 6019 if (r) 6020 return r; 6021 } 6022 6023 src_loaded = TRUE; 6024 src_gpr = ctx->temp_reg; 6025 } 6026 6027 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 6028 int temp_h = 0, temp_v = 0; 6029 int start_val = 0; 6030 6031 /* if we've already loaded the src (i.e. CUBE don't reload it). 
*/ 6032 if (src_loaded == TRUE) 6033 start_val = 1; 6034 else 6035 src_loaded = TRUE; 6036 for (i = start_val; i < 3; i++) { 6037 int treg = r600_get_temp(ctx); 6038 6039 if (i == 0) 6040 src_gpr = treg; 6041 else if (i == 1) 6042 temp_h = treg; 6043 else 6044 temp_v = treg; 6045 6046 for (j = 0; j < 4; j++) { 6047 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6048 alu.op = ALU_OP1_MOV; 6049 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 6050 alu.dst.sel = treg; 6051 alu.dst.chan = j; 6052 if (j == 3) 6053 alu.last = 1; 6054 alu.dst.write = 1; 6055 r = r600_bytecode_add_alu(ctx->bc, &alu); 6056 if (r) 6057 return r; 6058 } 6059 } 6060 for (i = 1; i < 3; i++) { 6061 /* set gradients h/v */ 6062 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6063 tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : 6064 FETCH_OP_SET_GRADIENTS_V; 6065 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6066 tex.sampler_index_mode = sampler_index_mode; 6067 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6068 tex.resource_index_mode = sampler_index_mode; 6069 6070 tex.src_gpr = (i == 1) ? 
temp_h : temp_v; 6071 tex.src_sel_x = 0; 6072 tex.src_sel_y = 1; 6073 tex.src_sel_z = 2; 6074 tex.src_sel_w = 3; 6075 6076 tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */ 6077 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 6078 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 6079 tex.coord_type_x = 1; 6080 tex.coord_type_y = 1; 6081 tex.coord_type_z = 1; 6082 tex.coord_type_w = 1; 6083 } 6084 r = r600_bytecode_add_tex(ctx->bc, &tex); 6085 if (r) 6086 return r; 6087 } 6088 } 6089 6090 if (src_requires_loading && !src_loaded) { 6091 for (i = 0; i < 4; i++) { 6092 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6093 alu.op = ALU_OP1_MOV; 6094 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6095 alu.dst.sel = ctx->temp_reg; 6096 alu.dst.chan = i; 6097 if (i == 3) 6098 alu.last = 1; 6099 alu.dst.write = 1; 6100 r = r600_bytecode_add_alu(ctx->bc, &alu); 6101 if (r) 6102 return r; 6103 } 6104 src_loaded = TRUE; 6105 src_gpr = ctx->temp_reg; 6106 } 6107 6108 /* get offset values */ 6109 if (inst->Texture.NumOffsets) { 6110 assert(inst->Texture.NumOffsets == 1); 6111 6112 /* The texture offset feature doesn't work with the TXF instruction 6113 * and must be emulated by adding the offset to the texture coordinates. 
*/ 6114 if (txf_add_offsets) { 6115 const struct tgsi_texture_offset *off = inst->TexOffsets; 6116 6117 switch (inst->Texture.Texture) { 6118 case TGSI_TEXTURE_3D: 6119 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6120 alu.op = ALU_OP2_ADD_INT; 6121 alu.src[0].sel = src_gpr; 6122 alu.src[0].chan = 2; 6123 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6124 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 6125 alu.dst.sel = src_gpr; 6126 alu.dst.chan = 2; 6127 alu.dst.write = 1; 6128 alu.last = 1; 6129 r = r600_bytecode_add_alu(ctx->bc, &alu); 6130 if (r) 6131 return r; 6132 /* fall through */ 6133 6134 case TGSI_TEXTURE_2D: 6135 case TGSI_TEXTURE_SHADOW2D: 6136 case TGSI_TEXTURE_RECT: 6137 case TGSI_TEXTURE_SHADOWRECT: 6138 case TGSI_TEXTURE_2D_ARRAY: 6139 case TGSI_TEXTURE_SHADOW2D_ARRAY: 6140 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6141 alu.op = ALU_OP2_ADD_INT; 6142 alu.src[0].sel = src_gpr; 6143 alu.src[0].chan = 1; 6144 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6145 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 6146 alu.dst.sel = src_gpr; 6147 alu.dst.chan = 1; 6148 alu.dst.write = 1; 6149 alu.last = 1; 6150 r = r600_bytecode_add_alu(ctx->bc, &alu); 6151 if (r) 6152 return r; 6153 /* fall through */ 6154 6155 case TGSI_TEXTURE_1D: 6156 case TGSI_TEXTURE_SHADOW1D: 6157 case TGSI_TEXTURE_1D_ARRAY: 6158 case TGSI_TEXTURE_SHADOW1D_ARRAY: 6159 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6160 alu.op = ALU_OP2_ADD_INT; 6161 alu.src[0].sel = src_gpr; 6162 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6163 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 6164 alu.dst.sel = src_gpr; 6165 alu.dst.write = 1; 6166 alu.last = 1; 6167 r = r600_bytecode_add_alu(ctx->bc, &alu); 6168 if (r) 6169 return r; 6170 break; 6171 /* texture offsets do not apply to other texture targets */ 6172 } 6173 } else { 6174 switch (inst->Texture.Texture) { 6175 case TGSI_TEXTURE_3D: 6176 offset_z = ctx->literals[4 * 
inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 6177 /* fallthrough */ 6178 case TGSI_TEXTURE_2D: 6179 case TGSI_TEXTURE_SHADOW2D: 6180 case TGSI_TEXTURE_RECT: 6181 case TGSI_TEXTURE_SHADOWRECT: 6182 case TGSI_TEXTURE_2D_ARRAY: 6183 case TGSI_TEXTURE_SHADOW2D_ARRAY: 6184 offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 6185 /* fallthrough */ 6186 case TGSI_TEXTURE_1D: 6187 case TGSI_TEXTURE_SHADOW1D: 6188 case TGSI_TEXTURE_1D_ARRAY: 6189 case TGSI_TEXTURE_SHADOW1D_ARRAY: 6190 offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 6191 } 6192 } 6193 } 6194 6195 /* Obtain the sample index for reading a compressed MSAA color texture. 6196 * To read the FMASK, we use the ldfptr instruction, which tells us 6197 * where the samples are stored. 6198 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 6199 * which is the identity mapping. Each nibble says which physical sample 6200 * should be fetched to get that sample. 6201 * 6202 * Assume src.z contains the sample index. It should be modified like this: 6203 * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 6204 * Then fetch the texel with src. 
6205 */ 6206 if (read_compressed_msaa) { 6207 unsigned sample_chan = 3; 6208 unsigned temp = r600_get_temp(ctx); 6209 assert(src_loaded); 6210 6211 /* temp.w = ldfptr() */ 6212 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6213 tex.op = FETCH_OP_LD; 6214 tex.inst_mod = 1; /* to indicate this is ldfptr */ 6215 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6216 tex.sampler_index_mode = sampler_index_mode; 6217 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6218 tex.resource_index_mode = sampler_index_mode; 6219 tex.src_gpr = src_gpr; 6220 tex.dst_gpr = temp; 6221 tex.dst_sel_x = 7; /* mask out these components */ 6222 tex.dst_sel_y = 7; 6223 tex.dst_sel_z = 7; 6224 tex.dst_sel_w = 0; /* store X */ 6225 tex.src_sel_x = 0; 6226 tex.src_sel_y = 1; 6227 tex.src_sel_z = 2; 6228 tex.src_sel_w = 3; 6229 tex.offset_x = offset_x; 6230 tex.offset_y = offset_y; 6231 tex.offset_z = offset_z; 6232 r = r600_bytecode_add_tex(ctx->bc, &tex); 6233 if (r) 6234 return r; 6235 6236 /* temp.x = sample_index*4 */ 6237 if (ctx->bc->chip_class == CAYMAN) { 6238 for (i = 0 ; i < 4; i++) { 6239 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6240 alu.op = ALU_OP2_MULLO_INT; 6241 alu.src[0].sel = src_gpr; 6242 alu.src[0].chan = sample_chan; 6243 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6244 alu.src[1].value = 4; 6245 alu.dst.sel = temp; 6246 alu.dst.chan = i; 6247 alu.dst.write = i == 0; 6248 if (i == 3) 6249 alu.last = 1; 6250 r = r600_bytecode_add_alu(ctx->bc, &alu); 6251 if (r) 6252 return r; 6253 } 6254 } else { 6255 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6256 alu.op = ALU_OP2_MULLO_INT; 6257 alu.src[0].sel = src_gpr; 6258 alu.src[0].chan = sample_chan; 6259 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6260 alu.src[1].value = 4; 6261 alu.dst.sel = temp; 6262 alu.dst.chan = 0; 6263 alu.dst.write = 1; 6264 alu.last = 1; 6265 r = r600_bytecode_add_alu(ctx->bc, &alu); 6266 if (r) 6267 return r; 6268 } 6269 6270 /* sample_index = temp.w >> temp.x */ 6271 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6272 alu.op = ALU_OP2_LSHR_INT; 6273 alu.src[0].sel = temp; 6274 alu.src[0].chan = 3; 6275 alu.src[1].sel = temp; 6276 alu.src[1].chan = 0; 6277 alu.dst.sel = src_gpr; 6278 alu.dst.chan = sample_chan; 6279 alu.dst.write = 1; 6280 alu.last = 1; 6281 r = r600_bytecode_add_alu(ctx->bc, &alu); 6282 if (r) 6283 return r; 6284 6285 /* sample_index & 0xF */ 6286 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6287 alu.op = ALU_OP2_AND_INT; 6288 alu.src[0].sel = src_gpr; 6289 alu.src[0].chan = sample_chan; 6290 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 6291 alu.src[1].value = 0xF; 6292 alu.dst.sel = src_gpr; 6293 alu.dst.chan = sample_chan; 6294 alu.dst.write = 1; 6295 alu.last = 1; 6296 r = r600_bytecode_add_alu(ctx->bc, &alu); 6297 if (r) 6298 return r; 6299#if 0 6300 /* visualize the FMASK */ 6301 for (i = 0; i < 4; i++) { 6302 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6303 alu.op = ALU_OP1_INT_TO_FLT; 6304 alu.src[0].sel = src_gpr; 6305 alu.src[0].chan = sample_chan; 6306 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 6307 alu.dst.chan = i; 6308 alu.dst.write = 1; 6309 alu.last = 1; 6310 r = r600_bytecode_add_alu(ctx->bc, &alu); 6311 if (r) 6312 return r; 6313 } 6314 return 0; 6315#endif 6316 } 6317 6318 /* does this shader want a num layers from TXQ for a cube array? 
*/ 6319 if (has_txq_cube_array_z) { 6320 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6321 6322 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6323 alu.op = ALU_OP1_MOV; 6324 6325 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 6326 if (ctx->bc->chip_class >= EVERGREEN) { 6327 /* channel 1 or 3 of each word */ 6328 alu.src[0].sel += (id / 2); 6329 alu.src[0].chan = ((id % 2) * 2) + 1; 6330 } else { 6331 /* r600 we have them at channel 2 of the second dword */ 6332 alu.src[0].sel += (id * 2) + 1; 6333 alu.src[0].chan = 2; 6334 } 6335 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 6336 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 6337 alu.last = 1; 6338 r = r600_bytecode_add_alu(ctx->bc, &alu); 6339 if (r) 6340 return r; 6341 /* disable writemask from texture instruction */ 6342 inst->Dst[0].Register.WriteMask &= ~4; 6343 } 6344 6345 opcode = ctx->inst_info->op; 6346 if (opcode == FETCH_OP_GATHER4 && 6347 inst->TexOffsets[0].File != TGSI_FILE_NULL && 6348 inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { 6349 opcode = FETCH_OP_GATHER4_O; 6350 6351 /* GATHER4_O/GATHER4_C_O use offset values loaded by 6352 SET_TEXTURE_OFFSETS instruction. The immediate offset values 6353 encoded in the instruction are ignored. 
*/ 6354 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6355 tex.op = FETCH_OP_SET_TEXTURE_OFFSETS; 6356 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6357 tex.sampler_index_mode = sampler_index_mode; 6358 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 6359 tex.resource_index_mode = sampler_index_mode; 6360 6361 tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; 6362 tex.src_sel_x = inst->TexOffsets[0].SwizzleX; 6363 tex.src_sel_y = inst->TexOffsets[0].SwizzleY; 6364 tex.src_sel_z = inst->TexOffsets[0].SwizzleZ; 6365 tex.src_sel_w = 4; 6366 6367 tex.dst_sel_x = 7; 6368 tex.dst_sel_y = 7; 6369 tex.dst_sel_z = 7; 6370 tex.dst_sel_w = 7; 6371 6372 r = r600_bytecode_add_tex(ctx->bc, &tex); 6373 if (r) 6374 return r; 6375 } 6376 6377 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 6378 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 6379 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 6380 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6381 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 6382 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 6383 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6384 switch (opcode) { 6385 case FETCH_OP_SAMPLE: 6386 opcode = FETCH_OP_SAMPLE_C; 6387 break; 6388 case FETCH_OP_SAMPLE_L: 6389 opcode = FETCH_OP_SAMPLE_C_L; 6390 break; 6391 case FETCH_OP_SAMPLE_LB: 6392 opcode = FETCH_OP_SAMPLE_C_LB; 6393 break; 6394 case FETCH_OP_SAMPLE_G: 6395 opcode = FETCH_OP_SAMPLE_C_G; 6396 break; 6397 /* Texture gather variants */ 6398 case FETCH_OP_GATHER4: 6399 opcode = FETCH_OP_GATHER4_C; 6400 break; 6401 case FETCH_OP_GATHER4_O: 6402 opcode = FETCH_OP_GATHER4_C_O; 6403 break; 6404 } 6405 } 6406 6407 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 6408 tex.op = opcode; 6409 6410 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 6411 tex.sampler_index_mode = sampler_index_mode; 6412 tex.resource_id = tex.sampler_id + 
R600_MAX_CONST_BUFFERS; 6413 tex.resource_index_mode = sampler_index_mode; 6414 tex.src_gpr = src_gpr; 6415 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 6416 6417 if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE || 6418 inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) { 6419 tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */ 6420 } 6421 6422 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 6423 int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]; 6424 tex.inst_mod = texture_component_select; 6425 6426 if (ctx->bc->chip_class == CAYMAN) { 6427 /* GATHER4 result order is different from TGSI TG4 */ 6428 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7; 6429 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7; 6430 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7; 6431 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 6432 } else { 6433 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 6434 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 6435 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 6436 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 6437 } 6438 } 6439 else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) { 6440 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 6441 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 6442 tex.dst_sel_z = 7; 6443 tex.dst_sel_w = 7; 6444 } 6445 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 6446 tex.dst_sel_x = 3; 6447 tex.dst_sel_y = 7; 6448 tex.dst_sel_z = 7; 6449 tex.dst_sel_w = 7; 6450 } 6451 else { 6452 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 6453 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 6454 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 
2 : 7; 6455 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 6456 } 6457 6458 6459 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ || 6460 inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 6461 tex.src_sel_x = 4; 6462 tex.src_sel_y = 4; 6463 tex.src_sel_z = 4; 6464 tex.src_sel_w = 4; 6465 } else if (src_loaded) { 6466 tex.src_sel_x = 0; 6467 tex.src_sel_y = 1; 6468 tex.src_sel_z = 2; 6469 tex.src_sel_w = 3; 6470 } else { 6471 tex.src_sel_x = ctx->src[0].swizzle[0]; 6472 tex.src_sel_y = ctx->src[0].swizzle[1]; 6473 tex.src_sel_z = ctx->src[0].swizzle[2]; 6474 tex.src_sel_w = ctx->src[0].swizzle[3]; 6475 tex.src_rel = ctx->src[0].rel; 6476 } 6477 6478 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 6479 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6480 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6481 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 6482 tex.src_sel_x = 1; 6483 tex.src_sel_y = 0; 6484 tex.src_sel_z = 3; 6485 tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 6486 } 6487 6488 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 6489 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 6490 tex.coord_type_x = 1; 6491 tex.coord_type_y = 1; 6492 } 6493 tex.coord_type_z = 1; 6494 tex.coord_type_w = 1; 6495 6496 tex.offset_x = offset_x; 6497 tex.offset_y = offset_y; 6498 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && 6499 (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 6500 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { 6501 tex.offset_z = 0; 6502 } 6503 else { 6504 tex.offset_z = offset_z; 6505 } 6506 6507 /* Put the depth for comparison in W. 6508 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 6509 * Some instructions expect the depth in Z. 
	 */
	if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
	     inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
	     inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) &&
	    opcode != FETCH_OP_SAMPLE_C_L &&
	    opcode != FETCH_OP_SAMPLE_C_LB) {
		tex.src_sel_w = tex.src_sel_z;
	}

	if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY ||
	    inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) {
		if (opcode == FETCH_OP_SAMPLE_C_L ||
		    opcode == FETCH_OP_SAMPLE_C_LB) {
			/* the array index is read from Y */
			tex.coord_type_y = 0;
		} else {
			/* the array index is read from Z */
			tex.coord_type_z = 0;
			tex.src_sel_z = tex.src_sel_y;
		}
	} else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
		   inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
		   ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
		    inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
		    (ctx->bc->chip_class >= EVERGREEN)))
		/* the array index is read from Z */
		tex.coord_type_z = 0;

	/* mask unused source components */
	if (opcode == FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) {
		switch (inst->Texture.Texture) {
		case TGSI_TEXTURE_2D:
		case TGSI_TEXTURE_RECT:
			tex.src_sel_z = 7;
			tex.src_sel_w = 7;
			break;
		case TGSI_TEXTURE_1D_ARRAY:
			tex.src_sel_y = 7;
			tex.src_sel_w = 7;
			break;
		case TGSI_TEXTURE_1D:
			tex.src_sel_y = 7;
			tex.src_sel_z = 7;
			tex.src_sel_w = 7;
			break;
		}
	}

	r = r600_bytecode_add_tex(ctx->bc, &tex);
	if (r)
		return r;

	/* add shadow ambient support - gallium doesn't do it yet */
	return 0;
}

/* Expand TGSI LRP (dst = src0*src1 + (1-src0)*src2) into:
 *   tmp = 1 - src0
 *   tmp = tmp * src2
 *   dst = MULADD(src0, src1, tmp)
 * When src0 is the 0.5 constant the whole thing collapses to
 * 0.5*(src1+src2), emitted as a single ADD with an output modifier. */
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	unsigned i, temp_regs[2];
	int r;

	/* optimize if it's just an equal balance */
	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
		for (i = 0; i < lasti + 1; i++) {
			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD;
			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
			/* omod 3 scales the sum by 0.5 — NOTE(review): omod
			 * encoding assumed from the ISA, confirm */
			alu.omod = 3;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.dst.chan = i;
			if (i == lasti) {
				alu.last = 1;
			}
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		return 0;
	}

	/* 1 - src0 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		r600_bytecode_src_toggle_neg(&alu.src[1]);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* (1 - src0) * src2 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_MUL;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* src0 * src1 + (1 - src0) * src2 */
	if (ctx->src[0].abs)
		temp_regs[0] = r600_get_temp(ctx);
	else
		temp_regs[0] = 0;
	if (ctx->src[1].abs)
		temp_regs[1] = r600_get_temp(ctx);
	else
		temp_regs[1] = 0;

	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_MULADD;
		alu.is_op3 = 1;
		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
		if (r)
			return r;
		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]);
		if (r)
			return r;
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI CMP (dst = src0 < 0 ? src1 : src2) emitted as CNDGE with src1/src2
 * swapped: CNDGE(a, b, c) = a >= 0 ? b : c, hence the src[1]/src[2] operand
 * order below.  |abs| sources go through tgsi_make_src_for_op3() temps. */
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, j;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int temp_regs[3];

	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
		temp_regs[j] = 0;
		if (ctx->src[j].abs)
			temp_regs[j] = r600_get_temp(ctx);
	}

	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDGE;
		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
		if (r)
			return r;
		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
		if (r)
			return r;
		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
		if (r)
			return r;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
6711 if (i == lasti) 6712 alu.last = 1; 6713 r = r600_bytecode_add_alu(ctx->bc, &alu); 6714 if (r) 6715 return r; 6716 } 6717 return 0; 6718} 6719 6720static int tgsi_ucmp(struct r600_shader_ctx *ctx) 6721{ 6722 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6723 struct r600_bytecode_alu alu; 6724 int i, r; 6725 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6726 6727 for (i = 0; i < lasti + 1; i++) { 6728 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6729 continue; 6730 6731 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6732 alu.op = ALU_OP3_CNDE_INT; 6733 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6734 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6735 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 6736 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6737 alu.dst.chan = i; 6738 alu.dst.write = 1; 6739 alu.is_op3 = 1; 6740 if (i == lasti) 6741 alu.last = 1; 6742 r = r600_bytecode_add_alu(ctx->bc, &alu); 6743 if (r) 6744 return r; 6745 } 6746 return 0; 6747} 6748 6749static int tgsi_xpd(struct r600_shader_ctx *ctx) 6750{ 6751 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6752 static const unsigned int src0_swizzle[] = {2, 0, 1}; 6753 static const unsigned int src1_swizzle[] = {1, 2, 0}; 6754 struct r600_bytecode_alu alu; 6755 uint32_t use_temp = 0; 6756 int i, r; 6757 6758 if (inst->Dst[0].Register.WriteMask != 0xf) 6759 use_temp = 1; 6760 6761 for (i = 0; i < 4; i++) { 6762 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6763 alu.op = ALU_OP2_MUL; 6764 if (i < 3) { 6765 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 6766 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 6767 } else { 6768 alu.src[0].sel = V_SQ_ALU_SRC_0; 6769 alu.src[0].chan = i; 6770 alu.src[1].sel = V_SQ_ALU_SRC_0; 6771 alu.src[1].chan = i; 6772 } 6773 6774 alu.dst.sel = ctx->temp_reg; 6775 alu.dst.chan = i; 6776 alu.dst.write = 1; 6777 6778 if (i == 3) 6779 alu.last = 
1; 6780 r = r600_bytecode_add_alu(ctx->bc, &alu); 6781 if (r) 6782 return r; 6783 } 6784 6785 for (i = 0; i < 4; i++) { 6786 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6787 alu.op = ALU_OP3_MULADD; 6788 6789 if (i < 3) { 6790 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 6791 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 6792 } else { 6793 alu.src[0].sel = V_SQ_ALU_SRC_0; 6794 alu.src[0].chan = i; 6795 alu.src[1].sel = V_SQ_ALU_SRC_0; 6796 alu.src[1].chan = i; 6797 } 6798 6799 alu.src[2].sel = ctx->temp_reg; 6800 alu.src[2].neg = 1; 6801 alu.src[2].chan = i; 6802 6803 if (use_temp) 6804 alu.dst.sel = ctx->temp_reg; 6805 else 6806 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6807 alu.dst.chan = i; 6808 alu.dst.write = 1; 6809 alu.is_op3 = 1; 6810 if (i == 3) 6811 alu.last = 1; 6812 r = r600_bytecode_add_alu(ctx->bc, &alu); 6813 if (r) 6814 return r; 6815 } 6816 if (use_temp) 6817 return tgsi_helper_copy(ctx, inst); 6818 return 0; 6819} 6820 6821static int tgsi_exp(struct r600_shader_ctx *ctx) 6822{ 6823 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6824 struct r600_bytecode_alu alu; 6825 int r; 6826 int i; 6827 6828 /* result.x = 2^floor(src); */ 6829 if (inst->Dst[0].Register.WriteMask & 1) { 6830 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6831 6832 alu.op = ALU_OP1_FLOOR; 6833 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6834 6835 alu.dst.sel = ctx->temp_reg; 6836 alu.dst.chan = 0; 6837 alu.dst.write = 1; 6838 alu.last = 1; 6839 r = r600_bytecode_add_alu(ctx->bc, &alu); 6840 if (r) 6841 return r; 6842 6843 if (ctx->bc->chip_class == CAYMAN) { 6844 for (i = 0; i < 3; i++) { 6845 alu.op = ALU_OP1_EXP_IEEE; 6846 alu.src[0].sel = ctx->temp_reg; 6847 alu.src[0].chan = 0; 6848 6849 alu.dst.sel = ctx->temp_reg; 6850 alu.dst.chan = i; 6851 alu.dst.write = i == 0; 6852 alu.last = i == 2; 6853 r = r600_bytecode_add_alu(ctx->bc, &alu); 6854 if (r) 6855 return r; 6856 } 6857 } else { 6858 alu.op 
= ALU_OP1_EXP_IEEE; 6859 alu.src[0].sel = ctx->temp_reg; 6860 alu.src[0].chan = 0; 6861 6862 alu.dst.sel = ctx->temp_reg; 6863 alu.dst.chan = 0; 6864 alu.dst.write = 1; 6865 alu.last = 1; 6866 r = r600_bytecode_add_alu(ctx->bc, &alu); 6867 if (r) 6868 return r; 6869 } 6870 } 6871 6872 /* result.y = tmp - floor(tmp); */ 6873 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 6874 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6875 6876 alu.op = ALU_OP1_FRACT; 6877 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6878 6879 alu.dst.sel = ctx->temp_reg; 6880#if 0 6881 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6882 if (r) 6883 return r; 6884#endif 6885 alu.dst.write = 1; 6886 alu.dst.chan = 1; 6887 6888 alu.last = 1; 6889 6890 r = r600_bytecode_add_alu(ctx->bc, &alu); 6891 if (r) 6892 return r; 6893 } 6894 6895 /* result.z = RoughApprox2ToX(tmp);*/ 6896 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 6897 if (ctx->bc->chip_class == CAYMAN) { 6898 for (i = 0; i < 3; i++) { 6899 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6900 alu.op = ALU_OP1_EXP_IEEE; 6901 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6902 6903 alu.dst.sel = ctx->temp_reg; 6904 alu.dst.chan = i; 6905 if (i == 2) { 6906 alu.dst.write = 1; 6907 alu.last = 1; 6908 } 6909 6910 r = r600_bytecode_add_alu(ctx->bc, &alu); 6911 if (r) 6912 return r; 6913 } 6914 } else { 6915 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6916 alu.op = ALU_OP1_EXP_IEEE; 6917 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6918 6919 alu.dst.sel = ctx->temp_reg; 6920 alu.dst.write = 1; 6921 alu.dst.chan = 2; 6922 6923 alu.last = 1; 6924 6925 r = r600_bytecode_add_alu(ctx->bc, &alu); 6926 if (r) 6927 return r; 6928 } 6929 } 6930 6931 /* result.w = 1.0;*/ 6932 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 6933 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6934 6935 alu.op = ALU_OP1_MOV; 6936 alu.src[0].sel = V_SQ_ALU_SRC_1; 6937 alu.src[0].chan = 0; 6938 6939 alu.dst.sel = ctx->temp_reg; 6940 
alu.dst.chan = 3; 6941 alu.dst.write = 1; 6942 alu.last = 1; 6943 r = r600_bytecode_add_alu(ctx->bc, &alu); 6944 if (r) 6945 return r; 6946 } 6947 return tgsi_helper_copy(ctx, inst); 6948} 6949 6950static int tgsi_log(struct r600_shader_ctx *ctx) 6951{ 6952 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6953 struct r600_bytecode_alu alu; 6954 int r; 6955 int i; 6956 6957 /* result.x = floor(log2(|src|)); */ 6958 if (inst->Dst[0].Register.WriteMask & 1) { 6959 if (ctx->bc->chip_class == CAYMAN) { 6960 for (i = 0; i < 3; i++) { 6961 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6962 6963 alu.op = ALU_OP1_LOG_IEEE; 6964 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6965 r600_bytecode_src_set_abs(&alu.src[0]); 6966 6967 alu.dst.sel = ctx->temp_reg; 6968 alu.dst.chan = i; 6969 if (i == 0) 6970 alu.dst.write = 1; 6971 if (i == 2) 6972 alu.last = 1; 6973 r = r600_bytecode_add_alu(ctx->bc, &alu); 6974 if (r) 6975 return r; 6976 } 6977 6978 } else { 6979 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6980 6981 alu.op = ALU_OP1_LOG_IEEE; 6982 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 6983 r600_bytecode_src_set_abs(&alu.src[0]); 6984 6985 alu.dst.sel = ctx->temp_reg; 6986 alu.dst.chan = 0; 6987 alu.dst.write = 1; 6988 alu.last = 1; 6989 r = r600_bytecode_add_alu(ctx->bc, &alu); 6990 if (r) 6991 return r; 6992 } 6993 6994 alu.op = ALU_OP1_FLOOR; 6995 alu.src[0].sel = ctx->temp_reg; 6996 alu.src[0].chan = 0; 6997 6998 alu.dst.sel = ctx->temp_reg; 6999 alu.dst.chan = 0; 7000 alu.dst.write = 1; 7001 alu.last = 1; 7002 7003 r = r600_bytecode_add_alu(ctx->bc, &alu); 7004 if (r) 7005 return r; 7006 } 7007 7008 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 7009 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 7010 7011 if (ctx->bc->chip_class == CAYMAN) { 7012 for (i = 0; i < 3; i++) { 7013 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7014 7015 alu.op = ALU_OP1_LOG_IEEE; 7016 r600_bytecode_src(&alu.src[0], &ctx->src[0], 
0); 7017 r600_bytecode_src_set_abs(&alu.src[0]); 7018 7019 alu.dst.sel = ctx->temp_reg; 7020 alu.dst.chan = i; 7021 if (i == 1) 7022 alu.dst.write = 1; 7023 if (i == 2) 7024 alu.last = 1; 7025 7026 r = r600_bytecode_add_alu(ctx->bc, &alu); 7027 if (r) 7028 return r; 7029 } 7030 } else { 7031 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7032 7033 alu.op = ALU_OP1_LOG_IEEE; 7034 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7035 r600_bytecode_src_set_abs(&alu.src[0]); 7036 7037 alu.dst.sel = ctx->temp_reg; 7038 alu.dst.chan = 1; 7039 alu.dst.write = 1; 7040 alu.last = 1; 7041 7042 r = r600_bytecode_add_alu(ctx->bc, &alu); 7043 if (r) 7044 return r; 7045 } 7046 7047 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7048 7049 alu.op = ALU_OP1_FLOOR; 7050 alu.src[0].sel = ctx->temp_reg; 7051 alu.src[0].chan = 1; 7052 7053 alu.dst.sel = ctx->temp_reg; 7054 alu.dst.chan = 1; 7055 alu.dst.write = 1; 7056 alu.last = 1; 7057 7058 r = r600_bytecode_add_alu(ctx->bc, &alu); 7059 if (r) 7060 return r; 7061 7062 if (ctx->bc->chip_class == CAYMAN) { 7063 for (i = 0; i < 3; i++) { 7064 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7065 alu.op = ALU_OP1_EXP_IEEE; 7066 alu.src[0].sel = ctx->temp_reg; 7067 alu.src[0].chan = 1; 7068 7069 alu.dst.sel = ctx->temp_reg; 7070 alu.dst.chan = i; 7071 if (i == 1) 7072 alu.dst.write = 1; 7073 if (i == 2) 7074 alu.last = 1; 7075 7076 r = r600_bytecode_add_alu(ctx->bc, &alu); 7077 if (r) 7078 return r; 7079 } 7080 } else { 7081 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7082 alu.op = ALU_OP1_EXP_IEEE; 7083 alu.src[0].sel = ctx->temp_reg; 7084 alu.src[0].chan = 1; 7085 7086 alu.dst.sel = ctx->temp_reg; 7087 alu.dst.chan = 1; 7088 alu.dst.write = 1; 7089 alu.last = 1; 7090 7091 r = r600_bytecode_add_alu(ctx->bc, &alu); 7092 if (r) 7093 return r; 7094 } 7095 7096 if (ctx->bc->chip_class == CAYMAN) { 7097 for (i = 0; i < 3; i++) { 7098 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7099 alu.op = ALU_OP1_RECIP_IEEE; 7100 
alu.src[0].sel = ctx->temp_reg; 7101 alu.src[0].chan = 1; 7102 7103 alu.dst.sel = ctx->temp_reg; 7104 alu.dst.chan = i; 7105 if (i == 1) 7106 alu.dst.write = 1; 7107 if (i == 2) 7108 alu.last = 1; 7109 7110 r = r600_bytecode_add_alu(ctx->bc, &alu); 7111 if (r) 7112 return r; 7113 } 7114 } else { 7115 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7116 alu.op = ALU_OP1_RECIP_IEEE; 7117 alu.src[0].sel = ctx->temp_reg; 7118 alu.src[0].chan = 1; 7119 7120 alu.dst.sel = ctx->temp_reg; 7121 alu.dst.chan = 1; 7122 alu.dst.write = 1; 7123 alu.last = 1; 7124 7125 r = r600_bytecode_add_alu(ctx->bc, &alu); 7126 if (r) 7127 return r; 7128 } 7129 7130 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7131 7132 alu.op = ALU_OP2_MUL; 7133 7134 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7135 r600_bytecode_src_set_abs(&alu.src[0]); 7136 7137 alu.src[1].sel = ctx->temp_reg; 7138 alu.src[1].chan = 1; 7139 7140 alu.dst.sel = ctx->temp_reg; 7141 alu.dst.chan = 1; 7142 alu.dst.write = 1; 7143 alu.last = 1; 7144 7145 r = r600_bytecode_add_alu(ctx->bc, &alu); 7146 if (r) 7147 return r; 7148 } 7149 7150 /* result.z = log2(|src|);*/ 7151 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 7152 if (ctx->bc->chip_class == CAYMAN) { 7153 for (i = 0; i < 3; i++) { 7154 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7155 7156 alu.op = ALU_OP1_LOG_IEEE; 7157 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7158 r600_bytecode_src_set_abs(&alu.src[0]); 7159 7160 alu.dst.sel = ctx->temp_reg; 7161 if (i == 2) 7162 alu.dst.write = 1; 7163 alu.dst.chan = i; 7164 if (i == 2) 7165 alu.last = 1; 7166 7167 r = r600_bytecode_add_alu(ctx->bc, &alu); 7168 if (r) 7169 return r; 7170 } 7171 } else { 7172 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7173 7174 alu.op = ALU_OP1_LOG_IEEE; 7175 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7176 r600_bytecode_src_set_abs(&alu.src[0]); 7177 7178 alu.dst.sel = ctx->temp_reg; 7179 alu.dst.write = 1; 7180 alu.dst.chan = 2; 7181 alu.last = 1; 7182 
7183 r = r600_bytecode_add_alu(ctx->bc, &alu); 7184 if (r) 7185 return r; 7186 } 7187 } 7188 7189 /* result.w = 1.0; */ 7190 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 7191 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7192 7193 alu.op = ALU_OP1_MOV; 7194 alu.src[0].sel = V_SQ_ALU_SRC_1; 7195 alu.src[0].chan = 0; 7196 7197 alu.dst.sel = ctx->temp_reg; 7198 alu.dst.chan = 3; 7199 alu.dst.write = 1; 7200 alu.last = 1; 7201 7202 r = r600_bytecode_add_alu(ctx->bc, &alu); 7203 if (r) 7204 return r; 7205 } 7206 7207 return tgsi_helper_copy(ctx, inst); 7208} 7209 7210static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 7211{ 7212 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7213 struct r600_bytecode_alu alu; 7214 int r; 7215 int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7216 unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index); 7217 7218 assert(inst->Dst[0].Register.Index < 3); 7219 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7220 7221 switch (inst->Instruction.Opcode) { 7222 case TGSI_OPCODE_ARL: 7223 alu.op = ALU_OP1_FLT_TO_INT_FLOOR; 7224 break; 7225 case TGSI_OPCODE_ARR: 7226 alu.op = ALU_OP1_FLT_TO_INT; 7227 break; 7228 case TGSI_OPCODE_UARL: 7229 alu.op = ALU_OP1_MOV; 7230 break; 7231 default: 7232 assert(0); 7233 return -1; 7234 } 7235 7236 for (i = 0; i <= lasti; ++i) { 7237 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7238 continue; 7239 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7240 alu.last = i == lasti; 7241 alu.dst.sel = reg; 7242 alu.dst.chan = i; 7243 alu.dst.write = 1; 7244 r = r600_bytecode_add_alu(ctx->bc, &alu); 7245 if (r) 7246 return r; 7247 } 7248 7249 if (inst->Dst[0].Register.Index > 0) 7250 ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0; 7251 else 7252 ctx->bc->ar_loaded = 0; 7253 7254 return 0; 7255} 7256static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 7257{ 7258 struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction; 7259 struct r600_bytecode_alu alu; 7260 int r; 7261 int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7262 7263 switch (inst->Instruction.Opcode) { 7264 case TGSI_OPCODE_ARL: 7265 memset(&alu, 0, sizeof(alu)); 7266 alu.op = ALU_OP1_FLOOR; 7267 alu.dst.sel = ctx->bc->ar_reg; 7268 alu.dst.write = 1; 7269 for (i = 0; i <= lasti; ++i) { 7270 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 7271 alu.dst.chan = i; 7272 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7273 alu.last = i == lasti; 7274 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 7275 return r; 7276 } 7277 } 7278 7279 memset(&alu, 0, sizeof(alu)); 7280 alu.op = ALU_OP1_FLT_TO_INT; 7281 alu.src[0].sel = ctx->bc->ar_reg; 7282 alu.dst.sel = ctx->bc->ar_reg; 7283 alu.dst.write = 1; 7284 /* FLT_TO_INT is trans-only on r600/r700 */ 7285 alu.last = TRUE; 7286 for (i = 0; i <= lasti; ++i) { 7287 alu.dst.chan = i; 7288 alu.src[0].chan = i; 7289 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 7290 return r; 7291 } 7292 break; 7293 case TGSI_OPCODE_ARR: 7294 memset(&alu, 0, sizeof(alu)); 7295 alu.op = ALU_OP1_FLT_TO_INT; 7296 alu.dst.sel = ctx->bc->ar_reg; 7297 alu.dst.write = 1; 7298 /* FLT_TO_INT is trans-only on r600/r700 */ 7299 alu.last = TRUE; 7300 for (i = 0; i <= lasti; ++i) { 7301 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 7302 alu.dst.chan = i; 7303 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7304 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 7305 return r; 7306 } 7307 } 7308 break; 7309 case TGSI_OPCODE_UARL: 7310 memset(&alu, 0, sizeof(alu)); 7311 alu.op = ALU_OP1_MOV; 7312 alu.dst.sel = ctx->bc->ar_reg; 7313 alu.dst.write = 1; 7314 for (i = 0; i <= lasti; ++i) { 7315 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 7316 alu.dst.chan = i; 7317 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7318 alu.last = i == lasti; 7319 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 7320 return r; 7321 } 7322 } 7323 break; 7324 default: 
7325 assert(0); 7326 return -1; 7327 } 7328 7329 ctx->bc->ar_loaded = 0; 7330 return 0; 7331} 7332 7333static int tgsi_opdst(struct r600_shader_ctx *ctx) 7334{ 7335 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7336 struct r600_bytecode_alu alu; 7337 int i, r = 0; 7338 7339 for (i = 0; i < 4; i++) { 7340 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7341 7342 alu.op = ALU_OP2_MUL; 7343 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7344 7345 if (i == 0 || i == 3) { 7346 alu.src[0].sel = V_SQ_ALU_SRC_1; 7347 } else { 7348 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7349 } 7350 7351 if (i == 0 || i == 2) { 7352 alu.src[1].sel = V_SQ_ALU_SRC_1; 7353 } else { 7354 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 7355 } 7356 if (i == 3) 7357 alu.last = 1; 7358 r = r600_bytecode_add_alu(ctx->bc, &alu); 7359 if (r) 7360 return r; 7361 } 7362 return 0; 7363} 7364 7365static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type) 7366{ 7367 struct r600_bytecode_alu alu; 7368 int r; 7369 7370 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7371 alu.op = opcode; 7372 alu.execute_mask = 1; 7373 alu.update_pred = 1; 7374 7375 alu.dst.sel = ctx->temp_reg; 7376 alu.dst.write = 1; 7377 alu.dst.chan = 0; 7378 7379 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 7380 alu.src[1].sel = V_SQ_ALU_SRC_0; 7381 alu.src[1].chan = 0; 7382 7383 alu.last = 1; 7384 7385 r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type); 7386 if (r) 7387 return r; 7388 return 0; 7389} 7390 7391static int pops(struct r600_shader_ctx *ctx, int pops) 7392{ 7393 unsigned force_pop = ctx->bc->force_add_cf; 7394 7395 if (!force_pop) { 7396 int alu_pop = 3; 7397 if (ctx->bc->cf_last) { 7398 if (ctx->bc->cf_last->op == CF_OP_ALU) 7399 alu_pop = 0; 7400 else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER) 7401 alu_pop = 1; 7402 } 7403 alu_pop += pops; 7404 if (alu_pop == 1) { 7405 ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER; 7406 ctx->bc->force_add_cf = 1; 
7407 } else if (alu_pop == 2) { 7408 ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER; 7409 ctx->bc->force_add_cf = 1; 7410 } else { 7411 force_pop = 1; 7412 } 7413 } 7414 7415 if (force_pop) { 7416 r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); 7417 ctx->bc->cf_last->pop_count = pops; 7418 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 7419 } 7420 7421 return 0; 7422} 7423 7424static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, 7425 unsigned reason) 7426{ 7427 struct r600_stack_info *stack = &ctx->bc->stack; 7428 unsigned elements, entries; 7429 7430 unsigned entry_size = stack->entry_size; 7431 7432 elements = (stack->loop + stack->push_wqm ) * entry_size; 7433 elements += stack->push; 7434 7435 switch (ctx->bc->chip_class) { 7436 case R600: 7437 case R700: 7438 /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on 7439 * the stack must be reserved to hold the current active/continue 7440 * masks */ 7441 if (reason == FC_PUSH_VPM) { 7442 elements += 2; 7443 } 7444 break; 7445 7446 case CAYMAN: 7447 /* r9xx: any stack operation on empty stack consumes 2 additional 7448 * elements */ 7449 elements += 2; 7450 7451 /* fallthrough */ 7452 /* FIXME: do the two elements added above cover the cases for the 7453 * r8xx+ below? */ 7454 7455 case EVERGREEN: 7456 /* r8xx+: 2 extra elements are not always required, but one extra 7457 * element must be added for each of the following cases: 7458 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest 7459 * stack usage. 7460 * (Currently we don't use ALU_ELSE_AFTER.) 7461 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM 7462 * PUSH instruction executed. 7463 * 7464 * NOTE: it seems we also need to reserve additional element in some 7465 * other cases, e.g. 
when we have 4 levels of PUSH_VPM in the shader, 7466 * then STACK_SIZE should be 2 instead of 1 */ 7467 if (reason == FC_PUSH_VPM) { 7468 elements += 1; 7469 } 7470 break; 7471 7472 default: 7473 assert(0); 7474 break; 7475 } 7476 7477 /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 7478 * for all chips, so we use 4 in the final formula, not the real entry_size 7479 * for the chip */ 7480 entry_size = 4; 7481 7482 entries = (elements + (entry_size - 1)) / entry_size; 7483 7484 if (entries > stack->max_entries) 7485 stack->max_entries = entries; 7486} 7487 7488static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 7489{ 7490 switch(reason) { 7491 case FC_PUSH_VPM: 7492 --ctx->bc->stack.push; 7493 assert(ctx->bc->stack.push >= 0); 7494 break; 7495 case FC_PUSH_WQM: 7496 --ctx->bc->stack.push_wqm; 7497 assert(ctx->bc->stack.push_wqm >= 0); 7498 break; 7499 case FC_LOOP: 7500 --ctx->bc->stack.loop; 7501 assert(ctx->bc->stack.loop >= 0); 7502 break; 7503 default: 7504 assert(0); 7505 break; 7506 } 7507} 7508 7509static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) 7510{ 7511 switch (reason) { 7512 case FC_PUSH_VPM: 7513 ++ctx->bc->stack.push; 7514 break; 7515 case FC_PUSH_WQM: 7516 ++ctx->bc->stack.push_wqm; 7517 case FC_LOOP: 7518 ++ctx->bc->stack.loop; 7519 break; 7520 default: 7521 assert(0); 7522 } 7523 7524 callstack_update_max_depth(ctx, reason); 7525} 7526 7527static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 7528{ 7529 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 7530 7531 sp->mid = realloc((void *)sp->mid, 7532 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 7533 sp->mid[sp->num_mid] = ctx->bc->cf_last; 7534 sp->num_mid++; 7535} 7536 7537static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 7538{ 7539 ctx->bc->fc_sp++; 7540 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 7541 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 7542} 
7543 7544static void fc_poplevel(struct r600_shader_ctx *ctx) 7545{ 7546 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 7547 free(sp->mid); 7548 sp->mid = NULL; 7549 sp->num_mid = 0; 7550 sp->start = NULL; 7551 sp->type = 0; 7552 ctx->bc->fc_sp--; 7553} 7554 7555#if 0 7556static int emit_return(struct r600_shader_ctx *ctx) 7557{ 7558 r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN)); 7559 return 0; 7560} 7561 7562static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 7563{ 7564 7565 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP)); 7566 ctx->bc->cf_last->pop_count = pops; 7567 /* XXX work out offset */ 7568 return 0; 7569} 7570 7571static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 7572{ 7573 return 0; 7574} 7575 7576static void emit_testflag(struct r600_shader_ctx *ctx) 7577{ 7578 7579} 7580 7581static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 7582{ 7583 emit_testflag(ctx); 7584 emit_jump_to_offset(ctx, 1, 4); 7585 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 7586 pops(ctx, ifidx + 1); 7587 emit_return(ctx); 7588} 7589 7590static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 7591{ 7592 emit_testflag(ctx); 7593 7594 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 7595 ctx->bc->cf_last->pop_count = 1; 7596 7597 fc_set_mid(ctx, fc_sp); 7598 7599 pops(ctx, 1); 7600} 7601#endif 7602 7603static int emit_if(struct r600_shader_ctx *ctx, int opcode) 7604{ 7605 int alu_type = CF_OP_ALU_PUSH_BEFORE; 7606 7607 /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by 7608 * LOOP_STARTxxx for nested loops may put the branch stack into a state 7609 * such that ALU_PUSH_BEFORE doesn't work as expected. 
Workaround this 7610 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */ 7611 if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) { 7612 r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH); 7613 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 7614 alu_type = CF_OP_ALU; 7615 } 7616 7617 emit_logic_pred(ctx, opcode, alu_type); 7618 7619 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); 7620 7621 fc_pushlevel(ctx, FC_IF); 7622 7623 callstack_push(ctx, FC_PUSH_VPM); 7624 return 0; 7625} 7626 7627static int tgsi_if(struct r600_shader_ctx *ctx) 7628{ 7629 return emit_if(ctx, ALU_OP2_PRED_SETNE); 7630} 7631 7632static int tgsi_uif(struct r600_shader_ctx *ctx) 7633{ 7634 return emit_if(ctx, ALU_OP2_PRED_SETNE_INT); 7635} 7636 7637static int tgsi_else(struct r600_shader_ctx *ctx) 7638{ 7639 r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE); 7640 ctx->bc->cf_last->pop_count = 1; 7641 7642 fc_set_mid(ctx, ctx->bc->fc_sp); 7643 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 7644 return 0; 7645} 7646 7647static int tgsi_endif(struct r600_shader_ctx *ctx) 7648{ 7649 pops(ctx, 1); 7650 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 7651 R600_ERR("if/endif unbalanced in shader\n"); 7652 return -1; 7653 } 7654 7655 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 7656 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 7657 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 7658 } else { 7659 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 7660 } 7661 fc_poplevel(ctx); 7662 7663 callstack_pop(ctx, FC_PUSH_VPM); 7664 return 0; 7665} 7666 7667static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 7668{ 7669 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not 7670 * limited to 4096 iterations, like the other LOOP_* instructions. 
*/ 7671 r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10); 7672 7673 fc_pushlevel(ctx, FC_LOOP); 7674 7675 /* check stack depth */ 7676 callstack_push(ctx, FC_LOOP); 7677 return 0; 7678} 7679 7680static int tgsi_endloop(struct r600_shader_ctx *ctx) 7681{ 7682 int i; 7683 7684 r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END); 7685 7686 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 7687 R600_ERR("loop/endloop in shader code are not paired.\n"); 7688 return -EINVAL; 7689 } 7690 7691 /* fixup loop pointers - from r600isa 7692 LOOP END points to CF after LOOP START, 7693 LOOP START point to CF after LOOP END 7694 BRK/CONT point to LOOP END CF 7695 */ 7696 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 7697 7698 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 7699 7700 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 7701 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 7702 } 7703 /* XXX add LOOPRET support */ 7704 fc_poplevel(ctx); 7705 callstack_pop(ctx, FC_LOOP); 7706 return 0; 7707} 7708 7709static int tgsi_loop_breakc(struct r600_shader_ctx *ctx) 7710{ 7711 int r; 7712 unsigned int fscp; 7713 7714 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 7715 { 7716 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 7717 break; 7718 } 7719 if (fscp == 0) { 7720 R600_ERR("BREAKC not inside loop/endloop pair\n"); 7721 return -EINVAL; 7722 } 7723 7724 if (ctx->bc->chip_class == EVERGREEN && 7725 ctx->bc->family != CHIP_CYPRESS && 7726 ctx->bc->family != CHIP_JUNIPER) { 7727 /* HW bug: ALU_BREAK does not save the active mask correctly */ 7728 r = tgsi_uif(ctx); 7729 if (r) 7730 return r; 7731 7732 r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK); 7733 if (r) 7734 return r; 7735 fc_set_mid(ctx, fscp); 7736 7737 return tgsi_endif(ctx); 7738 } else { 7739 r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK); 7740 if (r) 7741 return r; 7742 
fc_set_mid(ctx, fscp); 7743 } 7744 7745 return 0; 7746} 7747 7748static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 7749{ 7750 unsigned int fscp; 7751 7752 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 7753 { 7754 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 7755 break; 7756 } 7757 7758 if (fscp == 0) { 7759 R600_ERR("Break not inside loop/endloop pair\n"); 7760 return -EINVAL; 7761 } 7762 7763 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 7764 7765 fc_set_mid(ctx, fscp); 7766 7767 return 0; 7768} 7769 7770static int tgsi_gs_emit(struct r600_shader_ctx *ctx) 7771{ 7772 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7773 int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX]; 7774 int r; 7775 7776 if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) 7777 emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE); 7778 7779 r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 7780 if (!r) { 7781 ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream 7782 return emit_inc_ring_offset(ctx, stream, TRUE); 7783 } 7784 return r; 7785} 7786 7787static int tgsi_umad(struct r600_shader_ctx *ctx) 7788{ 7789 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 7790 struct r600_bytecode_alu alu; 7791 int i, j, k, r; 7792 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 7793 7794 /* src0 * src1 */ 7795 for (i = 0; i < lasti + 1; i++) { 7796 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7797 continue; 7798 7799 if (ctx->bc->chip_class == CAYMAN) { 7800 for (j = 0 ; j < 4; j++) { 7801 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7802 7803 alu.op = ALU_OP2_MULLO_UINT; 7804 for (k = 0; k < inst->Instruction.NumSrcRegs; k++) { 7805 r600_bytecode_src(&alu.src[k], &ctx->src[k], i); 7806 } 7807 alu.dst.chan = j; 7808 alu.dst.sel = ctx->temp_reg; 7809 alu.dst.write = (j == i); 7810 if (j == 3) 7811 alu.last = 1; 
7812 r = r600_bytecode_add_alu(ctx->bc, &alu); 7813 if (r) 7814 return r; 7815 } 7816 } else { 7817 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7818 7819 alu.dst.chan = i; 7820 alu.dst.sel = ctx->temp_reg; 7821 alu.dst.write = 1; 7822 7823 alu.op = ALU_OP2_MULLO_UINT; 7824 for (j = 0; j < 2; j++) { 7825 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 7826 } 7827 7828 alu.last = 1; 7829 r = r600_bytecode_add_alu(ctx->bc, &alu); 7830 if (r) 7831 return r; 7832 } 7833 } 7834 7835 7836 for (i = 0; i < lasti + 1; i++) { 7837 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 7838 continue; 7839 7840 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7841 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 7842 7843 alu.op = ALU_OP2_ADD_INT; 7844 7845 alu.src[0].sel = ctx->temp_reg; 7846 alu.src[0].chan = i; 7847 7848 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 7849 if (i == lasti) { 7850 alu.last = 1; 7851 } 7852 r = r600_bytecode_add_alu(ctx->bc, &alu); 7853 if (r) 7854 return r; 7855 } 7856 return 0; 7857} 7858 7859static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 7860 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl}, 7861 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 7862 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 7863 7864 /* XXX: 7865 * For state trackers other than OpenGL, we'll want to use 7866 * _RECIP_IEEE instead. 
7867 */ 7868 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 7869 7870 [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, 7871 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 7872 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 7873 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 7874 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 7875 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 7876 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 7877 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 7878 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, 7879 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, 7880 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, 7881 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, 7882 [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, 7883 [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, 7884 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, 7885 [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, 7886 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, 7887 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, 7888 [22] = { ALU_OP0_NOP, tgsi_unsupported}, 7889 [23] = { ALU_OP0_NOP, tgsi_unsupported}, 7890 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, 7891 [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, 7892 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, 7893 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, 7894 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 7895 [TGSI_OPCODE_LG2] = { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 7896 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, 7897 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, 7898 [32] = { ALU_OP0_NOP, tgsi_unsupported}, 7899 [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, 7900 [34] = { ALU_OP0_NOP, tgsi_unsupported}, 7901 [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, 7902 [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, 7903 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 7904 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 
7905 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 7906 [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, 7907 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, 7908 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, 7909 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 7910 [44] = { ALU_OP0_NOP, tgsi_unsupported}, 7911 [TGSI_OPCODE_SEQ] = { ALU_OP2_SETE, tgsi_op2}, 7912 [46] = { ALU_OP0_NOP, tgsi_unsupported}, 7913 [TGSI_OPCODE_SGT] = { ALU_OP2_SETGT, tgsi_op2}, 7914 [TGSI_OPCODE_SIN] = { ALU_OP1_SIN, tgsi_trig}, 7915 [TGSI_OPCODE_SLE] = { ALU_OP2_SETGE, tgsi_op2_swap}, 7916 [TGSI_OPCODE_SNE] = { ALU_OP2_SETNE, tgsi_op2}, 7917 [51] = { ALU_OP0_NOP, tgsi_unsupported}, 7918 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, 7919 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, 7920 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, 7921 [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, 7922 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, 7923 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, 7924 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 7925 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 7926 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 7927 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_r600_arl}, 7928 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 7929 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 7930 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 7931 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 7932 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 7933 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 7934 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 7935 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 7936 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 7937 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 7938 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 7939 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 7940 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 7941 
[TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 7942 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 7943 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 7944 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 7945 [TGSI_OPCODE_DDX_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 7946 [TGSI_OPCODE_DDY_FINE] = { ALU_OP0_NOP, tgsi_unsupported}, 7947 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 7948 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 7949 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 7950 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 7951 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 7952 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 7953 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2_trans}, 7954 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 7955 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 7956 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 7957 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 7958 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 7959 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 7960 [TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 7961 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 7962 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 7963 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 7964 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 7965 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 7966 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 7967 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 7968 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 7969 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 7970 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 7971 [105] = { ALU_OP0_NOP, tgsi_unsupported}, 7972 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 7973 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 7974 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 7975 [TGSI_OPCODE_FSGE] = { 
ALU_OP2_SETGE_DX10, tgsi_op2}, 7976 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 7977 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 7978 [112] = { ALU_OP0_NOP, tgsi_unsupported}, 7979 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 7980 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 7981 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc}, 7982 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 7983 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 7984 [118] = { ALU_OP0_NOP, tgsi_unsupported}, 7985 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, 7986 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 7987 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 7988 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 7989 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 7990 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 7991 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2_trans}, 7992 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 7993 [TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, 7994 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 7995 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 7996 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 7997 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 7998 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 7999 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 8000 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 8001 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 8002 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 8003 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 8004 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2_trans}, 8005 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 8006 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2_swap}, 8007 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8008 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 8009 
[TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 8010 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 8011 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 8012 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 8013 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 8014 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 8015 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 8016 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 8017 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 8018 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 8019 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 8020 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 8021 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 8022 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 8023 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_r600_arl}, 8024 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 8025 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 8026 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 8027 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 8028 [TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported}, 8029 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8030 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8031 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 8032 [TGSI_OPCODE_BARRIER] = { ALU_OP0_NOP, tgsi_unsupported}, 8033 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 8034 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 8035 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 8036 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 8037 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8038 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 8039 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8040 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8041 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 8042 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 8043 
[TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 8044 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 8045 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 8046 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, tgsi_op2_trans}, 8047 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, tgsi_op2_trans}, 8048 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_unsupported}, 8049 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_unsupported}, 8050 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_unsupported}, 8051 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_unsupported}, 8052 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_unsupported}, 8053 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_unsupported}, 8054 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_unsupported}, 8055 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_unsupported}, 8056 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_unsupported}, 8057 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_unsupported}, 8058 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_unsupported}, 8059 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_unsupported}, 8060 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, tgsi_unsupported}, 8061 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 8062}; 8063 8064static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 8065 [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_eg_arl}, 8066 [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, 8067 [TGSI_OPCODE_LIT] = { ALU_OP0_NOP, tgsi_lit}, 8068 [TGSI_OPCODE_RCP] = { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 8069 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, 8070 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, 8071 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, 8072 [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, 8073 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, 8074 [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, 8075 [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, 8076 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, 8077 [TGSI_OPCODE_MIN] = { 
ALU_OP2_MIN, tgsi_op2},
	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD, tgsi_op3},
	[TGSI_OPCODE_SUB]	= { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
	[TGSI_OPCODE_FMA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_DP2A]	= { ALU_OP0_NOP, tgsi_unsupported},
	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
	[TGSI_OPCODE_CLAMP]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
	[TGSI_OPCODE_XPD]	= { ALU_OP0_NOP, tgsi_xpd},
	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ABS]	= { ALU_OP1_MOV, tgsi_op2},
	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DPH]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},	/* unconditional kill */
	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, tgsi_trig},
	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_eg_arl},
	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
	[TGSI_OPCODE_SCS]	= { ALU_OP0_NOP, tgsi_scs},
	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
	[TGSI_OPCODE_ENDIF]	= { ALU_OP0_NOP, tgsi_endif},
	[TGSI_OPCODE_DDX_FINE]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY_FINE]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_PUSHA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_POPA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CEIL]	= { ALU_OP1_CEIL, tgsi_op2},
	[TGSI_OPCODE_I2F]	= { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
	[TGSI_OPCODE_NOT]	= { ALU_OP1_NOT_INT, tgsi_op2},
	[TGSI_OPCODE_TRUNC]	= { ALU_OP1_TRUNC, tgsi_op2},
	[TGSI_OPCODE_SHL]	= { ALU_OP2_LSHL_INT, tgsi_op2},
	[88]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_AND]	= { ALU_OP2_AND_INT, tgsi_op2},
	[TGSI_OPCODE_OR]	= { ALU_OP2_OR_INT, tgsi_op2},
	[TGSI_OPCODE_MOD]	= { ALU_OP0_NOP, tgsi_imod},
	[TGSI_OPCODE_XOR]	= { ALU_OP2_XOR_INT, tgsi_op2},
	[TGSI_OPCODE_SAD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXF]	= { FETCH_OP_LD, tgsi_tex},
	[TGSI_OPCODE_TXQ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_CONT]	= { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
	[TGSI_OPCODE_EMIT]	= { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_ENDPRIM]	= { CF_OP_CUT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_BGNLOOP]	= { ALU_OP0_NOP, tgsi_bgnloop},
	[TGSI_OPCODE_BGNSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXQ_LZ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
	[105]			= { ALU_OP0_NOP, tgsi_unsupported},
	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FSEQ]	= { ALU_OP2_SETE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
	[112]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CALLNZ]	= { ALU_OP0_NOP, tgsi_unsupported},
	[114]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BREAKC]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_KILL_IF]	= { ALU_OP2_KILLGT, tgsi_kill},	/* conditional kill */
	[TGSI_OPCODE_END]	= { ALU_OP0_NOP, tgsi_end},	/* aka HALT */
	[118]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_F2I]	= { ALU_OP1_FLT_TO_INT, tgsi_f2i},
	[TGSI_OPCODE_IDIV]	= { ALU_OP0_NOP, tgsi_idiv},
	[TGSI_OPCODE_IMAX]	= { ALU_OP2_MAX_INT, tgsi_op2},
	[TGSI_OPCODE_IMIN]	= { ALU_OP2_MIN_INT, tgsi_op2},
	[TGSI_OPCODE_INEG]	= { ALU_OP2_SUB_INT, tgsi_ineg},
	[TGSI_OPCODE_ISGE]	= { ALU_OP2_SETGE_INT, tgsi_op2},
	[TGSI_OPCODE_ISHR]	= { ALU_OP2_ASHR_INT, tgsi_op2},
	[TGSI_OPCODE_ISLT]	= { ALU_OP2_SETGT_INT, tgsi_op2_swap},
	[TGSI_OPCODE_F2U]	= { ALU_OP1_FLT_TO_UINT, tgsi_f2i},
	[TGSI_OPCODE_U2F]	= { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
	[TGSI_OPCODE_UADD]	= { ALU_OP2_ADD_INT, tgsi_op2},
	[TGSI_OPCODE_UDIV]	= { ALU_OP0_NOP, tgsi_udiv},
	[TGSI_OPCODE_UMAD]	= { ALU_OP0_NOP, tgsi_umad},
	[TGSI_OPCODE_UMAX]	= { ALU_OP2_MAX_UINT, tgsi_op2},
	[TGSI_OPCODE_UMIN]	= { ALU_OP2_MIN_UINT, tgsi_op2},
	[TGSI_OPCODE_UMOD]	= { ALU_OP0_NOP, tgsi_umod},
	[TGSI_OPCODE_UMUL]	= { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_USEQ]	= { ALU_OP2_SETE_INT, tgsi_op2},
	[TGSI_OPCODE_USGE]	= { ALU_OP2_SETGE_UINT, tgsi_op2},
	[TGSI_OPCODE_USHR]	= { ALU_OP2_LSHR_INT, tgsi_op2},
	[TGSI_OPCODE_USLT]	= { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
	[TGSI_OPCODE_USNE]	= { ALU_OP2_SETNE_INT, tgsi_op2},
	[TGSI_OPCODE_SWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CASE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DEFAULT]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDSWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I_MS]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_B]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C_LZ]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_D]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_L]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_GATHER4]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SVIEWINFO]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_POS]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_INFO]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_UARL]	= { ALU_OP1_MOVA_INT, tgsi_eg_arl},
	[TGSI_OPCODE_UCMP]	= { ALU_OP0_NOP, tgsi_ucmp},
	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_MFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_LFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BARRIER]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUADD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXCHG]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMCAS]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMAND]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMOR]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXOR]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX2]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXB2]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[TGSI_OPCODE_TXL2]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_IMUL_HI]	= { ALU_OP2_MULHI_INT, tgsi_op2_trans},
	[TGSI_OPCODE_UMUL_HI]	= { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_TG4]	= { FETCH_OP_GATHER4, tgsi_tex},
	[TGSI_OPCODE_LODQ]	= { FETCH_OP_GET_LOD, tgsi_tex},
	[TGSI_OPCODE_IBFE]	= { ALU_OP3_BFE_INT, tgsi_op3},
	[TGSI_OPCODE_UBFE]	= { ALU_OP3_BFE_UINT, tgsi_op3},
	[TGSI_OPCODE_BFI]	= { ALU_OP0_NOP, tgsi_bfi},
	[TGSI_OPCODE_BREV]	= { ALU_OP1_BFREV_INT, tgsi_op2},
	[TGSI_OPCODE_POPC]	= { ALU_OP1_BCNT_INT, tgsi_op2},
	[TGSI_OPCODE_LSB]	= { ALU_OP1_FFBL_INT, tgsi_op2},
	[TGSI_OPCODE_IMSB]	= { ALU_OP1_FFBH_INT, tgsi_msb},
	[TGSI_OPCODE_UMSB]	= { ALU_OP1_FFBH_UINT, tgsi_msb},
	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_F2D]	= { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
	[TGSI_OPCODE_D2F]	= { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DABS]	= { ALU_OP1_MOV, tgsi_op2_64},
	[TGSI_OPCODE_DNEG]	= { ALU_OP2_ADD_64, tgsi_dneg},
	[TGSI_OPCODE_DADD]	= { ALU_OP2_ADD_64, tgsi_op2_64},
	[TGSI_OPCODE_DMUL]	= { ALU_OP2_MUL_64, cayman_mul_double_instr},
	[TGSI_OPCODE_DMAX]	= { ALU_OP2_MAX_64, tgsi_op2_64},
	[TGSI_OPCODE_DMIN]	= { ALU_OP2_MIN_64, tgsi_op2_64},
	[TGSI_OPCODE_DSLT]	= { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
	[TGSI_OPCODE_DSGE]	= { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DSEQ]	= { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DSNE]	= { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DRCP]	= { ALU_OP2_RECIP_64, cayman_emit_double_instr},
	[TGSI_OPCODE_DSQRT]	= { ALU_OP2_SQRT_64, cayman_emit_double_instr},
	[TGSI_OPCODE_DMAD]	= { ALU_OP3_FMA_64, tgsi_op3_64},
	[TGSI_OPCODE_DFRAC]	= { ALU_OP1_FRACT_64, tgsi_op2_64},
	[TGSI_OPCODE_DLDEXP]	= { ALU_OP2_LDEXP_64, tgsi_op2_64},
	[TGSI_OPCODE_DFRACEXP]	= { ALU_OP1_FREXP_64, tgsi_dfracexp},
	[TGSI_OPCODE_D2I]	= { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
	[TGSI_OPCODE_I2D]	= { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
	[TGSI_OPCODE_D2U]	= { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
	[TGSI_OPCODE_U2D]	= { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
	[TGSI_OPCODE_DRSQ]	= { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
};

/* Cayman opcode dispatch table. */
static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_eg_arl},
	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},
	[TGSI_OPCODE_RCP]	= { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_RSQ]	= { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_EXP]	= { ALU_OP0_NOP, tgsi_exp},
	[TGSI_OPCODE_LOG]	= { ALU_OP0_NOP, tgsi_log},
	[TGSI_OPCODE_MUL]	= { ALU_OP2_MUL, tgsi_op2},
	[TGSI_OPCODE_ADD]	= { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN, tgsi_op2},
	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD, tgsi_op3},
	[TGSI_OPCODE_SUB]	= { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
	[TGSI_OPCODE_FMA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_DP2A]	= { ALU_OP0_NOP, tgsi_unsupported},
	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
	[TGSI_OPCODE_CLAMP]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, cayman_pow},
	[TGSI_OPCODE_XPD]	= {
ALU_OP0_NOP, tgsi_xpd},
	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ABS]	= { ALU_OP1_MOV, tgsi_op2},
	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DPH]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, cayman_trig},
	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},	/* unconditional kill */
	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, cayman_trig},
	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_eg_arl},
	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
	[TGSI_OPCODE_SCS]	= { ALU_OP0_NOP, tgsi_scs},
	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
	[TGSI_OPCODE_ENDIF]	= { ALU_OP0_NOP, tgsi_endif},
	[TGSI_OPCODE_DDX_FINE]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY_FINE]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_PUSHA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_POPA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CEIL]	= { ALU_OP1_CEIL, tgsi_op2},
	[TGSI_OPCODE_I2F]	= { ALU_OP1_INT_TO_FLT, tgsi_op2},
	[TGSI_OPCODE_NOT]	= { ALU_OP1_NOT_INT, tgsi_op2},
	[TGSI_OPCODE_TRUNC]	= { ALU_OP1_TRUNC, tgsi_op2},
	[TGSI_OPCODE_SHL]	= { ALU_OP2_LSHL_INT, tgsi_op2},
	[88]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_AND]	= { ALU_OP2_AND_INT, tgsi_op2},
	[TGSI_OPCODE_OR]	= { ALU_OP2_OR_INT, tgsi_op2},
	[TGSI_OPCODE_MOD]	= { ALU_OP0_NOP, tgsi_imod},
	[TGSI_OPCODE_XOR]	= { ALU_OP2_XOR_INT, tgsi_op2},
	[TGSI_OPCODE_SAD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXF]	= { FETCH_OP_LD, tgsi_tex},
	[TGSI_OPCODE_TXQ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_CONT]	= { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
	[TGSI_OPCODE_EMIT]	= { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_ENDPRIM]	= { CF_OP_CUT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_BGNLOOP]	= { ALU_OP0_NOP, tgsi_bgnloop},
	[TGSI_OPCODE_BGNSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXQ_LZ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
	[105]			= { ALU_OP0_NOP, tgsi_unsupported},
	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FSEQ]	= { ALU_OP2_SETE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
	[112]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CALLNZ]	= { ALU_OP0_NOP, tgsi_unsupported},
	[114]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BREAKC]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_KILL_IF]	= { ALU_OP2_KILLGT, tgsi_kill},	/* conditional kill */
	[TGSI_OPCODE_END]	= { ALU_OP0_NOP, tgsi_end},	/* aka HALT */
	[118]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_F2I]	= { ALU_OP1_FLT_TO_INT, tgsi_op2},
	[TGSI_OPCODE_IDIV]	= { ALU_OP0_NOP, tgsi_idiv},
	[TGSI_OPCODE_IMAX]	= { ALU_OP2_MAX_INT, tgsi_op2},
	[TGSI_OPCODE_IMIN]	= { ALU_OP2_MIN_INT, tgsi_op2},
	[TGSI_OPCODE_INEG]	= { ALU_OP2_SUB_INT, tgsi_ineg},
	[TGSI_OPCODE_ISGE]	= { ALU_OP2_SETGE_INT, tgsi_op2},
	[TGSI_OPCODE_ISHR]	= { ALU_OP2_ASHR_INT, tgsi_op2},
	[TGSI_OPCODE_ISLT]	= { ALU_OP2_SETGT_INT, tgsi_op2_swap},
	[TGSI_OPCODE_F2U]	= { ALU_OP1_FLT_TO_UINT, tgsi_op2},
	[TGSI_OPCODE_U2F]	= { ALU_OP1_UINT_TO_FLT, tgsi_op2},
	[TGSI_OPCODE_UADD]	= { ALU_OP2_ADD_INT, tgsi_op2},
	[TGSI_OPCODE_UDIV]	= { ALU_OP0_NOP, tgsi_udiv},
	[TGSI_OPCODE_UMAD]	= { ALU_OP0_NOP, tgsi_umad},
	[TGSI_OPCODE_UMAX]	= { ALU_OP2_MAX_UINT, tgsi_op2},
	[TGSI_OPCODE_UMIN]	= { ALU_OP2_MIN_UINT, tgsi_op2},
	[TGSI_OPCODE_UMOD]	= { ALU_OP0_NOP, tgsi_umod},
	[TGSI_OPCODE_UMUL]	= { ALU_OP2_MULLO_INT, cayman_mul_int_instr},
	[TGSI_OPCODE_USEQ]	= { ALU_OP2_SETE_INT, tgsi_op2},
	[TGSI_OPCODE_USGE]	= { ALU_OP2_SETGE_UINT, tgsi_op2},
	[TGSI_OPCODE_USHR]	= { ALU_OP2_LSHR_INT, tgsi_op2},
	[TGSI_OPCODE_USLT]	= { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
	[TGSI_OPCODE_USNE]	= { ALU_OP2_SETNE_INT, tgsi_op2},
	[TGSI_OPCODE_SWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CASE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DEFAULT]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDSWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I_MS]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_B]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C_LZ]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_D]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_L]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_GATHER4]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SVIEWINFO]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_POS]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_INFO]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_UARL]	= { ALU_OP1_MOVA_INT, tgsi_eg_arl},
	[TGSI_OPCODE_UCMP]	= { ALU_OP0_NOP, tgsi_ucmp},
	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_MFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_LFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BARRIER]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUADD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXCHG]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMCAS]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMAND]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMOR]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXOR]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX2]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXB2]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[TGSI_OPCODE_TXL2]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_IMUL_HI]	= { ALU_OP2_MULHI_INT, cayman_mul_int_instr},
	[TGSI_OPCODE_UMUL_HI]	= { ALU_OP2_MULHI_UINT, cayman_mul_int_instr},
	[TGSI_OPCODE_TG4]	= { FETCH_OP_GATHER4, tgsi_tex},
	[TGSI_OPCODE_LODQ]	= { FETCH_OP_GET_LOD, tgsi_tex},
	[TGSI_OPCODE_IBFE]	= { ALU_OP3_BFE_INT, tgsi_op3},
	[TGSI_OPCODE_UBFE]	= { ALU_OP3_BFE_UINT, tgsi_op3},
	[TGSI_OPCODE_BFI]	= { ALU_OP0_NOP, tgsi_bfi},
	[TGSI_OPCODE_BREV]	= { ALU_OP1_BFREV_INT, tgsi_op2},
	[TGSI_OPCODE_POPC]	= { ALU_OP1_BCNT_INT, tgsi_op2},
	[TGSI_OPCODE_LSB]	= { ALU_OP1_FFBL_INT, tgsi_op2},
	[TGSI_OPCODE_IMSB]	= { ALU_OP1_FFBH_INT, tgsi_msb},
	[TGSI_OPCODE_UMSB]	= { ALU_OP1_FFBH_UINT, tgsi_msb},
	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_F2D]	= { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
	[TGSI_OPCODE_D2F]	= { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DABS]	= { ALU_OP1_MOV, tgsi_op2_64},
	[TGSI_OPCODE_DNEG]	= { ALU_OP2_ADD_64, tgsi_dneg},
	[TGSI_OPCODE_DADD]	= { ALU_OP2_ADD_64, tgsi_op2_64},
	[TGSI_OPCODE_DMUL]	= { ALU_OP2_MUL_64, cayman_mul_double_instr},
	[TGSI_OPCODE_DMAX]	= { ALU_OP2_MAX_64, tgsi_op2_64},
	[TGSI_OPCODE_DMIN]	= { ALU_OP2_MIN_64, tgsi_op2_64},
	[TGSI_OPCODE_DSLT]	= {
ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 8491 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 8492 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 8493 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 8494 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 8495 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 8496 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 8497 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 8498 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 8499 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 8500 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 8501 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 8502 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 8503 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 8504 [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 8505 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 8506}; 8507