r600_shader.c revision b206f5951cb8639041a27bfc35cd6add1127ba05
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "r600_sq.h" 24#include "r600_llvm.h" 25#include "r600_formats.h" 26#include "r600_opcodes.h" 27#include "r600_shader.h" 28#include "r600d.h" 29 30#include "sb/sb_public.h" 31 32#include "pipe/p_shader_tokens.h" 33#include "tgsi/tgsi_info.h" 34#include "tgsi/tgsi_parse.h" 35#include "tgsi/tgsi_scan.h" 36#include "tgsi/tgsi_dump.h" 37#include "util/u_memory.h" 38#include "util/u_math.h" 39#include <stdio.h> 40#include <errno.h> 41 42/* CAYMAN notes 43Why CAYMAN got loops for lots of instructions is explained here. 44 45-These 8xx t-slot only ops are implemented in all vector slots. 46MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 47These 8xx t-slot only opcodes become vector ops, with all four 48slots expecting the arguments on sources a and b. 
Result is 49broadcast to all channels. 50MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 51These 8xx t-slot only opcodes become vector ops in the z, y, and 52x slots. 53EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 54RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 55SQRT_IEEE/_64 56SIN/COS 57The w slot may have an independent co-issued operation, or if the 58result is required to be in the w slot, the opcode above may be 59issued in the w slot as well. 60The compiler must issue the source argument to slots z, y, and x 61*/ 62 63static int r600_shader_from_tgsi(struct r600_context *rctx, 64 struct r600_pipe_shader *pipeshader, 65 struct r600_shader_key key); 66 67static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, 68 int size, unsigned comp_mask) { 69 70 if (!size) 71 return; 72 73 if (ps->num_arrays == ps->max_arrays) { 74 ps->max_arrays += 64; 75 ps->arrays = realloc(ps->arrays, ps->max_arrays * 76 sizeof(struct r600_shader_array)); 77 } 78 79 int n = ps->num_arrays; 80 ++ps->num_arrays; 81 82 ps->arrays[n].comp_mask = comp_mask; 83 ps->arrays[n].gpr_start = start_gpr; 84 ps->arrays[n].gpr_count = size; 85} 86 87static void r600_dump_streamout(struct pipe_stream_output_info *so) 88{ 89 unsigned i; 90 91 fprintf(stderr, "STREAMOUT\n"); 92 for (i = 0; i < so->num_outputs; i++) { 93 unsigned mask = ((1 << so->output[i].num_components) - 1) << 94 so->output[i].start_component; 95 fprintf(stderr, " %i: MEM_STREAM0_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n", 96 i, so->output[i].output_buffer, 97 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, 98 so->output[i].register_index, 99 mask & 1 ? "x" : "", 100 mask & 2 ? "y" : "", 101 mask & 4 ? "z" : "", 102 mask & 8 ? "w" : "", 103 so->output[i].dst_offset < so->output[i].start_component ? 
" (will lower)" : ""); 104 } 105} 106 107static int store_shader(struct pipe_context *ctx, 108 struct r600_pipe_shader *shader) 109{ 110 struct r600_context *rctx = (struct r600_context *)ctx; 111 uint32_t *ptr, i; 112 113 if (shader->bo == NULL) { 114 shader->bo = (struct r600_resource*) 115 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4); 116 if (shader->bo == NULL) { 117 return -ENOMEM; 118 } 119 ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); 120 if (R600_BIG_ENDIAN) { 121 for (i = 0; i < shader->shader.bc.ndw; ++i) { 122 ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); 123 } 124 } else { 125 memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr)); 126 } 127 rctx->b.ws->buffer_unmap(shader->bo->cs_buf); 128 } 129 130 return 0; 131} 132 133int r600_pipe_shader_create(struct pipe_context *ctx, 134 struct r600_pipe_shader *shader, 135 struct r600_shader_key key) 136{ 137 struct r600_context *rctx = (struct r600_context *)ctx; 138 struct r600_pipe_shader_selector *sel = shader->selector; 139 int r; 140 bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens); 141 unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); 142 unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM); 143 unsigned export_shader = key.vs_as_es; 144 145 shader->shader.bc.isa = rctx->isa; 146 147 if (dump) { 148 fprintf(stderr, "--------------------------------------------------------------\n"); 149 tgsi_dump(sel->tokens, 0); 150 151 if (sel->so.num_outputs) { 152 r600_dump_streamout(&sel->so); 153 } 154 } 155 r = r600_shader_from_tgsi(rctx, shader, key); 156 if (r) { 157 R600_ERR("translation from TGSI failed !\n"); 158 goto error; 159 } 160 161 /* disable SB for geom shaders - it can't handle the CF_EMIT instructions */ 162 use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_GEOMETRY); 163 164 /* Check if the bytecode has already been 
built. When using the llvm 165 * backend, r600_shader_from_tgsi() will take care of building the 166 * bytecode. 167 */ 168 if (!shader->shader.bc.bytecode) { 169 r = r600_bytecode_build(&shader->shader.bc); 170 if (r) { 171 R600_ERR("building bytecode failed !\n"); 172 goto error; 173 } 174 } 175 176 if (dump && !sb_disasm) { 177 fprintf(stderr, "--------------------------------------------------------------\n"); 178 r600_bytecode_disasm(&shader->shader.bc); 179 fprintf(stderr, "______________________________________________________________\n"); 180 } else if ((dump && sb_disasm) || use_sb) { 181 r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, 182 dump, use_sb); 183 if (r) { 184 R600_ERR("r600_sb_bytecode_process failed !\n"); 185 goto error; 186 } 187 } 188 189 if (shader->gs_copy_shader) { 190 if (dump) { 191 // dump copy shader 192 r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc, 193 &shader->gs_copy_shader->shader, dump, 0); 194 if (r) 195 goto error; 196 } 197 198 if ((r = store_shader(ctx, shader->gs_copy_shader))) 199 goto error; 200 } 201 202 /* Store the shader in a buffer. */ 203 if ((r = store_shader(ctx, shader))) 204 goto error; 205 206 /* Build state. 
*/ 207 switch (shader->shader.processor_type) { 208 case TGSI_PROCESSOR_GEOMETRY: 209 if (rctx->b.chip_class >= EVERGREEN) { 210 evergreen_update_gs_state(ctx, shader); 211 evergreen_update_vs_state(ctx, shader->gs_copy_shader); 212 } else { 213 r600_update_gs_state(ctx, shader); 214 r600_update_vs_state(ctx, shader->gs_copy_shader); 215 } 216 break; 217 case TGSI_PROCESSOR_VERTEX: 218 if (rctx->b.chip_class >= EVERGREEN) { 219 if (export_shader) 220 evergreen_update_es_state(ctx, shader); 221 else 222 evergreen_update_vs_state(ctx, shader); 223 } else { 224 if (export_shader) 225 r600_update_es_state(ctx, shader); 226 else 227 r600_update_vs_state(ctx, shader); 228 } 229 break; 230 case TGSI_PROCESSOR_FRAGMENT: 231 if (rctx->b.chip_class >= EVERGREEN) { 232 evergreen_update_ps_state(ctx, shader); 233 } else { 234 r600_update_ps_state(ctx, shader); 235 } 236 break; 237 default: 238 r = -EINVAL; 239 goto error; 240 } 241 return 0; 242 243error: 244 r600_pipe_shader_destroy(ctx, shader); 245 return r; 246} 247 248void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 249{ 250 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); 251 r600_bytecode_clear(&shader->shader.bc); 252 r600_release_command_buffer(&shader->command_buffer); 253} 254 255/* 256 * tgsi -> r600 shader 257 */ 258struct r600_shader_tgsi_instruction; 259 260struct r600_shader_src { 261 unsigned sel; 262 unsigned swizzle[4]; 263 unsigned neg; 264 unsigned abs; 265 unsigned rel; 266 unsigned kc_bank; 267 uint32_t value[4]; 268}; 269 270struct r600_shader_ctx { 271 struct tgsi_shader_info info; 272 struct tgsi_parse_context parse; 273 const struct tgsi_token *tokens; 274 unsigned type; 275 unsigned file_offset[TGSI_FILE_COUNT]; 276 unsigned temp_reg; 277 struct r600_shader_tgsi_instruction *inst_info; 278 struct r600_bytecode *bc; 279 struct r600_shader *shader; 280 struct r600_shader_src src[4]; 281 uint32_t *literals; 282 uint32_t nliterals; 283 uint32_t 
max_driver_temp_used; 284 boolean use_llvm; 285 /* needed for evergreen interpolation */ 286 boolean input_centroid; 287 boolean input_linear; 288 boolean input_perspective; 289 int num_interp_gpr; 290 int face_gpr; 291 int colors_used; 292 boolean clip_vertex_write; 293 unsigned cv_output; 294 unsigned edgeflag_output; 295 int fragcoord_input; 296 int native_integers; 297 int next_ring_offset; 298 int gs_out_ring_offset; 299 int gs_next_vertex; 300 struct r600_shader *gs_for_vs; 301 int gs_export_gpr_treg; 302}; 303 304struct r600_shader_tgsi_instruction { 305 unsigned tgsi_opcode; 306 unsigned is_op3; 307 unsigned op; 308 int (*process)(struct r600_shader_ctx *ctx); 309}; 310 311static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind); 312static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 313static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 314static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason); 315static void fc_pushlevel(struct r600_shader_ctx *ctx, int type); 316static int tgsi_else(struct r600_shader_ctx *ctx); 317static int tgsi_endif(struct r600_shader_ctx *ctx); 318static int tgsi_bgnloop(struct r600_shader_ctx *ctx); 319static int tgsi_endloop(struct r600_shader_ctx *ctx); 320static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx); 321 322static int tgsi_is_supported(struct r600_shader_ctx *ctx) 323{ 324 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 325 int j; 326 327 if (i->Instruction.NumDstRegs > 1) { 328 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 329 return -EINVAL; 330 } 331 if (i->Instruction.Predicate) { 332 R600_ERR("predicate unsupported\n"); 333 return -EINVAL; 334 } 335#if 0 336 if (i->Instruction.Label) { 337 R600_ERR("label unsupported\n"); 338 return -EINVAL; 339 } 340#endif 341 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 342 if 
(i->Src[j].Register.Dimension) { 343 switch (i->Src[j].Register.File) { 344 case TGSI_FILE_CONSTANT: 345 break; 346 case TGSI_FILE_INPUT: 347 if (ctx->type == TGSI_PROCESSOR_GEOMETRY) 348 break; 349 default: 350 R600_ERR("unsupported src %d (dimension %d)\n", j, 351 i->Src[j].Register.Dimension); 352 return -EINVAL; 353 } 354 } 355 } 356 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 357 if (i->Dst[j].Register.Dimension) { 358 R600_ERR("unsupported dst (dimension)\n"); 359 return -EINVAL; 360 } 361 } 362 return 0; 363} 364 365static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx, 366 int input) 367{ 368 int ij_index = 0; 369 370 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 371 if (ctx->shader->input[input].centroid) 372 ij_index++; 373 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 374 /* if we have perspective add one */ 375 if (ctx->input_perspective) { 376 ij_index++; 377 /* if we have perspective centroid */ 378 if (ctx->input_centroid) 379 ij_index++; 380 } 381 if (ctx->shader->input[input].centroid) 382 ij_index++; 383 } 384 385 ctx->shader->input[input].ij_index = ij_index; 386} 387 388static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 389{ 390 int i, r; 391 struct r600_bytecode_alu alu; 392 int gpr = 0, base_chan = 0; 393 int ij_index = ctx->shader->input[input].ij_index; 394 395 /* work out gpr and base_chan from index */ 396 gpr = ij_index / 2; 397 base_chan = (2 * (ij_index % 2)) + 1; 398 399 for (i = 0; i < 8; i++) { 400 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 401 402 if (i < 4) 403 alu.op = ALU_OP2_INTERP_ZW; 404 else 405 alu.op = ALU_OP2_INTERP_XY; 406 407 if ((i > 1) && (i < 6)) { 408 alu.dst.sel = ctx->shader->input[input].gpr; 409 alu.dst.write = 1; 410 } 411 412 alu.dst.chan = i % 4; 413 414 alu.src[0].sel = gpr; 415 alu.src[0].chan = (base_chan - (i % 2)); 416 417 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + 
ctx->shader->input[input].lds_pos; 418 419 alu.bank_swizzle_force = SQ_ALU_VEC_210; 420 if ((i % 4) == 3) 421 alu.last = 1; 422 r = r600_bytecode_add_alu(ctx->bc, &alu); 423 if (r) 424 return r; 425 } 426 return 0; 427} 428 429static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 430{ 431 int i, r; 432 struct r600_bytecode_alu alu; 433 434 for (i = 0; i < 4; i++) { 435 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 436 437 alu.op = ALU_OP1_INTERP_LOAD_P0; 438 439 alu.dst.sel = ctx->shader->input[input].gpr; 440 alu.dst.write = 1; 441 442 alu.dst.chan = i; 443 444 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 445 alu.src[0].chan = i; 446 447 if (i == 3) 448 alu.last = 1; 449 r = r600_bytecode_add_alu(ctx->bc, &alu); 450 if (r) 451 return r; 452 } 453 return 0; 454} 455 456/* 457 * Special export handling in shaders 458 * 459 * shader export ARRAY_BASE for EXPORT_POS: 460 * 60 is position 461 * 61 is misc vector 462 * 62, 63 are clip distance vectors 463 * 464 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 465 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 466 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 467 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 468 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 469 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 470 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 471 * exclusive from render target index) 472 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 473 * 474 * 475 * shader export ARRAY_BASE for EXPORT_PIXEL: 476 * 0-7 CB targets 477 * 61 computed Z vector 478 * 479 * The use of the values exported in the computed Z vector are controlled 480 * by DB_SHADER_CONTROL: 481 * Z_EXPORT_ENABLE - Z as a float in RED 482 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 
483 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 484 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 485 * DB_SOURCE_FORMAT - export control restrictions 486 * 487 */ 488 489 490/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 491static int r600_spi_sid(struct r600_shader_io * io) 492{ 493 int index, name = io->name; 494 495 /* These params are handled differently, they don't need 496 * semantic indices, so we'll use 0 for them. 497 */ 498 if (name == TGSI_SEMANTIC_POSITION || 499 name == TGSI_SEMANTIC_PSIZE || 500 name == TGSI_SEMANTIC_EDGEFLAG || 501 name == TGSI_SEMANTIC_LAYER || 502 name == TGSI_SEMANTIC_VIEWPORT_INDEX || 503 name == TGSI_SEMANTIC_FACE) 504 index = 0; 505 else { 506 if (name == TGSI_SEMANTIC_GENERIC) { 507 /* For generic params simply use sid from tgsi */ 508 index = io->sid; 509 } else { 510 /* For non-generic params - pack name and sid into 8 bits */ 511 index = 0x80 | (name<<3) | (io->sid); 512 } 513 514 /* Make sure that all really used indices have nonzero value, so 515 * we can just compare it to 0 later instead of comparing the name 516 * with different values to detect special cases. 
*/ 517 index++; 518 } 519 520 return index; 521}; 522 523/* turn input into interpolate on EG */ 524static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 525{ 526 int r = 0; 527 528 if (ctx->shader->input[index].spi_sid) { 529 ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 530 if (ctx->shader->input[index].interpolate > 0) { 531 evergreen_interp_assign_ij_index(ctx, index); 532 if (!ctx->use_llvm) 533 r = evergreen_interp_alu(ctx, index); 534 } else { 535 if (!ctx->use_llvm) 536 r = evergreen_interp_flat(ctx, index); 537 } 538 } 539 return r; 540} 541 542static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 543{ 544 struct r600_bytecode_alu alu; 545 int i, r; 546 int gpr_front = ctx->shader->input[front].gpr; 547 int gpr_back = ctx->shader->input[back].gpr; 548 549 for (i = 0; i < 4; i++) { 550 memset(&alu, 0, sizeof(alu)); 551 alu.op = ALU_OP3_CNDGT; 552 alu.is_op3 = 1; 553 alu.dst.write = 1; 554 alu.dst.sel = gpr_front; 555 alu.src[0].sel = ctx->face_gpr; 556 alu.src[1].sel = gpr_front; 557 alu.src[2].sel = gpr_back; 558 559 alu.dst.chan = i; 560 alu.src[1].chan = i; 561 alu.src[2].chan = i; 562 alu.last = (i==3); 563 564 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 565 return r; 566 } 567 568 return 0; 569} 570 571static int tgsi_declaration(struct r600_shader_ctx *ctx) 572{ 573 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 574 int r, i, j, count = d->Range.Last - d->Range.First + 1; 575 576 switch (d->Declaration.File) { 577 case TGSI_FILE_INPUT: 578 i = ctx->shader->ninput; 579 assert(i < Elements(ctx->shader->input)); 580 ctx->shader->ninput += count; 581 ctx->shader->input[i].name = d->Semantic.Name; 582 ctx->shader->input[i].sid = d->Semantic.Index; 583 ctx->shader->input[i].interpolate = d->Interp.Interpolate; 584 ctx->shader->input[i].centroid = d->Interp.Centroid; 585 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; 586 if (ctx->type == 
TGSI_PROCESSOR_FRAGMENT) { 587 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 588 switch (ctx->shader->input[i].name) { 589 case TGSI_SEMANTIC_FACE: 590 ctx->face_gpr = ctx->shader->input[i].gpr; 591 break; 592 case TGSI_SEMANTIC_COLOR: 593 ctx->colors_used++; 594 break; 595 case TGSI_SEMANTIC_POSITION: 596 ctx->fragcoord_input = i; 597 break; 598 } 599 if (ctx->bc->chip_class >= EVERGREEN) { 600 if ((r = evergreen_interp_input(ctx, i))) 601 return r; 602 } 603 } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { 604 /* FIXME probably skip inputs if they aren't passed in the ring */ 605 ctx->shader->input[i].ring_offset = ctx->next_ring_offset; 606 ctx->next_ring_offset += 16; 607 if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID) 608 ctx->shader->gs_prim_id_input = true; 609 } 610 for (j = 1; j < count; ++j) { 611 ctx->shader->input[i + j] = ctx->shader->input[i]; 612 ctx->shader->input[i + j].gpr += j; 613 } 614 break; 615 case TGSI_FILE_OUTPUT: 616 i = ctx->shader->noutput++; 617 assert(i < Elements(ctx->shader->output)); 618 ctx->shader->output[i].name = d->Semantic.Name; 619 ctx->shader->output[i].sid = d->Semantic.Index; 620 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; 621 ctx->shader->output[i].interpolate = d->Interp.Interpolate; 622 ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 623 if (ctx->type == TGSI_PROCESSOR_VERTEX || 624 ctx->type == TGSI_PROCESSOR_GEOMETRY) { 625 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 626 switch (d->Semantic.Name) { 627 case TGSI_SEMANTIC_CLIPDIST: 628 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2); 629 break; 630 case TGSI_SEMANTIC_PSIZE: 631 ctx->shader->vs_out_misc_write = 1; 632 ctx->shader->vs_out_point_size = 1; 633 break; 634 case TGSI_SEMANTIC_EDGEFLAG: 635 ctx->shader->vs_out_misc_write = 1; 636 ctx->shader->vs_out_edgeflag = 1; 637 ctx->edgeflag_output = i; 638 break; 639 
case TGSI_SEMANTIC_VIEWPORT_INDEX: 640 ctx->shader->vs_out_misc_write = 1; 641 ctx->shader->vs_out_viewport = 1; 642 break; 643 case TGSI_SEMANTIC_LAYER: 644 ctx->shader->vs_out_misc_write = 1; 645 ctx->shader->vs_out_layer = 1; 646 break; 647 case TGSI_SEMANTIC_CLIPVERTEX: 648 ctx->clip_vertex_write = TRUE; 649 ctx->cv_output = i; 650 break; 651 } 652 if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { 653 ctx->gs_out_ring_offset += 16; 654 } 655 } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 656 switch (d->Semantic.Name) { 657 case TGSI_SEMANTIC_COLOR: 658 ctx->shader->nr_ps_max_color_exports++; 659 break; 660 } 661 } 662 break; 663 case TGSI_FILE_TEMPORARY: 664 if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 665 if (d->Array.ArrayID) { 666 r600_add_gpr_array(ctx->shader, 667 ctx->file_offset[TGSI_FILE_TEMPORARY] + 668 d->Range.First, 669 d->Range.Last - d->Range.First + 1, 0x0F); 670 } 671 } 672 break; 673 674 case TGSI_FILE_CONSTANT: 675 case TGSI_FILE_SAMPLER: 676 case TGSI_FILE_ADDRESS: 677 break; 678 679 case TGSI_FILE_SYSTEM_VALUE: 680 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 681 if (!ctx->native_integers) { 682 struct r600_bytecode_alu alu; 683 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 684 685 alu.op = ALU_OP1_INT_TO_FLT; 686 alu.src[0].sel = 0; 687 alu.src[0].chan = 3; 688 689 alu.dst.sel = 0; 690 alu.dst.chan = 3; 691 alu.dst.write = 1; 692 alu.last = 1; 693 694 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 695 return r; 696 } 697 break; 698 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 699 break; 700 default: 701 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 702 return -EINVAL; 703 } 704 return 0; 705} 706 707static int r600_get_temp(struct r600_shader_ctx *ctx) 708{ 709 return ctx->temp_reg + ctx->max_driver_temp_used++; 710} 711 712/* 713 * for evergreen we need to scan the shader to find the number of GPRs we need to 714 * reserve for interpolation. 
715 * 716 * we need to know if we are going to emit 717 * any centroid inputs 718 * if perspective and linear are required 719*/ 720static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 721{ 722 int i; 723 int num_baryc; 724 725 ctx->input_linear = FALSE; 726 ctx->input_perspective = FALSE; 727 ctx->input_centroid = FALSE; 728 ctx->num_interp_gpr = 1; 729 730 /* any centroid inputs */ 731 for (i = 0; i < ctx->info.num_inputs; i++) { 732 /* skip position/face */ 733 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 734 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 735 continue; 736 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 737 ctx->input_linear = TRUE; 738 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 739 ctx->input_perspective = TRUE; 740 if (ctx->info.input_centroid[i]) 741 ctx->input_centroid = TRUE; 742 } 743 744 num_baryc = 0; 745 /* ignoring sample for now */ 746 if (ctx->input_perspective) 747 num_baryc++; 748 if (ctx->input_linear) 749 num_baryc++; 750 if (ctx->input_centroid) 751 num_baryc *= 2; 752 753 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 754 755 /* XXX PULL MODEL and LINE STIPPLE, FIXED PT POS */ 756 return ctx->num_interp_gpr; 757} 758 759static void tgsi_src(struct r600_shader_ctx *ctx, 760 const struct tgsi_full_src_register *tgsi_src, 761 struct r600_shader_src *r600_src) 762{ 763 memset(r600_src, 0, sizeof(*r600_src)); 764 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 765 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 766 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 767 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 768 r600_src->neg = tgsi_src->Register.Negate; 769 r600_src->abs = tgsi_src->Register.Absolute; 770 771 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 772 int index; 773 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 774 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 775 
(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 776 777 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 778 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 779 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 780 return; 781 } 782 index = tgsi_src->Register.Index; 783 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 784 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 785 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 786 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 787 r600_src->swizzle[0] = 3; 788 r600_src->swizzle[1] = 3; 789 r600_src->swizzle[2] = 3; 790 r600_src->swizzle[3] = 3; 791 r600_src->sel = 0; 792 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 793 r600_src->swizzle[0] = 0; 794 r600_src->swizzle[1] = 0; 795 r600_src->swizzle[2] = 0; 796 r600_src->swizzle[3] = 0; 797 r600_src->sel = 0; 798 } 799 } else { 800 if (tgsi_src->Register.Indirect) 801 r600_src->rel = V_SQ_REL_RELATIVE; 802 r600_src->sel = tgsi_src->Register.Index; 803 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 804 } 805 if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) { 806 if (tgsi_src->Register.Dimension) { 807 r600_src->kc_bank = tgsi_src->Dimension.Index; 808 } 809 } 810} 811 812static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, 813 unsigned int cb_idx, unsigned int offset, unsigned ar_chan, 814 unsigned int dst_reg) 815{ 816 struct r600_bytecode_vtx vtx; 817 unsigned int ar_reg; 818 int r; 819 820 if (offset) { 821 struct r600_bytecode_alu alu; 822 823 memset(&alu, 0, sizeof(alu)); 824 825 alu.op = ALU_OP2_ADD_INT; 826 alu.src[0].sel = ctx->bc->ar_reg; 827 alu.src[0].chan = ar_chan; 828 829 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 830 alu.src[1].value = offset; 831 832 alu.dst.sel = dst_reg; 833 alu.dst.chan = ar_chan; 834 alu.dst.write = 1; 835 alu.last 
= 1; 836 837 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 838 return r; 839 840 ar_reg = dst_reg; 841 } else { 842 ar_reg = ctx->bc->ar_reg; 843 } 844 845 memset(&vtx, 0, sizeof(vtx)); 846 vtx.buffer_id = cb_idx; 847 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 848 vtx.src_gpr = ar_reg; 849 vtx.src_sel_x = ar_chan; 850 vtx.mega_fetch_count = 16; 851 vtx.dst_gpr = dst_reg; 852 vtx.dst_sel_x = 0; /* SEL_X */ 853 vtx.dst_sel_y = 1; /* SEL_Y */ 854 vtx.dst_sel_z = 2; /* SEL_Z */ 855 vtx.dst_sel_w = 3; /* SEL_W */ 856 vtx.data_format = FMT_32_32_32_32_FLOAT; 857 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 858 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 859 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 860 vtx.endian = r600_endian_swap(32); 861 862 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 863 return r; 864 865 return 0; 866} 867 868static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) 869{ 870 struct r600_bytecode_vtx vtx; 871 int r; 872 unsigned index = src->Register.Index; 873 unsigned vtx_id = src->Dimension.Index; 874 int offset_reg = vtx_id / 3; 875 int offset_chan = vtx_id % 3; 876 877 /* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y, 878 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */ 879 880 if (offset_reg == 0 && offset_chan == 2) 881 offset_chan = 3; 882 883 if (src->Dimension.Indirect) { 884 int treg[3]; 885 int t2; 886 struct r600_bytecode_alu alu; 887 int r, i; 888 889 /* you have got to be shitting me - 890 we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt. 891 at least this is what fglrx seems to do. */ 892 for (i = 0; i < 3; i++) { 893 treg[i] = r600_get_temp(ctx); 894 } 895 t2 = r600_get_temp(ctx); 896 for (i = 0; i < 3; i++) { 897 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 898 alu.op = ALU_OP1_MOV; 899 alu.src[0].sel = 0; 900 alu.src[0].chan = i == 2 ? 
3 : i; 901 alu.dst.sel = treg[i]; 902 alu.dst.chan = 0; 903 alu.dst.write = 1; 904 alu.last = 1; 905 r = r600_bytecode_add_alu(ctx->bc, &alu); 906 if (r) 907 return r; 908 } 909 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 910 alu.op = ALU_OP1_MOV; 911 alu.src[0].sel = treg[0]; 912 alu.src[0].rel = 1; 913 alu.dst.sel = t2; 914 alu.dst.write = 1; 915 alu.last = 1; 916 r = r600_bytecode_add_alu(ctx->bc, &alu); 917 if (r) 918 return r; 919 offset_reg = t2; 920 } 921 922 923 memset(&vtx, 0, sizeof(vtx)); 924 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 925 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 926 vtx.src_gpr = offset_reg; 927 vtx.src_sel_x = offset_chan; 928 vtx.offset = index * 16; /*bytes*/ 929 vtx.mega_fetch_count = 16; 930 vtx.dst_gpr = dst_reg; 931 vtx.dst_sel_x = 0; /* SEL_X */ 932 vtx.dst_sel_y = 1; /* SEL_Y */ 933 vtx.dst_sel_z = 2; /* SEL_Z */ 934 vtx.dst_sel_w = 3; /* SEL_W */ 935 if (ctx->bc->chip_class >= EVERGREEN) { 936 vtx.use_const_fields = 1; 937 } else { 938 vtx.data_format = FMT_32_32_32_32_FLOAT; 939 } 940 941 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 942 return r; 943 944 return 0; 945} 946 947static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx) 948{ 949 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 950 int i; 951 952 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 953 struct tgsi_full_src_register *src = &inst->Src[i]; 954 955 if (src->Register.File == TGSI_FILE_INPUT) { 956 if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) { 957 /* primitive id is in R0.z */ 958 ctx->src[i].sel = 0; 959 ctx->src[i].swizzle[0] = 2; 960 } 961 } 962 if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) { 963 int treg = r600_get_temp(ctx); 964 965 fetch_gs_input(ctx, src, treg); 966 ctx->src[i].sel = treg; 967 } 968 } 969 return 0; 970} 971 972static int tgsi_split_constant(struct r600_shader_ctx *ctx) 973{ 974 struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction; 975 struct r600_bytecode_alu alu; 976 int i, j, k, nconst, r; 977 978 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 979 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 980 nconst++; 981 } 982 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 983 } 984 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 985 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 986 continue; 987 } 988 989 if (ctx->src[i].rel) { 990 int chan = inst->Src[i].Indirect.Swizzle; 991 int treg = r600_get_temp(ctx); 992 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].sel - 512, chan, treg))) 993 return r; 994 995 ctx->src[i].kc_bank = 0; 996 ctx->src[i].sel = treg; 997 ctx->src[i].rel = 0; 998 j--; 999 } else if (j > 0) { 1000 int treg = r600_get_temp(ctx); 1001 for (k = 0; k < 4; k++) { 1002 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1003 alu.op = ALU_OP1_MOV; 1004 alu.src[0].sel = ctx->src[i].sel; 1005 alu.src[0].chan = k; 1006 alu.src[0].rel = ctx->src[i].rel; 1007 alu.dst.sel = treg; 1008 alu.dst.chan = k; 1009 alu.dst.write = 1; 1010 if (k == 3) 1011 alu.last = 1; 1012 r = r600_bytecode_add_alu(ctx->bc, &alu); 1013 if (r) 1014 return r; 1015 } 1016 ctx->src[i].sel = treg; 1017 ctx->src[i].rel =0; 1018 j--; 1019 } 1020 } 1021 return 0; 1022} 1023 1024/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 1025static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 1026{ 1027 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1028 struct r600_bytecode_alu alu; 1029 int i, j, k, nliteral, r; 1030 1031 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 1032 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1033 nliteral++; 1034 } 1035 } 1036 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 1037 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1038 int treg = 
r600_get_temp(ctx); 1039 for (k = 0; k < 4; k++) { 1040 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1041 alu.op = ALU_OP1_MOV; 1042 alu.src[0].sel = ctx->src[i].sel; 1043 alu.src[0].chan = k; 1044 alu.src[0].value = ctx->src[i].value[k]; 1045 alu.dst.sel = treg; 1046 alu.dst.chan = k; 1047 alu.dst.write = 1; 1048 if (k == 3) 1049 alu.last = 1; 1050 r = r600_bytecode_add_alu(ctx->bc, &alu); 1051 if (r) 1052 return r; 1053 } 1054 ctx->src[i].sel = treg; 1055 j--; 1056 } 1057 } 1058 return 0; 1059} 1060 1061static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 1062{ 1063 int i, r, count = ctx->shader->ninput; 1064 1065 for (i = 0; i < count; i++) { 1066 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 1067 r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input); 1068 if (r) 1069 return r; 1070 } 1071 } 1072 return 0; 1073} 1074 1075static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so) 1076{ 1077 unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS]; 1078 int i, j, r; 1079 1080 /* Sanity checking. */ 1081 if (so->num_outputs > PIPE_MAX_SHADER_OUTPUTS) { 1082 R600_ERR("Too many stream outputs: %d\n", so->num_outputs); 1083 r = -EINVAL; 1084 goto out_err; 1085 } 1086 for (i = 0; i < so->num_outputs; i++) { 1087 if (so->output[i].output_buffer >= 4) { 1088 R600_ERR("Exceeded the max number of stream output buffers, got: %d\n", 1089 so->output[i].output_buffer); 1090 r = -EINVAL; 1091 goto out_err; 1092 } 1093 } 1094 1095 /* Initialize locations where the outputs are stored. */ 1096 for (i = 0; i < so->num_outputs; i++) { 1097 so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr; 1098 1099 /* Lower outputs with dst_offset < start_component. 1100 * 1101 * We can only output 4D vectors with a write mask, e.g. we can 1102 * only output the W component at offset 3, etc. If we want 1103 * to store Y, Z, or W at buffer offset 0, we need to use MOV 1104 * to move it to X and output X. 
*/ 1105 if (so->output[i].dst_offset < so->output[i].start_component) { 1106 unsigned tmp = r600_get_temp(ctx); 1107 1108 for (j = 0; j < so->output[i].num_components; j++) { 1109 struct r600_bytecode_alu alu; 1110 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1111 alu.op = ALU_OP1_MOV; 1112 alu.src[0].sel = so_gpr[i]; 1113 alu.src[0].chan = so->output[i].start_component + j; 1114 1115 alu.dst.sel = tmp; 1116 alu.dst.chan = j; 1117 alu.dst.write = 1; 1118 if (j == so->output[i].num_components - 1) 1119 alu.last = 1; 1120 r = r600_bytecode_add_alu(ctx->bc, &alu); 1121 if (r) 1122 return r; 1123 } 1124 so->output[i].start_component = 0; 1125 so_gpr[i] = tmp; 1126 } 1127 } 1128 1129 /* Write outputs to buffers. */ 1130 for (i = 0; i < so->num_outputs; i++) { 1131 struct r600_bytecode_output output; 1132 1133 memset(&output, 0, sizeof(struct r600_bytecode_output)); 1134 output.gpr = so_gpr[i]; 1135 output.elem_size = so->output[i].num_components; 1136 output.array_base = so->output[i].dst_offset - so->output[i].start_component; 1137 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 1138 output.burst_count = 1; 1139 /* array_size is an upper limit for the burst_count 1140 * with MEM_STREAM instructions */ 1141 output.array_size = 0xFFF; 1142 output.comp_mask = ((1 << so->output[i].num_components) - 1) << so->output[i].start_component; 1143 if (ctx->bc->chip_class >= EVERGREEN) { 1144 switch (so->output[i].output_buffer) { 1145 case 0: 1146 output.op = CF_OP_MEM_STREAM0_BUF0; 1147 break; 1148 case 1: 1149 output.op = CF_OP_MEM_STREAM0_BUF1; 1150 break; 1151 case 2: 1152 output.op = CF_OP_MEM_STREAM0_BUF2; 1153 break; 1154 case 3: 1155 output.op = CF_OP_MEM_STREAM0_BUF3; 1156 break; 1157 } 1158 } else { 1159 switch (so->output[i].output_buffer) { 1160 case 0: 1161 output.op = CF_OP_MEM_STREAM0; 1162 break; 1163 case 1: 1164 output.op = CF_OP_MEM_STREAM1; 1165 break; 1166 case 2: 1167 output.op = CF_OP_MEM_STREAM2; 1168 break; 1169 case 3: 1170 output.op = 
CF_OP_MEM_STREAM3; 1171 break; 1172 } 1173 } 1174 r = r600_bytecode_add_output(ctx->bc, &output); 1175 if (r) 1176 goto out_err; 1177 } 1178 return 0; 1179out_err: 1180 return r; 1181} 1182 1183static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx) 1184{ 1185 struct r600_bytecode_alu alu; 1186 unsigned reg; 1187 1188 if (!ctx->shader->vs_out_edgeflag) 1189 return; 1190 1191 reg = ctx->shader->output[ctx->edgeflag_output].gpr; 1192 1193 /* clamp(x, 0, 1) */ 1194 memset(&alu, 0, sizeof(alu)); 1195 alu.op = ALU_OP1_MOV; 1196 alu.src[0].sel = reg; 1197 alu.dst.sel = reg; 1198 alu.dst.write = 1; 1199 alu.dst.clamp = 1; 1200 alu.last = 1; 1201 r600_bytecode_add_alu(ctx->bc, &alu); 1202 1203 memset(&alu, 0, sizeof(alu)); 1204 alu.op = ALU_OP1_FLT_TO_INT; 1205 alu.src[0].sel = reg; 1206 alu.dst.sel = reg; 1207 alu.dst.write = 1; 1208 alu.last = 1; 1209 r600_bytecode_add_alu(ctx->bc, &alu); 1210} 1211 1212static int generate_gs_copy_shader(struct r600_context *rctx, 1213 struct r600_pipe_shader *gs, 1214 struct pipe_stream_output_info *so) 1215{ 1216 struct r600_shader_ctx ctx = {}; 1217 struct r600_shader *gs_shader = &gs->shader; 1218 struct r600_pipe_shader *cshader; 1219 int ocnt = gs_shader->noutput; 1220 struct r600_bytecode_alu alu; 1221 struct r600_bytecode_vtx vtx; 1222 struct r600_bytecode_output output; 1223 struct r600_bytecode_cf *cf_jump, *cf_pop, 1224 *last_exp_pos = NULL, *last_exp_param = NULL; 1225 int i, next_clip_pos = 61, next_param = 0; 1226 1227 cshader = calloc(1, sizeof(struct r600_pipe_shader)); 1228 if (!cshader) 1229 return 0; 1230 1231 memcpy(cshader->shader.output, gs_shader->output, ocnt * 1232 sizeof(struct r600_shader_io)); 1233 1234 cshader->shader.noutput = ocnt; 1235 1236 ctx.shader = &cshader->shader; 1237 ctx.bc = &ctx.shader->bc; 1238 ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX; 1239 1240 r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family, 1241 rctx->screen->has_compressed_msaa_texturing); 1242 1243 ctx.bc->isa 
= rctx->isa; 1244 1245 /* R0.x = R0.x & 0x3fffffff */ 1246 memset(&alu, 0, sizeof(alu)); 1247 alu.op = ALU_OP2_AND_INT; 1248 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1249 alu.src[1].value = 0x3fffffff; 1250 alu.dst.write = 1; 1251 r600_bytecode_add_alu(ctx.bc, &alu); 1252 1253 /* R0.y = R0.x >> 30 */ 1254 memset(&alu, 0, sizeof(alu)); 1255 alu.op = ALU_OP2_LSHR_INT; 1256 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1257 alu.src[1].value = 0x1e; 1258 alu.dst.chan = 1; 1259 alu.dst.write = 1; 1260 alu.last = 1; 1261 r600_bytecode_add_alu(ctx.bc, &alu); 1262 1263 /* PRED_SETE_INT __, R0.y, 0 */ 1264 memset(&alu, 0, sizeof(alu)); 1265 alu.op = ALU_OP2_PRED_SETE_INT; 1266 alu.src[0].chan = 1; 1267 alu.src[1].sel = V_SQ_ALU_SRC_0; 1268 alu.execute_mask = 1; 1269 alu.update_pred = 1; 1270 alu.last = 1; 1271 r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE); 1272 1273 r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP); 1274 cf_jump = ctx.bc->cf_last; 1275 1276 /* fetch vertex data from GSVS ring */ 1277 for (i = 0; i < ocnt; ++i) { 1278 struct r600_shader_io *out = &ctx.shader->output[i]; 1279 out->gpr = i + 1; 1280 out->ring_offset = i * 16; 1281 1282 memset(&vtx, 0, sizeof(vtx)); 1283 vtx.op = FETCH_OP_VFETCH; 1284 vtx.buffer_id = R600_GS_RING_CONST_BUFFER; 1285 vtx.fetch_type = 2; 1286 vtx.offset = out->ring_offset; 1287 vtx.dst_gpr = out->gpr; 1288 vtx.dst_sel_x = 0; 1289 vtx.dst_sel_y = 1; 1290 vtx.dst_sel_z = 2; 1291 vtx.dst_sel_w = 3; 1292 if (rctx->b.chip_class >= EVERGREEN) { 1293 vtx.use_const_fields = 1; 1294 } else { 1295 vtx.data_format = FMT_32_32_32_32_FLOAT; 1296 } 1297 1298 r600_bytecode_add_vtx(ctx.bc, &vtx); 1299 } 1300 1301 /* XXX handle clipvertex, streamout? */ 1302 emit_streamout(&ctx, so); 1303 1304 /* export vertex data */ 1305 /* XXX factor out common code with r600_shader_from_tgsi ? 
*/ 1306 for (i = 0; i < ocnt; ++i) { 1307 struct r600_shader_io *out = &ctx.shader->output[i]; 1308 1309 if (out->name == TGSI_SEMANTIC_CLIPVERTEX) 1310 continue; 1311 1312 memset(&output, 0, sizeof(output)); 1313 output.gpr = out->gpr; 1314 output.elem_size = 3; 1315 output.swizzle_x = 0; 1316 output.swizzle_y = 1; 1317 output.swizzle_z = 2; 1318 output.swizzle_w = 3; 1319 output.burst_count = 1; 1320 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1321 output.op = CF_OP_EXPORT; 1322 switch (out->name) { 1323 case TGSI_SEMANTIC_POSITION: 1324 output.array_base = 60; 1325 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1326 break; 1327 1328 case TGSI_SEMANTIC_PSIZE: 1329 output.array_base = 61; 1330 if (next_clip_pos == 61) 1331 next_clip_pos = 62; 1332 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1333 output.swizzle_y = 7; 1334 output.swizzle_z = 7; 1335 output.swizzle_w = 7; 1336 ctx.shader->vs_out_misc_write = 1; 1337 ctx.shader->vs_out_point_size = 1; 1338 break; 1339 case TGSI_SEMANTIC_LAYER: 1340 output.array_base = 61; 1341 if (next_clip_pos == 61) 1342 next_clip_pos = 62; 1343 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1344 output.swizzle_x = 7; 1345 output.swizzle_y = 7; 1346 output.swizzle_z = 0; 1347 output.swizzle_w = 7; 1348 ctx.shader->vs_out_misc_write = 1; 1349 ctx.shader->vs_out_layer = 1; 1350 break; 1351 case TGSI_SEMANTIC_VIEWPORT_INDEX: 1352 output.array_base = 61; 1353 if (next_clip_pos == 61) 1354 next_clip_pos = 62; 1355 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1356 ctx.shader->vs_out_misc_write = 1; 1357 ctx.shader->vs_out_viewport = 1; 1358 output.swizzle_x = 7; 1359 output.swizzle_y = 7; 1360 output.swizzle_z = 7; 1361 output.swizzle_w = 0; 1362 break; 1363 case TGSI_SEMANTIC_CLIPDIST: 1364 /* spi_sid is 0 for clipdistance outputs that were generated 1365 * for clipvertex - we don't need to pass them to PS */ 1366 ctx.shader->clip_dist_write = gs->shader.clip_dist_write; 1367 if 
(out->spi_sid) { 1368 /* duplicate it as PARAM to pass to the pixel shader */ 1369 output.array_base = next_param++; 1370 r600_bytecode_add_output(ctx.bc, &output); 1371 last_exp_param = ctx.bc->cf_last; 1372 } 1373 output.array_base = next_clip_pos++; 1374 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1375 break; 1376 case TGSI_SEMANTIC_FOG: 1377 output.swizzle_y = 4; /* 0 */ 1378 output.swizzle_z = 4; /* 0 */ 1379 output.swizzle_w = 5; /* 1 */ 1380 break; 1381 default: 1382 output.array_base = next_param++; 1383 break; 1384 } 1385 r600_bytecode_add_output(ctx.bc, &output); 1386 if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) 1387 last_exp_param = ctx.bc->cf_last; 1388 else 1389 last_exp_pos = ctx.bc->cf_last; 1390 } 1391 1392 if (!last_exp_pos) { 1393 memset(&output, 0, sizeof(output)); 1394 output.gpr = 0; 1395 output.elem_size = 3; 1396 output.swizzle_x = 7; 1397 output.swizzle_y = 7; 1398 output.swizzle_z = 7; 1399 output.swizzle_w = 7; 1400 output.burst_count = 1; 1401 output.type = 2; 1402 output.op = CF_OP_EXPORT; 1403 output.array_base = 60; 1404 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1405 r600_bytecode_add_output(ctx.bc, &output); 1406 last_exp_pos = ctx.bc->cf_last; 1407 } 1408 1409 if (!last_exp_param) { 1410 memset(&output, 0, sizeof(output)); 1411 output.gpr = 0; 1412 output.elem_size = 3; 1413 output.swizzle_x = 7; 1414 output.swizzle_y = 7; 1415 output.swizzle_z = 7; 1416 output.swizzle_w = 7; 1417 output.burst_count = 1; 1418 output.type = 2; 1419 output.op = CF_OP_EXPORT; 1420 output.array_base = next_param++; 1421 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1422 r600_bytecode_add_output(ctx.bc, &output); 1423 last_exp_param = ctx.bc->cf_last; 1424 } 1425 1426 last_exp_pos->op = CF_OP_EXPORT_DONE; 1427 last_exp_param->op = CF_OP_EXPORT_DONE; 1428 1429 r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP); 1430 cf_pop = ctx.bc->cf_last; 1431 1432 cf_jump->cf_addr = cf_pop->id + 2; 1433 
cf_jump->pop_count = 1; 1434 cf_pop->cf_addr = cf_pop->id + 2; 1435 cf_pop->pop_count = 1; 1436 1437 if (ctx.bc->chip_class == CAYMAN) 1438 cm_bytecode_add_cf_end(ctx.bc); 1439 else { 1440 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 1441 ctx.bc->cf_last->end_of_program = 1; 1442 } 1443 1444 gs->gs_copy_shader = cshader; 1445 1446 ctx.bc->nstack = 1; 1447 cshader->shader.ring_item_size = ocnt * 16; 1448 1449 return r600_bytecode_build(ctx.bc); 1450} 1451 1452static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind) 1453{ 1454 struct r600_bytecode_output output; 1455 int i, k, ring_offset; 1456 1457 for (i = 0; i < ctx->shader->noutput; i++) { 1458 if (ctx->gs_for_vs) { 1459 /* for ES we need to lookup corresponding ring offset expected by GS 1460 * (map this output to GS input by name and sid) */ 1461 /* FIXME precompute offsets */ 1462 ring_offset = -1; 1463 for(k = 0; k < ctx->gs_for_vs->ninput; ++k) { 1464 struct r600_shader_io *in = &ctx->gs_for_vs->input[k]; 1465 struct r600_shader_io *out = &ctx->shader->output[i]; 1466 if (in->name == out->name && in->sid == out->sid) 1467 ring_offset = in->ring_offset; 1468 } 1469 1470 if (ring_offset == -1) 1471 continue; 1472 } else 1473 ring_offset = i * 16; 1474 1475 /* next_ring_offset after parsing input decls contains total size of 1476 * single vertex data, gs_next_vertex - current vertex index */ 1477 if (!ind) 1478 ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex; 1479 1480 /* get a temp and add the ring offset to the next vertex base in the shader */ 1481 memset(&output, 0, sizeof(struct r600_bytecode_output)); 1482 output.gpr = ctx->shader->output[i].gpr; 1483 output.elem_size = 3; 1484 output.comp_mask = 0xF; 1485 output.burst_count = 1; 1486 1487 if (ind) 1488 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND; 1489 else 1490 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 1491 output.op = CF_OP_MEM_RING; 1492 1493 1494 if (ind) { 1495 output.array_base = ring_offset 
>> 2; /* in dwords */ 1496 output.array_size = 0xfff; 1497 output.index_gpr = ctx->gs_export_gpr_treg; 1498 } else 1499 output.array_base = ring_offset >> 2; /* in dwords */ 1500 r600_bytecode_add_output(ctx->bc, &output); 1501 } 1502 1503 if (ind) { 1504 struct r600_bytecode_alu alu; 1505 int r; 1506 1507 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1508 alu.op = ALU_OP2_ADD_INT; 1509 alu.src[0].sel = ctx->gs_export_gpr_treg; 1510 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1511 alu.src[1].value = ctx->gs_out_ring_offset >> 4; 1512 alu.dst.sel = ctx->gs_export_gpr_treg; 1513 alu.dst.write = 1; 1514 alu.last = 1; 1515 r = r600_bytecode_add_alu(ctx->bc, &alu); 1516 if (r) 1517 return r; 1518 } 1519 ++ctx->gs_next_vertex; 1520 return 0; 1521} 1522 1523static int r600_shader_from_tgsi(struct r600_context *rctx, 1524 struct r600_pipe_shader *pipeshader, 1525 struct r600_shader_key key) 1526{ 1527 struct r600_screen *rscreen = rctx->screen; 1528 struct r600_shader *shader = &pipeshader->shader; 1529 struct tgsi_token *tokens = pipeshader->selector->tokens; 1530 struct pipe_stream_output_info so = pipeshader->selector->so; 1531 struct tgsi_full_immediate *immediate; 1532 struct tgsi_full_property *property; 1533 struct r600_shader_ctx ctx; 1534 struct r600_bytecode_output output[32]; 1535 unsigned output_done, noutput; 1536 unsigned opcode; 1537 int i, j, k, r = 0; 1538 int next_param_base = 0, next_clip_base; 1539 int max_color_exports = MAX2(key.nr_cbufs, 1); 1540 /* Declarations used by llvm code */ 1541 bool use_llvm = false; 1542 bool indirect_gprs; 1543 bool ring_outputs = false; 1544 bool pos_emitted = false; 1545 1546#ifdef R600_USE_LLVM 1547 use_llvm = rscreen->b.debug_flags & DBG_LLVM; 1548#endif 1549 ctx.bc = &shader->bc; 1550 ctx.shader = shader; 1551 ctx.native_integers = true; 1552 1553 shader->vs_as_es = key.vs_as_es; 1554 1555 r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, 1556 rscreen->has_compressed_msaa_texturing); 1557 
ctx.tokens = tokens; 1558 tgsi_scan_shader(tokens, &ctx.info); 1559 shader->indirect_files = ctx.info.indirect_files; 1560 indirect_gprs = ctx.info.indirect_files & ~(1 << TGSI_FILE_CONSTANT); 1561 tgsi_parse_init(&ctx.parse, tokens); 1562 ctx.type = ctx.parse.FullHeader.Processor.Processor; 1563 shader->processor_type = ctx.type; 1564 ctx.bc->type = shader->processor_type; 1565 1566 ring_outputs = key.vs_as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY); 1567 1568 if (key.vs_as_es) { 1569 ctx.gs_for_vs = &rctx->gs_shader->current->shader; 1570 } else { 1571 ctx.gs_for_vs = NULL; 1572 } 1573 1574 ctx.next_ring_offset = 0; 1575 ctx.gs_out_ring_offset = 0; 1576 ctx.gs_next_vertex = 0; 1577 1578 ctx.face_gpr = -1; 1579 ctx.fragcoord_input = -1; 1580 ctx.colors_used = 0; 1581 ctx.clip_vertex_write = 0; 1582 1583 shader->nr_ps_color_exports = 0; 1584 shader->nr_ps_max_color_exports = 0; 1585 1586 shader->two_side = key.color_two_side; 1587 1588 /* register allocations */ 1589 /* Values [0,127] correspond to GPR[0..127]. 1590 * Values [128,159] correspond to constant buffer bank 0 1591 * Values [160,191] correspond to constant buffer bank 1 1592 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 1593 * Values [256,287] correspond to constant buffer bank 2 (EG) 1594 * Values [288,319] correspond to constant buffer bank 3 (EG) 1595 * Other special values are shown in the list below. 1596 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 1597 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 1598 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 1599 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 1600 * 248 SQ_ALU_SRC_0: special constant 0.0. 1601 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 1602 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 1603 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 
1604 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 1605 * 253 SQ_ALU_SRC_LITERAL: literal constant. 1606 * 254 SQ_ALU_SRC_PV: previous vector result. 1607 * 255 SQ_ALU_SRC_PS: previous scalar result. 1608 */ 1609 for (i = 0; i < TGSI_FILE_COUNT; i++) { 1610 ctx.file_offset[i] = 0; 1611 } 1612 1613#ifdef R600_USE_LLVM 1614 if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) { 1615 fprintf(stderr, "Warning: R600 LLVM backend does not support " 1616 "indirect adressing. Falling back to TGSI " 1617 "backend.\n"); 1618 use_llvm = 0; 1619 } 1620#endif 1621 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 1622 ctx.file_offset[TGSI_FILE_INPUT] = 1; 1623 if (!use_llvm) { 1624 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); 1625 } 1626 } 1627 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { 1628 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 1629 } 1630 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 1631 /* FIXME 1 would be enough in some cases (3 or less input vertices) */ 1632 ctx.file_offset[TGSI_FILE_INPUT] = 2; 1633 } 1634 ctx.use_llvm = use_llvm; 1635 1636 if (use_llvm) { 1637 ctx.file_offset[TGSI_FILE_OUTPUT] = 1638 ctx.file_offset[TGSI_FILE_INPUT]; 1639 } else { 1640 ctx.file_offset[TGSI_FILE_OUTPUT] = 1641 ctx.file_offset[TGSI_FILE_INPUT] + 1642 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 1643 } 1644 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 1645 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 1646 1647 /* Outside the GPR range. This will be translated to one of the 1648 * kcache banks later. 
*/ 1649 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 1650 1651 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 1652 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 1653 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 1654 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 1655 ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1; 1656 ctx.temp_reg = ctx.bc->ar_reg + 2; 1657 } else 1658 ctx.temp_reg = ctx.bc->ar_reg + 1; 1659 1660 if (indirect_gprs) { 1661 shader->max_arrays = 0; 1662 shader->num_arrays = 0; 1663 1664 if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { 1665 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], 1666 ctx.file_offset[TGSI_FILE_OUTPUT] - 1667 ctx.file_offset[TGSI_FILE_INPUT], 1668 0x0F); 1669 } 1670 if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 1671 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], 1672 ctx.file_offset[TGSI_FILE_TEMPORARY] - 1673 ctx.file_offset[TGSI_FILE_OUTPUT], 1674 0x0F); 1675 } 1676 } 1677 1678 ctx.nliterals = 0; 1679 ctx.literals = NULL; 1680 shader->fs_write_all = FALSE; 1681 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 1682 tgsi_parse_token(&ctx.parse); 1683 switch (ctx.parse.FullToken.Token.Type) { 1684 case TGSI_TOKEN_TYPE_IMMEDIATE: 1685 immediate = &ctx.parse.FullToken.FullImmediate; 1686 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 1687 if(ctx.literals == NULL) { 1688 r = -ENOMEM; 1689 goto out_err; 1690 } 1691 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 1692 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 1693 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 1694 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 1695 ctx.nliterals++; 1696 break; 1697 case TGSI_TOKEN_TYPE_DECLARATION: 1698 r = tgsi_declaration(&ctx); 1699 if (r) 1700 goto out_err; 1701 break; 1702 case TGSI_TOKEN_TYPE_INSTRUCTION: 1703 break; 1704 case TGSI_TOKEN_TYPE_PROPERTY: 1705 property = &ctx.parse.FullToken.FullProperty; 1706 switch 
(property->Property.PropertyName) { 1707 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: 1708 if (property->u[0].Data == 1) 1709 shader->fs_write_all = TRUE; 1710 break; 1711 case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION: 1712 if (property->u[0].Data == 1) 1713 shader->vs_position_window_space = TRUE; 1714 break; 1715 case TGSI_PROPERTY_VS_PROHIBIT_UCPS: 1716 /* we don't need this one */ 1717 break; 1718 case TGSI_PROPERTY_GS_INPUT_PRIM: 1719 shader->gs_input_prim = property->u[0].Data; 1720 break; 1721 case TGSI_PROPERTY_GS_OUTPUT_PRIM: 1722 shader->gs_output_prim = property->u[0].Data; 1723 break; 1724 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: 1725 shader->gs_max_out_vertices = property->u[0].Data; 1726 break; 1727 } 1728 break; 1729 default: 1730 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 1731 r = -EINVAL; 1732 goto out_err; 1733 } 1734 } 1735 1736 shader->ring_item_size = ctx.next_ring_offset; 1737 1738 /* Process two side if needed */ 1739 if (shader->two_side && ctx.colors_used) { 1740 int i, count = ctx.shader->ninput; 1741 unsigned next_lds_loc = ctx.shader->nlds; 1742 1743 /* additional inputs will be allocated right after the existing inputs, 1744 * we won't need them after the color selection, so we don't need to 1745 * reserve these gprs for the rest of the shader code and to adjust 1746 * output offsets etc. 
*/ 1747 int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 1748 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 1749 1750 if (ctx.face_gpr == -1) { 1751 i = ctx.shader->ninput++; 1752 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 1753 ctx.shader->input[i].spi_sid = 0; 1754 ctx.shader->input[i].gpr = gpr++; 1755 ctx.face_gpr = ctx.shader->input[i].gpr; 1756 } 1757 1758 for (i = 0; i < count; i++) { 1759 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 1760 int ni = ctx.shader->ninput++; 1761 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 1762 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 1763 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 1764 ctx.shader->input[ni].gpr = gpr++; 1765 // TGSI to LLVM needs to know the lds position of inputs. 1766 // Non LLVM path computes it later (in process_twoside_color) 1767 ctx.shader->input[ni].lds_pos = next_lds_loc++; 1768 ctx.shader->input[i].back_color_input = ni; 1769 if (ctx.bc->chip_class >= EVERGREEN) { 1770 if ((r = evergreen_interp_input(&ctx, ni))) 1771 return r; 1772 } 1773 } 1774 } 1775 } 1776 1777/* LLVM backend setup */ 1778#ifdef R600_USE_LLVM 1779 if (use_llvm) { 1780 struct radeon_llvm_context radeon_llvm_ctx; 1781 LLVMModuleRef mod; 1782 bool dump = r600_can_dump_shader(&rscreen->b, tokens); 1783 boolean use_kill = false; 1784 1785 memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); 1786 radeon_llvm_ctx.type = ctx.type; 1787 radeon_llvm_ctx.two_side = shader->two_side; 1788 radeon_llvm_ctx.face_gpr = ctx.face_gpr; 1789 radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1; 1790 radeon_llvm_ctx.r600_inputs = ctx.shader->input; 1791 radeon_llvm_ctx.r600_outputs = ctx.shader->output; 1792 radeon_llvm_ctx.color_buffer_count = max_color_exports; 1793 radeon_llvm_ctx.chip_class = ctx.bc->chip_class; 1794 radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN); 1795 radeon_llvm_ctx.stream_outputs = &so; 1796 
radeon_llvm_ctx.clip_vertex = ctx.cv_output; 1797 radeon_llvm_ctx.alpha_to_one = key.alpha_to_one; 1798 radeon_llvm_ctx.has_compressed_msaa_texturing = 1799 ctx.bc->has_compressed_msaa_texturing; 1800 mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); 1801 ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp; 1802 ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers; 1803 1804 if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, dump)) { 1805 radeon_llvm_dispose(&radeon_llvm_ctx); 1806 use_llvm = 0; 1807 fprintf(stderr, "R600 LLVM backend failed to compile " 1808 "shader. Falling back to TGSI\n"); 1809 } else { 1810 ctx.file_offset[TGSI_FILE_OUTPUT] = 1811 ctx.file_offset[TGSI_FILE_INPUT]; 1812 } 1813 if (use_kill) 1814 ctx.shader->uses_kill = use_kill; 1815 radeon_llvm_dispose(&radeon_llvm_ctx); 1816 } 1817#endif 1818/* End of LLVM backend setup */ 1819 1820 if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) 1821 shader->nr_ps_max_color_exports = 8; 1822 1823 if (!use_llvm) { 1824 if (ctx.fragcoord_input >= 0) { 1825 if (ctx.bc->chip_class == CAYMAN) { 1826 for (j = 0 ; j < 4; j++) { 1827 struct r600_bytecode_alu alu; 1828 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1829 alu.op = ALU_OP1_RECIP_IEEE; 1830 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 1831 alu.src[0].chan = 3; 1832 1833 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 1834 alu.dst.chan = j; 1835 alu.dst.write = (j == 3); 1836 alu.last = 1; 1837 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 1838 return r; 1839 } 1840 } else { 1841 struct r600_bytecode_alu alu; 1842 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1843 alu.op = ALU_OP1_RECIP_IEEE; 1844 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 1845 alu.src[0].chan = 3; 1846 1847 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 1848 alu.dst.chan = 3; 1849 alu.dst.write = 1; 1850 alu.last = 1; 1851 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 
1852 return r; 1853 } 1854 } 1855 1856 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 1857 struct r600_bytecode_alu alu; 1858 int r; 1859 1860 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1861 alu.op = ALU_OP1_MOV; 1862 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 1863 alu.src[0].value = 0; 1864 alu.dst.sel = ctx.gs_export_gpr_treg; 1865 alu.dst.write = 1; 1866 alu.last = 1; 1867 r = r600_bytecode_add_alu(ctx.bc, &alu); 1868 if (r) 1869 return r; 1870 } 1871 if (shader->two_side && ctx.colors_used) { 1872 if ((r = process_twoside_color_inputs(&ctx))) 1873 return r; 1874 } 1875 1876 tgsi_parse_init(&ctx.parse, tokens); 1877 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 1878 tgsi_parse_token(&ctx.parse); 1879 switch (ctx.parse.FullToken.Token.Type) { 1880 case TGSI_TOKEN_TYPE_INSTRUCTION: 1881 r = tgsi_is_supported(&ctx); 1882 if (r) 1883 goto out_err; 1884 ctx.max_driver_temp_used = 0; 1885 /* reserve first tmp for everyone */ 1886 r600_get_temp(&ctx); 1887 1888 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 1889 if ((r = tgsi_split_constant(&ctx))) 1890 goto out_err; 1891 if ((r = tgsi_split_literal_constant(&ctx))) 1892 goto out_err; 1893 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) 1894 if ((r = tgsi_split_gs_inputs(&ctx))) 1895 goto out_err; 1896 if (ctx.bc->chip_class == CAYMAN) 1897 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 1898 else if (ctx.bc->chip_class >= EVERGREEN) 1899 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 1900 else 1901 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 1902 r = ctx.inst_info->process(&ctx); 1903 if (r) 1904 goto out_err; 1905 break; 1906 default: 1907 break; 1908 } 1909 } 1910 } 1911 1912 /* Reset the temporary register counter. 
*/ 1913 ctx.max_driver_temp_used = 0; 1914 1915 noutput = shader->noutput; 1916 1917 if (!ring_outputs && ctx.clip_vertex_write) { 1918 unsigned clipdist_temp[2]; 1919 1920 clipdist_temp[0] = r600_get_temp(&ctx); 1921 clipdist_temp[1] = r600_get_temp(&ctx); 1922 1923 /* need to convert a clipvertex write into clipdistance writes and not export 1924 the clip vertex anymore */ 1925 1926 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 1927 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 1928 shader->output[noutput].gpr = clipdist_temp[0]; 1929 noutput++; 1930 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 1931 shader->output[noutput].gpr = clipdist_temp[1]; 1932 noutput++; 1933 1934 /* reset spi_sid for clipvertex output to avoid confusing spi */ 1935 shader->output[ctx.cv_output].spi_sid = 0; 1936 1937 shader->clip_dist_write = 0xFF; 1938 1939 for (i = 0; i < 8; i++) { 1940 int oreg = i >> 2; 1941 int ochan = i & 3; 1942 1943 for (j = 0; j < 4; j++) { 1944 struct r600_bytecode_alu alu; 1945 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1946 alu.op = ALU_OP2_DOT4; 1947 alu.src[0].sel = shader->output[ctx.cv_output].gpr; 1948 alu.src[0].chan = j; 1949 1950 alu.src[1].sel = 512 + i; 1951 alu.src[1].kc_bank = R600_UCP_CONST_BUFFER; 1952 alu.src[1].chan = j; 1953 1954 alu.dst.sel = clipdist_temp[oreg]; 1955 alu.dst.chan = j; 1956 alu.dst.write = (j == ochan); 1957 if (j == 3) 1958 alu.last = 1; 1959 if (!use_llvm) 1960 r = r600_bytecode_add_alu(ctx.bc, &alu); 1961 if (r) 1962 return r; 1963 } 1964 } 1965 } 1966 1967 /* Add stream outputs. */ 1968 if (!ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX && 1969 so.num_outputs && !use_llvm) 1970 emit_streamout(&ctx, &so); 1971 1972 convert_edgeflag_to_int(&ctx); 1973 1974 if (ring_outputs) { 1975 if (key.vs_as_es) 1976 emit_gs_ring_writes(&ctx, FALSE); 1977 } else { 1978 /* Export output */ 1979 next_clip_base = shader->vs_out_misc_write ? 
62 : 61; 1980 1981 for (i = 0, j = 0; i < noutput; i++, j++) { 1982 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1983 output[j].gpr = shader->output[i].gpr; 1984 output[j].elem_size = 3; 1985 output[j].swizzle_x = 0; 1986 output[j].swizzle_y = 1; 1987 output[j].swizzle_z = 2; 1988 output[j].swizzle_w = 3; 1989 output[j].burst_count = 1; 1990 output[j].type = -1; 1991 output[j].op = CF_OP_EXPORT; 1992 switch (ctx.type) { 1993 case TGSI_PROCESSOR_VERTEX: 1994 switch (shader->output[i].name) { 1995 case TGSI_SEMANTIC_POSITION: 1996 output[j].array_base = 60; 1997 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1998 pos_emitted = true; 1999 break; 2000 2001 case TGSI_SEMANTIC_PSIZE: 2002 output[j].array_base = 61; 2003 output[j].swizzle_y = 7; 2004 output[j].swizzle_z = 7; 2005 output[j].swizzle_w = 7; 2006 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2007 pos_emitted = true; 2008 break; 2009 case TGSI_SEMANTIC_EDGEFLAG: 2010 output[j].array_base = 61; 2011 output[j].swizzle_x = 7; 2012 output[j].swizzle_y = 0; 2013 output[j].swizzle_z = 7; 2014 output[j].swizzle_w = 7; 2015 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2016 pos_emitted = true; 2017 break; 2018 case TGSI_SEMANTIC_LAYER: 2019 output[j].array_base = 61; 2020 output[j].swizzle_x = 7; 2021 output[j].swizzle_y = 7; 2022 output[j].swizzle_z = 0; 2023 output[j].swizzle_w = 7; 2024 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2025 pos_emitted = true; 2026 break; 2027 case TGSI_SEMANTIC_CLIPVERTEX: 2028 j--; 2029 break; 2030 case TGSI_SEMANTIC_CLIPDIST: 2031 output[j].array_base = next_clip_base++; 2032 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2033 pos_emitted = true; 2034 /* spi_sid is 0 for clipdistance outputs that were generated 2035 * for clipvertex - we don't need to pass them to PS */ 2036 if (shader->output[i].spi_sid) { 2037 j++; 2038 /* duplicate it as PARAM to pass to the pixel shader */ 2039 memcpy(&output[j], 
&output[j-1], sizeof(struct r600_bytecode_output)); 2040 output[j].array_base = next_param_base++; 2041 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2042 } 2043 break; 2044 case TGSI_SEMANTIC_FOG: 2045 output[j].swizzle_y = 4; /* 0 */ 2046 output[j].swizzle_z = 4; /* 0 */ 2047 output[j].swizzle_w = 5; /* 1 */ 2048 break; 2049 } 2050 break; 2051 case TGSI_PROCESSOR_FRAGMENT: 2052 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 2053 /* never export more colors than the number of CBs */ 2054 if (shader->output[i].sid >= max_color_exports) { 2055 /* skip export */ 2056 j--; 2057 continue; 2058 } 2059 output[j].swizzle_w = key.alpha_to_one ? 5 : 3; 2060 output[j].array_base = shader->output[i].sid; 2061 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2062 shader->nr_ps_color_exports++; 2063 if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { 2064 for (k = 1; k < max_color_exports; k++) { 2065 j++; 2066 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2067 output[j].gpr = shader->output[i].gpr; 2068 output[j].elem_size = 3; 2069 output[j].swizzle_x = 0; 2070 output[j].swizzle_y = 1; 2071 output[j].swizzle_z = 2; 2072 output[j].swizzle_w = key.alpha_to_one ? 
5 : 3; 2073 output[j].burst_count = 1; 2074 output[j].array_base = k; 2075 output[j].op = CF_OP_EXPORT; 2076 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2077 shader->nr_ps_color_exports++; 2078 } 2079 } 2080 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 2081 output[j].array_base = 61; 2082 output[j].swizzle_x = 2; 2083 output[j].swizzle_y = 7; 2084 output[j].swizzle_z = output[j].swizzle_w = 7; 2085 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2086 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 2087 output[j].array_base = 61; 2088 output[j].swizzle_x = 7; 2089 output[j].swizzle_y = 1; 2090 output[j].swizzle_z = output[j].swizzle_w = 7; 2091 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2092 } else { 2093 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 2094 r = -EINVAL; 2095 goto out_err; 2096 } 2097 break; 2098 default: 2099 R600_ERR("unsupported processor type %d\n", ctx.type); 2100 r = -EINVAL; 2101 goto out_err; 2102 } 2103 2104 if (output[j].type==-1) { 2105 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2106 output[j].array_base = next_param_base++; 2107 } 2108 } 2109 2110 /* add fake position export */ 2111 if (ctx.type == TGSI_PROCESSOR_VERTEX && pos_emitted == false) { 2112 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2113 output[j].gpr = 0; 2114 output[j].elem_size = 3; 2115 output[j].swizzle_x = 7; 2116 output[j].swizzle_y = 7; 2117 output[j].swizzle_z = 7; 2118 output[j].swizzle_w = 7; 2119 output[j].burst_count = 1; 2120 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 2121 output[j].array_base = 60; 2122 output[j].op = CF_OP_EXPORT; 2123 j++; 2124 } 2125 2126 /* add fake param output for vertex shader if no param is exported */ 2127 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { 2128 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2129 output[j].gpr = 0; 2130 output[j].elem_size 
= 3; 2131 output[j].swizzle_x = 7; 2132 output[j].swizzle_y = 7; 2133 output[j].swizzle_z = 7; 2134 output[j].swizzle_w = 7; 2135 output[j].burst_count = 1; 2136 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 2137 output[j].array_base = 0; 2138 output[j].op = CF_OP_EXPORT; 2139 j++; 2140 } 2141 2142 /* add fake pixel export */ 2143 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { 2144 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 2145 output[j].gpr = 0; 2146 output[j].elem_size = 3; 2147 output[j].swizzle_x = 7; 2148 output[j].swizzle_y = 7; 2149 output[j].swizzle_z = 7; 2150 output[j].swizzle_w = 7; 2151 output[j].burst_count = 1; 2152 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 2153 output[j].array_base = 0; 2154 output[j].op = CF_OP_EXPORT; 2155 j++; 2156 } 2157 2158 noutput = j; 2159 2160 /* set export done on last export of each type */ 2161 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 2162 if (!(output_done & (1 << output[i].type))) { 2163 output_done |= (1 << output[i].type); 2164 output[i].op = CF_OP_EXPORT_DONE; 2165 } 2166 } 2167 /* add output to bytecode */ 2168 if (!use_llvm) { 2169 for (i = 0; i < noutput; i++) { 2170 r = r600_bytecode_add_output(ctx.bc, &output[i]); 2171 if (r) 2172 goto out_err; 2173 } 2174 } 2175 } 2176 2177 /* add program end */ 2178 if (!use_llvm) { 2179 if (ctx.bc->chip_class == CAYMAN) 2180 cm_bytecode_add_cf_end(ctx.bc); 2181 else { 2182 const struct cf_op_info *last = NULL; 2183 2184 if (ctx.bc->cf_last) 2185 last = r600_isa_cf(ctx.bc->cf_last->op); 2186 2187 /* alu clause instructions don't have EOP bit, so add NOP */ 2188 if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS) 2189 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 2190 2191 ctx.bc->cf_last->end_of_program = 1; 2192 } 2193 } 2194 2195 /* check GPR limit - we have 124 = 128 - 4 2196 * (4 are reserved as alu clause temporary 
registers) */ 2197 if (ctx.bc->ngpr > 124) { 2198 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 2199 r = -ENOMEM; 2200 goto out_err; 2201 } 2202 2203 if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { 2204 if ((r = generate_gs_copy_shader(rctx, pipeshader, &so))) 2205 return r; 2206 } 2207 2208 free(ctx.literals); 2209 tgsi_parse_free(&ctx.parse); 2210 return 0; 2211out_err: 2212 free(ctx.literals); 2213 tgsi_parse_free(&ctx.parse); 2214 return r; 2215} 2216 2217static int tgsi_unsupported(struct r600_shader_ctx *ctx) 2218{ 2219 R600_ERR("%s tgsi opcode unsupported\n", 2220 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 2221 return -EINVAL; 2222} 2223 2224static int tgsi_end(struct r600_shader_ctx *ctx) 2225{ 2226 return 0; 2227} 2228 2229static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 2230 const struct r600_shader_src *shader_src, 2231 unsigned chan) 2232{ 2233 bc_src->sel = shader_src->sel; 2234 bc_src->chan = shader_src->swizzle[chan]; 2235 bc_src->neg = shader_src->neg; 2236 bc_src->abs = shader_src->abs; 2237 bc_src->rel = shader_src->rel; 2238 bc_src->value = shader_src->value[bc_src->chan]; 2239 bc_src->kc_bank = shader_src->kc_bank; 2240} 2241 2242static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 2243{ 2244 bc_src->abs = 1; 2245 bc_src->neg = 0; 2246} 2247 2248static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 2249{ 2250 bc_src->neg = !bc_src->neg; 2251} 2252 2253static void tgsi_dst(struct r600_shader_ctx *ctx, 2254 const struct tgsi_full_dst_register *tgsi_dst, 2255 unsigned swizzle, 2256 struct r600_bytecode_alu_dst *r600_dst) 2257{ 2258 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2259 2260 r600_dst->sel = tgsi_dst->Register.Index; 2261 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 2262 r600_dst->chan = swizzle; 2263 r600_dst->write = 1; 2264 if (tgsi_dst->Register.Indirect) 2265 r600_dst->rel = 
/*
 * Return the index (0..3) of the highest channel enabled in the low four
 * bits of writemask.  Bits above bit 3 are ignored.  An empty mask yields 0,
 * the same value as a mask that enables only channel 0.
 */
static int tgsi_last_instruction(unsigned writemask)
{
	int chan;

	/* walk down from W; the first enabled channel found is the last one */
	for (chan = 3; chan > 0; chan--) {
		if ((writemask >> chan) & 1u)
			return chan;
	}

	/* only X is enabled, or nothing is enabled at all */
	return 0;
}
0, sizeof(struct r600_bytecode_alu)); 2339 alu.op = ALU_OP1_MOV; 2340 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2341 alu.src[0].sel = ctx->temp_reg; 2342 alu.src[0].chan = i; 2343 alu.last = (i == lasti); 2344 2345 r = r600_bytecode_add_alu(ctx->bc, &alu); 2346 if (r) 2347 return r; 2348 } 2349 } 2350 return 0; 2351} 2352 2353static int tgsi_op2(struct r600_shader_ctx *ctx) 2354{ 2355 return tgsi_op2_s(ctx, 0, 0); 2356} 2357 2358static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 2359{ 2360 return tgsi_op2_s(ctx, 1, 0); 2361} 2362 2363static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 2364{ 2365 return tgsi_op2_s(ctx, 0, 1); 2366} 2367 2368static int tgsi_ineg(struct r600_shader_ctx *ctx) 2369{ 2370 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2371 struct r600_bytecode_alu alu; 2372 int i, r; 2373 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2374 2375 for (i = 0; i < lasti + 1; i++) { 2376 2377 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2378 continue; 2379 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2380 alu.op = ctx->inst_info->op; 2381 2382 alu.src[0].sel = V_SQ_ALU_SRC_0; 2383 2384 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2385 2386 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2387 2388 if (i == lasti) { 2389 alu.last = 1; 2390 } 2391 r = r600_bytecode_add_alu(ctx->bc, &alu); 2392 if (r) 2393 return r; 2394 } 2395 return 0; 2396 2397} 2398 2399static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 2400{ 2401 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2402 int i, j, r; 2403 struct r600_bytecode_alu alu; 2404 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 2405 2406 for (i = 0 ; i < last_slot; i++) { 2407 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2408 alu.op = ctx->inst_info->op; 2409 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2410 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 2411 2412 /* RSQ should take the absolute value of src */ 2413 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_RSQ) { 2414 r600_bytecode_src_set_abs(&alu.src[j]); 2415 } 2416 } 2417 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2418 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 2419 2420 if (i == last_slot - 1) 2421 alu.last = 1; 2422 r = r600_bytecode_add_alu(ctx->bc, &alu); 2423 if (r) 2424 return r; 2425 } 2426 return 0; 2427} 2428 2429static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 2430{ 2431 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2432 int i, j, k, r; 2433 struct r600_bytecode_alu alu; 2434 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 2435 for (k = 0; k < last_slot; k++) { 2436 if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 2437 continue; 2438 2439 for (i = 0 ; i < 4; i++) { 2440 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2441 alu.op = ctx->inst_info->op; 2442 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2443 r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 2444 } 2445 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2446 alu.dst.write = (i == k); 2447 if (i == 3) 2448 alu.last = 1; 2449 r = r600_bytecode_add_alu(ctx->bc, &alu); 2450 if (r) 2451 return r; 2452 } 2453 } 2454 return 0; 2455} 2456 2457/* 2458 * r600 - trunc to -PI..PI range 2459 * r700 - normalize by dividing by 2PI 2460 * see fdo bug 27901 2461 */ 2462static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 2463{ 2464 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 2465 static float double_pi = 3.1415926535 * 2; 2466 static float neg_pi = -3.1415926535; 2467 2468 int r; 2469 struct r600_bytecode_alu alu; 2470 2471 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 2472 alu.op = ALU_OP3_MULADD; 2473 alu.is_op3 = 1; 2474 2475 alu.dst.chan = 0; 2476 alu.dst.sel = ctx->temp_reg; 2477 alu.dst.write = 1; 2478 2479 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2480 2481 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2482 alu.src[1].chan = 0; 2483 alu.src[1].value = *(uint32_t *)&half_inv_pi; 2484 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 2485 alu.src[2].chan = 0; 2486 alu.last = 1; 2487 r = r600_bytecode_add_alu(ctx->bc, &alu); 2488 if (r) 2489 return r; 2490 2491 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2492 alu.op = ALU_OP1_FRACT; 2493 2494 alu.dst.chan = 0; 2495 alu.dst.sel = ctx->temp_reg; 2496 alu.dst.write = 1; 2497 2498 alu.src[0].sel = ctx->temp_reg; 2499 alu.src[0].chan = 0; 2500 alu.last = 1; 2501 r = r600_bytecode_add_alu(ctx->bc, &alu); 2502 if (r) 2503 return r; 2504 2505 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2506 alu.op = ALU_OP3_MULADD; 2507 alu.is_op3 = 1; 2508 2509 alu.dst.chan = 0; 2510 alu.dst.sel = ctx->temp_reg; 2511 alu.dst.write = 1; 2512 2513 alu.src[0].sel = ctx->temp_reg; 2514 alu.src[0].chan = 0; 2515 2516 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 2517 alu.src[1].chan = 0; 2518 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2519 alu.src[2].chan = 0; 2520 2521 if (ctx->bc->chip_class == R600) { 2522 alu.src[1].value = *(uint32_t *)&double_pi; 2523 alu.src[2].value = *(uint32_t *)&neg_pi; 2524 } else { 2525 alu.src[1].sel = V_SQ_ALU_SRC_1; 2526 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 2527 alu.src[2].neg = 1; 2528 } 2529 2530 alu.last = 1; 2531 r = r600_bytecode_add_alu(ctx->bc, &alu); 2532 if (r) 2533 return r; 2534 return 0; 2535} 2536 2537static int cayman_trig(struct r600_shader_ctx *ctx) 2538{ 2539 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2540 struct r600_bytecode_alu alu; 2541 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 2542 int i, r; 2543 2544 r = tgsi_setup_trig(ctx); 2545 if (r) 2546 return r; 2547 2548 2549 for (i = 0; i < last_slot; i++) { 2550 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2551 alu.op = ctx->inst_info->op; 2552 alu.dst.chan = i; 2553 2554 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2555 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 2556 2557 alu.src[0].sel = ctx->temp_reg; 2558 alu.src[0].chan = 0; 2559 if (i == last_slot - 1) 2560 alu.last = 1; 2561 r = r600_bytecode_add_alu(ctx->bc, &alu); 2562 if (r) 2563 return r; 2564 } 2565 return 0; 2566} 2567 2568static int tgsi_trig(struct r600_shader_ctx *ctx) 2569{ 2570 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2571 struct r600_bytecode_alu alu; 2572 int i, r; 2573 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2574 2575 r = tgsi_setup_trig(ctx); 2576 if (r) 2577 return r; 2578 2579 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2580 alu.op = ctx->inst_info->op; 2581 alu.dst.chan = 0; 2582 alu.dst.sel = ctx->temp_reg; 2583 alu.dst.write = 1; 2584 2585 alu.src[0].sel = ctx->temp_reg; 2586 alu.src[0].chan = 0; 2587 alu.last = 1; 2588 r = r600_bytecode_add_alu(ctx->bc, &alu); 2589 if (r) 2590 return r; 2591 2592 /* replicate result */ 2593 for (i = 0; i < lasti + 1; i++) { 2594 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2595 continue; 2596 2597 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2598 alu.op = ALU_OP1_MOV; 2599 2600 alu.src[0].sel = ctx->temp_reg; 2601 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2602 if (i == lasti) 2603 alu.last = 1; 2604 r = r600_bytecode_add_alu(ctx->bc, &alu); 2605 if (r) 2606 return r; 2607 } 2608 return 0; 2609} 2610 2611static int tgsi_scs(struct r600_shader_ctx *ctx) 2612{ 2613 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2614 struct r600_bytecode_alu alu; 2615 int i, r; 2616 2617 /* We'll only need the trig stuff if we are going to write to the 2618 * X or Y 
components of the destination vector. 2619 */ 2620 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 2621 r = tgsi_setup_trig(ctx); 2622 if (r) 2623 return r; 2624 } 2625 2626 /* dst.x = COS */ 2627 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2628 if (ctx->bc->chip_class == CAYMAN) { 2629 for (i = 0 ; i < 3; i++) { 2630 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2631 alu.op = ALU_OP1_COS; 2632 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2633 2634 if (i == 0) 2635 alu.dst.write = 1; 2636 else 2637 alu.dst.write = 0; 2638 alu.src[0].sel = ctx->temp_reg; 2639 alu.src[0].chan = 0; 2640 if (i == 2) 2641 alu.last = 1; 2642 r = r600_bytecode_add_alu(ctx->bc, &alu); 2643 if (r) 2644 return r; 2645 } 2646 } else { 2647 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2648 alu.op = ALU_OP1_COS; 2649 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 2650 2651 alu.src[0].sel = ctx->temp_reg; 2652 alu.src[0].chan = 0; 2653 alu.last = 1; 2654 r = r600_bytecode_add_alu(ctx->bc, &alu); 2655 if (r) 2656 return r; 2657 } 2658 } 2659 2660 /* dst.y = SIN */ 2661 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2662 if (ctx->bc->chip_class == CAYMAN) { 2663 for (i = 0 ; i < 3; i++) { 2664 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2665 alu.op = ALU_OP1_SIN; 2666 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2667 if (i == 1) 2668 alu.dst.write = 1; 2669 else 2670 alu.dst.write = 0; 2671 alu.src[0].sel = ctx->temp_reg; 2672 alu.src[0].chan = 0; 2673 if (i == 2) 2674 alu.last = 1; 2675 r = r600_bytecode_add_alu(ctx->bc, &alu); 2676 if (r) 2677 return r; 2678 } 2679 } else { 2680 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2681 alu.op = ALU_OP1_SIN; 2682 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 2683 2684 alu.src[0].sel = ctx->temp_reg; 2685 alu.src[0].chan = 0; 2686 alu.last = 1; 2687 r = r600_bytecode_add_alu(ctx->bc, &alu); 2688 if (r) 2689 return r; 2690 } 2691 } 2692 2693 /* dst.z = 0.0; */ 2694 if (inst->Dst[0].Register.WriteMask 
& TGSI_WRITEMASK_Z) { 2695 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2696 2697 alu.op = ALU_OP1_MOV; 2698 2699 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 2700 2701 alu.src[0].sel = V_SQ_ALU_SRC_0; 2702 alu.src[0].chan = 0; 2703 2704 alu.last = 1; 2705 2706 r = r600_bytecode_add_alu(ctx->bc, &alu); 2707 if (r) 2708 return r; 2709 } 2710 2711 /* dst.w = 1.0; */ 2712 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2713 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2714 2715 alu.op = ALU_OP1_MOV; 2716 2717 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 2718 2719 alu.src[0].sel = V_SQ_ALU_SRC_1; 2720 alu.src[0].chan = 0; 2721 2722 alu.last = 1; 2723 2724 r = r600_bytecode_add_alu(ctx->bc, &alu); 2725 if (r) 2726 return r; 2727 } 2728 2729 return 0; 2730} 2731 2732static int tgsi_kill(struct r600_shader_ctx *ctx) 2733{ 2734 struct r600_bytecode_alu alu; 2735 int i, r; 2736 2737 for (i = 0; i < 4; i++) { 2738 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2739 alu.op = ctx->inst_info->op; 2740 2741 alu.dst.chan = i; 2742 2743 alu.src[0].sel = V_SQ_ALU_SRC_0; 2744 2745 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILL) { 2746 alu.src[1].sel = V_SQ_ALU_SRC_1; 2747 alu.src[1].neg = 1; 2748 } else { 2749 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2750 } 2751 if (i == 3) { 2752 alu.last = 1; 2753 } 2754 r = r600_bytecode_add_alu(ctx->bc, &alu); 2755 if (r) 2756 return r; 2757 } 2758 2759 /* kill must be last in ALU */ 2760 ctx->bc->force_add_cf = 1; 2761 ctx->shader->uses_kill = TRUE; 2762 return 0; 2763} 2764 2765static int tgsi_lit(struct r600_shader_ctx *ctx) 2766{ 2767 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2768 struct r600_bytecode_alu alu; 2769 int r; 2770 2771 /* tmp.x = max(src.y, 0.0) */ 2772 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2773 alu.op = ALU_OP2_MAX; 2774 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 2775 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 2776 alu.src[1].chan = 1; 2777 
2778 alu.dst.sel = ctx->temp_reg; 2779 alu.dst.chan = 0; 2780 alu.dst.write = 1; 2781 2782 alu.last = 1; 2783 r = r600_bytecode_add_alu(ctx->bc, &alu); 2784 if (r) 2785 return r; 2786 2787 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 2788 { 2789 int chan; 2790 int sel; 2791 int i; 2792 2793 if (ctx->bc->chip_class == CAYMAN) { 2794 for (i = 0; i < 3; i++) { 2795 /* tmp.z = log(tmp.x) */ 2796 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2797 alu.op = ALU_OP1_LOG_CLAMPED; 2798 alu.src[0].sel = ctx->temp_reg; 2799 alu.src[0].chan = 0; 2800 alu.dst.sel = ctx->temp_reg; 2801 alu.dst.chan = i; 2802 if (i == 2) { 2803 alu.dst.write = 1; 2804 alu.last = 1; 2805 } else 2806 alu.dst.write = 0; 2807 2808 r = r600_bytecode_add_alu(ctx->bc, &alu); 2809 if (r) 2810 return r; 2811 } 2812 } else { 2813 /* tmp.z = log(tmp.x) */ 2814 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2815 alu.op = ALU_OP1_LOG_CLAMPED; 2816 alu.src[0].sel = ctx->temp_reg; 2817 alu.src[0].chan = 0; 2818 alu.dst.sel = ctx->temp_reg; 2819 alu.dst.chan = 2; 2820 alu.dst.write = 1; 2821 alu.last = 1; 2822 r = r600_bytecode_add_alu(ctx->bc, &alu); 2823 if (r) 2824 return r; 2825 } 2826 2827 chan = alu.dst.chan; 2828 sel = alu.dst.sel; 2829 2830 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 2831 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2832 alu.op = ALU_OP3_MUL_LIT; 2833 alu.src[0].sel = sel; 2834 alu.src[0].chan = chan; 2835 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 2836 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 2837 alu.dst.sel = ctx->temp_reg; 2838 alu.dst.chan = 0; 2839 alu.dst.write = 1; 2840 alu.is_op3 = 1; 2841 alu.last = 1; 2842 r = r600_bytecode_add_alu(ctx->bc, &alu); 2843 if (r) 2844 return r; 2845 2846 if (ctx->bc->chip_class == CAYMAN) { 2847 for (i = 0; i < 3; i++) { 2848 /* dst.z = exp(tmp.x) */ 2849 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2850 alu.op = ALU_OP1_EXP_IEEE; 2851 alu.src[0].sel = ctx->temp_reg; 2852 alu.src[0].chan = 0; 2853 
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2854 if (i == 2) { 2855 alu.dst.write = 1; 2856 alu.last = 1; 2857 } else 2858 alu.dst.write = 0; 2859 r = r600_bytecode_add_alu(ctx->bc, &alu); 2860 if (r) 2861 return r; 2862 } 2863 } else { 2864 /* dst.z = exp(tmp.x) */ 2865 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2866 alu.op = ALU_OP1_EXP_IEEE; 2867 alu.src[0].sel = ctx->temp_reg; 2868 alu.src[0].chan = 0; 2869 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 2870 alu.last = 1; 2871 r = r600_bytecode_add_alu(ctx->bc, &alu); 2872 if (r) 2873 return r; 2874 } 2875 } 2876 2877 /* dst.x, <- 1.0 */ 2878 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2879 alu.op = ALU_OP1_MOV; 2880 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 2881 alu.src[0].chan = 0; 2882 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 2883 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 2884 r = r600_bytecode_add_alu(ctx->bc, &alu); 2885 if (r) 2886 return r; 2887 2888 /* dst.y = max(src.x, 0.0) */ 2889 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2890 alu.op = ALU_OP2_MAX; 2891 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2892 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 2893 alu.src[1].chan = 0; 2894 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 2895 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 2896 r = r600_bytecode_add_alu(ctx->bc, &alu); 2897 if (r) 2898 return r; 2899 2900 /* dst.w, <- 1.0 */ 2901 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2902 alu.op = ALU_OP1_MOV; 2903 alu.src[0].sel = V_SQ_ALU_SRC_1; 2904 alu.src[0].chan = 0; 2905 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 2906 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 2907 alu.last = 1; 2908 r = r600_bytecode_add_alu(ctx->bc, &alu); 2909 if (r) 2910 return r; 2911 2912 return 0; 2913} 2914 2915static int tgsi_rsq(struct r600_shader_ctx *ctx) 2916{ 2917 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2918 struct r600_bytecode_alu alu; 2919 int i, r; 2920 2921 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2922 2923 /* XXX: 2924 * For state trackers other than OpenGL, we'll want to use 2925 * _RECIPSQRT_IEEE instead. 2926 */ 2927 alu.op = ALU_OP1_RECIPSQRT_CLAMPED; 2928 2929 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 2930 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 2931 r600_bytecode_src_set_abs(&alu.src[i]); 2932 } 2933 alu.dst.sel = ctx->temp_reg; 2934 alu.dst.write = 1; 2935 alu.last = 1; 2936 r = r600_bytecode_add_alu(ctx->bc, &alu); 2937 if (r) 2938 return r; 2939 /* replicate result */ 2940 return tgsi_helper_tempx_replicate(ctx); 2941} 2942 2943static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 2944{ 2945 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2946 struct r600_bytecode_alu alu; 2947 int i, r; 2948 2949 for (i = 0; i < 4; i++) { 2950 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2951 alu.src[0].sel = ctx->temp_reg; 2952 alu.op = ALU_OP1_MOV; 2953 alu.dst.chan = i; 2954 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2955 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 2956 if (i == 3) 2957 alu.last = 1; 2958 r = r600_bytecode_add_alu(ctx->bc, &alu); 2959 if (r) 2960 return r; 2961 } 2962 return 0; 2963} 2964 2965static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 2966{ 2967 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2968 struct r600_bytecode_alu alu; 2969 int i, r; 2970 2971 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2972 alu.op = ctx->inst_info->op; 2973 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 2974 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 2975 } 2976 alu.dst.sel = ctx->temp_reg; 2977 alu.dst.write = 1; 2978 alu.last = 1; 2979 r = r600_bytecode_add_alu(ctx->bc, &alu); 2980 if (r) 2981 return r; 2982 /* replicate result */ 2983 return tgsi_helper_tempx_replicate(ctx); 2984} 2985 2986static int cayman_pow(struct r600_shader_ctx *ctx) 2987{ 2988 struct 
tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2989 int i, r; 2990 struct r600_bytecode_alu alu; 2991 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 2992 2993 for (i = 0; i < 3; i++) { 2994 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2995 alu.op = ALU_OP1_LOG_IEEE; 2996 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2997 alu.dst.sel = ctx->temp_reg; 2998 alu.dst.chan = i; 2999 alu.dst.write = 1; 3000 if (i == 2) 3001 alu.last = 1; 3002 r = r600_bytecode_add_alu(ctx->bc, &alu); 3003 if (r) 3004 return r; 3005 } 3006 3007 /* b * LOG2(a) */ 3008 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3009 alu.op = ALU_OP2_MUL; 3010 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 3011 alu.src[1].sel = ctx->temp_reg; 3012 alu.dst.sel = ctx->temp_reg; 3013 alu.dst.write = 1; 3014 alu.last = 1; 3015 r = r600_bytecode_add_alu(ctx->bc, &alu); 3016 if (r) 3017 return r; 3018 3019 for (i = 0; i < last_slot; i++) { 3020 /* POW(a,b) = EXP2(b * LOG2(a))*/ 3021 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3022 alu.op = ALU_OP1_EXP_IEEE; 3023 alu.src[0].sel = ctx->temp_reg; 3024 3025 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3026 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 3027 if (i == last_slot - 1) 3028 alu.last = 1; 3029 r = r600_bytecode_add_alu(ctx->bc, &alu); 3030 if (r) 3031 return r; 3032 } 3033 return 0; 3034} 3035 3036static int tgsi_pow(struct r600_shader_ctx *ctx) 3037{ 3038 struct r600_bytecode_alu alu; 3039 int r; 3040 3041 /* LOG2(a) */ 3042 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3043 alu.op = ALU_OP1_LOG_IEEE; 3044 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3045 alu.dst.sel = ctx->temp_reg; 3046 alu.dst.write = 1; 3047 alu.last = 1; 3048 r = r600_bytecode_add_alu(ctx->bc, &alu); 3049 if (r) 3050 return r; 3051 /* b * LOG2(a) */ 3052 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3053 alu.op = ALU_OP2_MUL; 3054 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 3055 
alu.src[1].sel = ctx->temp_reg; 3056 alu.dst.sel = ctx->temp_reg; 3057 alu.dst.write = 1; 3058 alu.last = 1; 3059 r = r600_bytecode_add_alu(ctx->bc, &alu); 3060 if (r) 3061 return r; 3062 /* POW(a,b) = EXP2(b * LOG2(a))*/ 3063 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3064 alu.op = ALU_OP1_EXP_IEEE; 3065 alu.src[0].sel = ctx->temp_reg; 3066 alu.dst.sel = ctx->temp_reg; 3067 alu.dst.write = 1; 3068 alu.last = 1; 3069 r = r600_bytecode_add_alu(ctx->bc, &alu); 3070 if (r) 3071 return r; 3072 return tgsi_helper_tempx_replicate(ctx); 3073} 3074 3075static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 3076{ 3077 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3078 struct r600_bytecode_alu alu; 3079 int i, r, j; 3080 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3081 int tmp0 = ctx->temp_reg; 3082 int tmp1 = r600_get_temp(ctx); 3083 int tmp2 = r600_get_temp(ctx); 3084 int tmp3 = r600_get_temp(ctx); 3085 /* Unsigned path: 3086 * 3087 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 3088 * 3089 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 3090 * 2. tmp0.z = lo (tmp0.x * src2) 3091 * 3. tmp0.w = -tmp0.z 3092 * 4. tmp0.y = hi (tmp0.x * src2) 3093 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 3094 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 3095 * 7. tmp1.x = tmp0.x - tmp0.w 3096 * 8. tmp1.y = tmp0.x + tmp0.w 3097 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 3098 * 10. tmp0.z = hi(tmp0.x * src1) = q 3099 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 3100 * 3101 * 12. tmp0.w = src1 - tmp0.y = r 3102 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 3103 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 3104 * 3105 * if DIV 3106 * 3107 * 15. tmp1.z = tmp0.z + 1 = q + 1 3108 * 16. tmp1.w = tmp0.z - 1 = q - 1 3109 * 3110 * else MOD 3111 * 3112 * 15. tmp1.z = tmp0.w - src2 = r - src2 3113 * 16. 
tmp1.w = tmp0.w + src2 = r + src2 3114 * 3115 * endif 3116 * 3117 * 17. tmp1.x = tmp1.x & tmp1.y 3118 * 3119 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 3120 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 3121 * 3122 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 3123 * 20. dst = src2==0 ? MAX_UINT : tmp0.z 3124 * 3125 * Signed path: 3126 * 3127 * Same as unsigned, using abs values of the operands, 3128 * and fixing the sign of the result in the end. 3129 */ 3130 3131 for (i = 0; i < 4; i++) { 3132 if (!(write_mask & (1<<i))) 3133 continue; 3134 3135 if (signed_op) { 3136 3137 /* tmp2.x = -src0 */ 3138 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3139 alu.op = ALU_OP2_SUB_INT; 3140 3141 alu.dst.sel = tmp2; 3142 alu.dst.chan = 0; 3143 alu.dst.write = 1; 3144 3145 alu.src[0].sel = V_SQ_ALU_SRC_0; 3146 3147 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3148 3149 alu.last = 1; 3150 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3151 return r; 3152 3153 /* tmp2.y = -src1 */ 3154 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3155 alu.op = ALU_OP2_SUB_INT; 3156 3157 alu.dst.sel = tmp2; 3158 alu.dst.chan = 1; 3159 alu.dst.write = 1; 3160 3161 alu.src[0].sel = V_SQ_ALU_SRC_0; 3162 3163 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3164 3165 alu.last = 1; 3166 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3167 return r; 3168 3169 /* tmp2.z sign bit is set if src0 and src2 signs are different */ 3170 /* it will be a sign of the quotient */ 3171 if (!mod) { 3172 3173 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3174 alu.op = ALU_OP2_XOR_INT; 3175 3176 alu.dst.sel = tmp2; 3177 alu.dst.chan = 2; 3178 alu.dst.write = 1; 3179 3180 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3181 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3182 3183 alu.last = 1; 3184 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3185 return r; 3186 } 3187 3188 /* tmp2.x = |src0| */ 3189 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3190 alu.op = ALU_OP3_CNDGE_INT; 3191 
alu.is_op3 = 1; 3192 3193 alu.dst.sel = tmp2; 3194 alu.dst.chan = 0; 3195 alu.dst.write = 1; 3196 3197 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3198 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3199 alu.src[2].sel = tmp2; 3200 alu.src[2].chan = 0; 3201 3202 alu.last = 1; 3203 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3204 return r; 3205 3206 /* tmp2.y = |src1| */ 3207 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3208 alu.op = ALU_OP3_CNDGE_INT; 3209 alu.is_op3 = 1; 3210 3211 alu.dst.sel = tmp2; 3212 alu.dst.chan = 1; 3213 alu.dst.write = 1; 3214 3215 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3216 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3217 alu.src[2].sel = tmp2; 3218 alu.src[2].chan = 1; 3219 3220 alu.last = 1; 3221 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3222 return r; 3223 3224 } 3225 3226 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 3227 if (ctx->bc->chip_class == CAYMAN) { 3228 /* tmp3.x = u2f(src2) */ 3229 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3230 alu.op = ALU_OP1_UINT_TO_FLT; 3231 3232 alu.dst.sel = tmp3; 3233 alu.dst.chan = 0; 3234 alu.dst.write = 1; 3235 3236 if (signed_op) { 3237 alu.src[0].sel = tmp2; 3238 alu.src[0].chan = 1; 3239 } else { 3240 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3241 } 3242 3243 alu.last = 1; 3244 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3245 return r; 3246 3247 /* tmp0.x = recip(tmp3.x) */ 3248 for (j = 0 ; j < 3; j++) { 3249 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3250 alu.op = ALU_OP1_RECIP_IEEE; 3251 3252 alu.dst.sel = tmp0; 3253 alu.dst.chan = j; 3254 alu.dst.write = (j == 0); 3255 3256 alu.src[0].sel = tmp3; 3257 alu.src[0].chan = 0; 3258 3259 if (j == 2) 3260 alu.last = 1; 3261 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3262 return r; 3263 } 3264 3265 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3266 alu.op = ALU_OP2_MUL; 3267 3268 alu.src[0].sel = tmp0; 3269 alu.src[0].chan = 0; 3270 3271 
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 3272 alu.src[1].value = 0x4f800000; 3273 3274 alu.dst.sel = tmp3; 3275 alu.dst.write = 1; 3276 alu.last = 1; 3277 r = r600_bytecode_add_alu(ctx->bc, &alu); 3278 if (r) 3279 return r; 3280 3281 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3282 alu.op = ALU_OP1_FLT_TO_UINT; 3283 3284 alu.dst.sel = tmp0; 3285 alu.dst.chan = 0; 3286 alu.dst.write = 1; 3287 3288 alu.src[0].sel = tmp3; 3289 alu.src[0].chan = 0; 3290 3291 alu.last = 1; 3292 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3293 return r; 3294 3295 } else { 3296 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3297 alu.op = ALU_OP1_RECIP_UINT; 3298 3299 alu.dst.sel = tmp0; 3300 alu.dst.chan = 0; 3301 alu.dst.write = 1; 3302 3303 if (signed_op) { 3304 alu.src[0].sel = tmp2; 3305 alu.src[0].chan = 1; 3306 } else { 3307 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3308 } 3309 3310 alu.last = 1; 3311 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3312 return r; 3313 } 3314 3315 /* 2. 
tmp0.z = lo (tmp0.x * src2) */ 3316 if (ctx->bc->chip_class == CAYMAN) { 3317 for (j = 0 ; j < 4; j++) { 3318 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3319 alu.op = ALU_OP2_MULLO_UINT; 3320 3321 alu.dst.sel = tmp0; 3322 alu.dst.chan = j; 3323 alu.dst.write = (j == 2); 3324 3325 alu.src[0].sel = tmp0; 3326 alu.src[0].chan = 0; 3327 if (signed_op) { 3328 alu.src[1].sel = tmp2; 3329 alu.src[1].chan = 1; 3330 } else { 3331 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3332 } 3333 3334 alu.last = (j == 3); 3335 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3336 return r; 3337 } 3338 } else { 3339 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3340 alu.op = ALU_OP2_MULLO_UINT; 3341 3342 alu.dst.sel = tmp0; 3343 alu.dst.chan = 2; 3344 alu.dst.write = 1; 3345 3346 alu.src[0].sel = tmp0; 3347 alu.src[0].chan = 0; 3348 if (signed_op) { 3349 alu.src[1].sel = tmp2; 3350 alu.src[1].chan = 1; 3351 } else { 3352 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3353 } 3354 3355 alu.last = 1; 3356 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3357 return r; 3358 } 3359 3360 /* 3. tmp0.w = -tmp0.z */ 3361 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3362 alu.op = ALU_OP2_SUB_INT; 3363 3364 alu.dst.sel = tmp0; 3365 alu.dst.chan = 3; 3366 alu.dst.write = 1; 3367 3368 alu.src[0].sel = V_SQ_ALU_SRC_0; 3369 alu.src[1].sel = tmp0; 3370 alu.src[1].chan = 2; 3371 3372 alu.last = 1; 3373 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3374 return r; 3375 3376 /* 4. 
tmp0.y = hi (tmp0.x * src2) */ 3377 if (ctx->bc->chip_class == CAYMAN) { 3378 for (j = 0 ; j < 4; j++) { 3379 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3380 alu.op = ALU_OP2_MULHI_UINT; 3381 3382 alu.dst.sel = tmp0; 3383 alu.dst.chan = j; 3384 alu.dst.write = (j == 1); 3385 3386 alu.src[0].sel = tmp0; 3387 alu.src[0].chan = 0; 3388 3389 if (signed_op) { 3390 alu.src[1].sel = tmp2; 3391 alu.src[1].chan = 1; 3392 } else { 3393 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3394 } 3395 alu.last = (j == 3); 3396 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3397 return r; 3398 } 3399 } else { 3400 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3401 alu.op = ALU_OP2_MULHI_UINT; 3402 3403 alu.dst.sel = tmp0; 3404 alu.dst.chan = 1; 3405 alu.dst.write = 1; 3406 3407 alu.src[0].sel = tmp0; 3408 alu.src[0].chan = 0; 3409 3410 if (signed_op) { 3411 alu.src[1].sel = tmp2; 3412 alu.src[1].chan = 1; 3413 } else { 3414 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3415 } 3416 3417 alu.last = 1; 3418 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3419 return r; 3420 } 3421 3422 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 3423 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3424 alu.op = ALU_OP3_CNDE_INT; 3425 alu.is_op3 = 1; 3426 3427 alu.dst.sel = tmp0; 3428 alu.dst.chan = 2; 3429 alu.dst.write = 1; 3430 3431 alu.src[0].sel = tmp0; 3432 alu.src[0].chan = 1; 3433 alu.src[1].sel = tmp0; 3434 alu.src[1].chan = 3; 3435 alu.src[2].sel = tmp0; 3436 alu.src[2].chan = 2; 3437 3438 alu.last = 1; 3439 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3440 return r; 3441 3442 /* 6. 
tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 3443 if (ctx->bc->chip_class == CAYMAN) { 3444 for (j = 0 ; j < 4; j++) { 3445 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3446 alu.op = ALU_OP2_MULHI_UINT; 3447 3448 alu.dst.sel = tmp0; 3449 alu.dst.chan = j; 3450 alu.dst.write = (j == 3); 3451 3452 alu.src[0].sel = tmp0; 3453 alu.src[0].chan = 2; 3454 3455 alu.src[1].sel = tmp0; 3456 alu.src[1].chan = 0; 3457 3458 alu.last = (j == 3); 3459 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3460 return r; 3461 } 3462 } else { 3463 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3464 alu.op = ALU_OP2_MULHI_UINT; 3465 3466 alu.dst.sel = tmp0; 3467 alu.dst.chan = 3; 3468 alu.dst.write = 1; 3469 3470 alu.src[0].sel = tmp0; 3471 alu.src[0].chan = 2; 3472 3473 alu.src[1].sel = tmp0; 3474 alu.src[1].chan = 0; 3475 3476 alu.last = 1; 3477 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3478 return r; 3479 } 3480 3481 /* 7. tmp1.x = tmp0.x - tmp0.w */ 3482 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3483 alu.op = ALU_OP2_SUB_INT; 3484 3485 alu.dst.sel = tmp1; 3486 alu.dst.chan = 0; 3487 alu.dst.write = 1; 3488 3489 alu.src[0].sel = tmp0; 3490 alu.src[0].chan = 0; 3491 alu.src[1].sel = tmp0; 3492 alu.src[1].chan = 3; 3493 3494 alu.last = 1; 3495 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3496 return r; 3497 3498 /* 8. tmp1.y = tmp0.x + tmp0.w */ 3499 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3500 alu.op = ALU_OP2_ADD_INT; 3501 3502 alu.dst.sel = tmp1; 3503 alu.dst.chan = 1; 3504 alu.dst.write = 1; 3505 3506 alu.src[0].sel = tmp0; 3507 alu.src[0].chan = 0; 3508 alu.src[1].sel = tmp0; 3509 alu.src[1].chan = 3; 3510 3511 alu.last = 1; 3512 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3513 return r; 3514 3515 /* 9. tmp0.x = (tmp0.y == 0 ? 
tmp1.y : tmp1.x) */ 3516 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3517 alu.op = ALU_OP3_CNDE_INT; 3518 alu.is_op3 = 1; 3519 3520 alu.dst.sel = tmp0; 3521 alu.dst.chan = 0; 3522 alu.dst.write = 1; 3523 3524 alu.src[0].sel = tmp0; 3525 alu.src[0].chan = 1; 3526 alu.src[1].sel = tmp1; 3527 alu.src[1].chan = 1; 3528 alu.src[2].sel = tmp1; 3529 alu.src[2].chan = 0; 3530 3531 alu.last = 1; 3532 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3533 return r; 3534 3535 /* 10. tmp0.z = hi(tmp0.x * src1) = q */ 3536 if (ctx->bc->chip_class == CAYMAN) { 3537 for (j = 0 ; j < 4; j++) { 3538 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3539 alu.op = ALU_OP2_MULHI_UINT; 3540 3541 alu.dst.sel = tmp0; 3542 alu.dst.chan = j; 3543 alu.dst.write = (j == 2); 3544 3545 alu.src[0].sel = tmp0; 3546 alu.src[0].chan = 0; 3547 3548 if (signed_op) { 3549 alu.src[1].sel = tmp2; 3550 alu.src[1].chan = 0; 3551 } else { 3552 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3553 } 3554 3555 alu.last = (j == 3); 3556 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3557 return r; 3558 } 3559 } else { 3560 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3561 alu.op = ALU_OP2_MULHI_UINT; 3562 3563 alu.dst.sel = tmp0; 3564 alu.dst.chan = 2; 3565 alu.dst.write = 1; 3566 3567 alu.src[0].sel = tmp0; 3568 alu.src[0].chan = 0; 3569 3570 if (signed_op) { 3571 alu.src[1].sel = tmp2; 3572 alu.src[1].chan = 0; 3573 } else { 3574 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3575 } 3576 3577 alu.last = 1; 3578 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3579 return r; 3580 } 3581 3582 /* 11. 
tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 3583 if (ctx->bc->chip_class == CAYMAN) { 3584 for (j = 0 ; j < 4; j++) { 3585 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3586 alu.op = ALU_OP2_MULLO_UINT; 3587 3588 alu.dst.sel = tmp0; 3589 alu.dst.chan = j; 3590 alu.dst.write = (j == 1); 3591 3592 if (signed_op) { 3593 alu.src[0].sel = tmp2; 3594 alu.src[0].chan = 1; 3595 } else { 3596 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3597 } 3598 3599 alu.src[1].sel = tmp0; 3600 alu.src[1].chan = 2; 3601 3602 alu.last = (j == 3); 3603 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3604 return r; 3605 } 3606 } else { 3607 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3608 alu.op = ALU_OP2_MULLO_UINT; 3609 3610 alu.dst.sel = tmp0; 3611 alu.dst.chan = 1; 3612 alu.dst.write = 1; 3613 3614 if (signed_op) { 3615 alu.src[0].sel = tmp2; 3616 alu.src[0].chan = 1; 3617 } else { 3618 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3619 } 3620 3621 alu.src[1].sel = tmp0; 3622 alu.src[1].chan = 2; 3623 3624 alu.last = 1; 3625 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3626 return r; 3627 } 3628 3629 /* 12. tmp0.w = src1 - tmp0.y = r */ 3630 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3631 alu.op = ALU_OP2_SUB_INT; 3632 3633 alu.dst.sel = tmp0; 3634 alu.dst.chan = 3; 3635 alu.dst.write = 1; 3636 3637 if (signed_op) { 3638 alu.src[0].sel = tmp2; 3639 alu.src[0].chan = 0; 3640 } else { 3641 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3642 } 3643 3644 alu.src[1].sel = tmp0; 3645 alu.src[1].chan = 1; 3646 3647 alu.last = 1; 3648 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3649 return r; 3650 3651 /* 13. 
tmp1.x = tmp0.w >= src2 = r >= src2 */ 3652 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3653 alu.op = ALU_OP2_SETGE_UINT; 3654 3655 alu.dst.sel = tmp1; 3656 alu.dst.chan = 0; 3657 alu.dst.write = 1; 3658 3659 alu.src[0].sel = tmp0; 3660 alu.src[0].chan = 3; 3661 if (signed_op) { 3662 alu.src[1].sel = tmp2; 3663 alu.src[1].chan = 1; 3664 } else { 3665 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3666 } 3667 3668 alu.last = 1; 3669 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3670 return r; 3671 3672 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 3673 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3674 alu.op = ALU_OP2_SETGE_UINT; 3675 3676 alu.dst.sel = tmp1; 3677 alu.dst.chan = 1; 3678 alu.dst.write = 1; 3679 3680 if (signed_op) { 3681 alu.src[0].sel = tmp2; 3682 alu.src[0].chan = 0; 3683 } else { 3684 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3685 } 3686 3687 alu.src[1].sel = tmp0; 3688 alu.src[1].chan = 1; 3689 3690 alu.last = 1; 3691 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3692 return r; 3693 3694 if (mod) { /* UMOD */ 3695 3696 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 3697 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3698 alu.op = ALU_OP2_SUB_INT; 3699 3700 alu.dst.sel = tmp1; 3701 alu.dst.chan = 2; 3702 alu.dst.write = 1; 3703 3704 alu.src[0].sel = tmp0; 3705 alu.src[0].chan = 3; 3706 3707 if (signed_op) { 3708 alu.src[1].sel = tmp2; 3709 alu.src[1].chan = 1; 3710 } else { 3711 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3712 } 3713 3714 alu.last = 1; 3715 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3716 return r; 3717 3718 /* 16. 
tmp1.w = tmp0.w + src2 = r + src2 */ 3719 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3720 alu.op = ALU_OP2_ADD_INT; 3721 3722 alu.dst.sel = tmp1; 3723 alu.dst.chan = 3; 3724 alu.dst.write = 1; 3725 3726 alu.src[0].sel = tmp0; 3727 alu.src[0].chan = 3; 3728 if (signed_op) { 3729 alu.src[1].sel = tmp2; 3730 alu.src[1].chan = 1; 3731 } else { 3732 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3733 } 3734 3735 alu.last = 1; 3736 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3737 return r; 3738 3739 } else { /* UDIV */ 3740 3741 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 3742 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3743 alu.op = ALU_OP2_ADD_INT; 3744 3745 alu.dst.sel = tmp1; 3746 alu.dst.chan = 2; 3747 alu.dst.write = 1; 3748 3749 alu.src[0].sel = tmp0; 3750 alu.src[0].chan = 2; 3751 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 3752 3753 alu.last = 1; 3754 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3755 return r; 3756 3757 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 3758 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3759 alu.op = ALU_OP2_ADD_INT; 3760 3761 alu.dst.sel = tmp1; 3762 alu.dst.chan = 3; 3763 alu.dst.write = 1; 3764 3765 alu.src[0].sel = tmp0; 3766 alu.src[0].chan = 2; 3767 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 3768 3769 alu.last = 1; 3770 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3771 return r; 3772 3773 } 3774 3775 /* 17. tmp1.x = tmp1.x & tmp1.y */ 3776 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3777 alu.op = ALU_OP2_AND_INT; 3778 3779 alu.dst.sel = tmp1; 3780 alu.dst.chan = 0; 3781 alu.dst.write = 1; 3782 3783 alu.src[0].sel = tmp1; 3784 alu.src[0].chan = 0; 3785 alu.src[1].sel = tmp1; 3786 alu.src[1].chan = 1; 3787 3788 alu.last = 1; 3789 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3790 return r; 3791 3792 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 3793 /* 18. tmp0.z = tmp1.x==0 ? 
tmp0.w : tmp1.z MOD */ 3794 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3795 alu.op = ALU_OP3_CNDE_INT; 3796 alu.is_op3 = 1; 3797 3798 alu.dst.sel = tmp0; 3799 alu.dst.chan = 2; 3800 alu.dst.write = 1; 3801 3802 alu.src[0].sel = tmp1; 3803 alu.src[0].chan = 0; 3804 alu.src[1].sel = tmp0; 3805 alu.src[1].chan = mod ? 3 : 2; 3806 alu.src[2].sel = tmp1; 3807 alu.src[2].chan = 2; 3808 3809 alu.last = 1; 3810 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3811 return r; 3812 3813 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 3814 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3815 alu.op = ALU_OP3_CNDE_INT; 3816 alu.is_op3 = 1; 3817 3818 if (signed_op) { 3819 alu.dst.sel = tmp0; 3820 alu.dst.chan = 2; 3821 alu.dst.write = 1; 3822 } else { 3823 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3824 } 3825 3826 alu.src[0].sel = tmp1; 3827 alu.src[0].chan = 1; 3828 alu.src[1].sel = tmp1; 3829 alu.src[1].chan = 3; 3830 alu.src[2].sel = tmp0; 3831 alu.src[2].chan = 2; 3832 3833 alu.last = 1; 3834 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3835 return r; 3836 3837 if (signed_op) { 3838 3839 /* fix the sign of the result */ 3840 3841 if (mod) { 3842 3843 /* tmp0.x = -tmp0.z */ 3844 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3845 alu.op = ALU_OP2_SUB_INT; 3846 3847 alu.dst.sel = tmp0; 3848 alu.dst.chan = 0; 3849 alu.dst.write = 1; 3850 3851 alu.src[0].sel = V_SQ_ALU_SRC_0; 3852 alu.src[1].sel = tmp0; 3853 alu.src[1].chan = 2; 3854 3855 alu.last = 1; 3856 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3857 return r; 3858 3859 /* sign of the remainder is the same as the sign of src0 */ 3860 /* tmp0.x = src0>=0 ? 
tmp0.z : tmp0.x */ 3861 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3862 alu.op = ALU_OP3_CNDGE_INT; 3863 alu.is_op3 = 1; 3864 3865 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3866 3867 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3868 alu.src[1].sel = tmp0; 3869 alu.src[1].chan = 2; 3870 alu.src[2].sel = tmp0; 3871 alu.src[2].chan = 0; 3872 3873 alu.last = 1; 3874 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3875 return r; 3876 3877 } else { 3878 3879 /* tmp0.x = -tmp0.z */ 3880 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3881 alu.op = ALU_OP2_SUB_INT; 3882 3883 alu.dst.sel = tmp0; 3884 alu.dst.chan = 0; 3885 alu.dst.write = 1; 3886 3887 alu.src[0].sel = V_SQ_ALU_SRC_0; 3888 alu.src[1].sel = tmp0; 3889 alu.src[1].chan = 2; 3890 3891 alu.last = 1; 3892 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3893 return r; 3894 3895 /* fix the quotient sign (same as the sign of src0*src1) */ 3896 /* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */ 3897 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3898 alu.op = ALU_OP3_CNDGE_INT; 3899 alu.is_op3 = 1; 3900 3901 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3902 3903 alu.src[0].sel = tmp2; 3904 alu.src[0].chan = 2; 3905 alu.src[1].sel = tmp0; 3906 alu.src[1].chan = 2; 3907 alu.src[2].sel = tmp0; 3908 alu.src[2].chan = 0; 3909 3910 alu.last = 1; 3911 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3912 return r; 3913 } 3914 } 3915 } 3916 return 0; 3917} 3918 3919static int tgsi_udiv(struct r600_shader_ctx *ctx) 3920{ 3921 return tgsi_divmod(ctx, 0, 0); 3922} 3923 3924static int tgsi_umod(struct r600_shader_ctx *ctx) 3925{ 3926 return tgsi_divmod(ctx, 1, 0); 3927} 3928 3929static int tgsi_idiv(struct r600_shader_ctx *ctx) 3930{ 3931 return tgsi_divmod(ctx, 0, 1); 3932} 3933 3934static int tgsi_imod(struct r600_shader_ctx *ctx) 3935{ 3936 return tgsi_divmod(ctx, 1, 1); 3937} 3938 3939 3940static int tgsi_f2i(struct r600_shader_ctx *ctx) 3941{ 3942 struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction; 3943 struct r600_bytecode_alu alu; 3944 int i, r; 3945 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3946 int last_inst = tgsi_last_instruction(write_mask); 3947 3948 for (i = 0; i < 4; i++) { 3949 if (!(write_mask & (1<<i))) 3950 continue; 3951 3952 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3953 alu.op = ALU_OP1_TRUNC; 3954 3955 alu.dst.sel = ctx->temp_reg; 3956 alu.dst.chan = i; 3957 alu.dst.write = 1; 3958 3959 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3960 if (i == last_inst) 3961 alu.last = 1; 3962 r = r600_bytecode_add_alu(ctx->bc, &alu); 3963 if (r) 3964 return r; 3965 } 3966 3967 for (i = 0; i < 4; i++) { 3968 if (!(write_mask & (1<<i))) 3969 continue; 3970 3971 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3972 alu.op = ctx->inst_info->op; 3973 3974 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3975 3976 alu.src[0].sel = ctx->temp_reg; 3977 alu.src[0].chan = i; 3978 3979 if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT) 3980 alu.last = 1; 3981 r = r600_bytecode_add_alu(ctx->bc, &alu); 3982 if (r) 3983 return r; 3984 } 3985 3986 return 0; 3987} 3988 3989static int tgsi_iabs(struct r600_shader_ctx *ctx) 3990{ 3991 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3992 struct r600_bytecode_alu alu; 3993 int i, r; 3994 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3995 int last_inst = tgsi_last_instruction(write_mask); 3996 3997 /* tmp = -src */ 3998 for (i = 0; i < 4; i++) { 3999 if (!(write_mask & (1<<i))) 4000 continue; 4001 4002 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4003 alu.op = ALU_OP2_SUB_INT; 4004 4005 alu.dst.sel = ctx->temp_reg; 4006 alu.dst.chan = i; 4007 alu.dst.write = 1; 4008 4009 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4010 alu.src[0].sel = V_SQ_ALU_SRC_0; 4011 4012 if (i == last_inst) 4013 alu.last = 1; 4014 r = r600_bytecode_add_alu(ctx->bc, &alu); 4015 if (r) 4016 return r; 4017 } 4018 4019 /* dst = (src >= 0 ? 
src : tmp) */ 4020 for (i = 0; i < 4; i++) { 4021 if (!(write_mask & (1<<i))) 4022 continue; 4023 4024 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4025 alu.op = ALU_OP3_CNDGE_INT; 4026 alu.is_op3 = 1; 4027 alu.dst.write = 1; 4028 4029 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4030 4031 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4032 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4033 alu.src[2].sel = ctx->temp_reg; 4034 alu.src[2].chan = i; 4035 4036 if (i == last_inst) 4037 alu.last = 1; 4038 r = r600_bytecode_add_alu(ctx->bc, &alu); 4039 if (r) 4040 return r; 4041 } 4042 return 0; 4043} 4044 4045static int tgsi_issg(struct r600_shader_ctx *ctx) 4046{ 4047 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4048 struct r600_bytecode_alu alu; 4049 int i, r; 4050 unsigned write_mask = inst->Dst[0].Register.WriteMask; 4051 int last_inst = tgsi_last_instruction(write_mask); 4052 4053 /* tmp = (src >= 0 ? src : -1) */ 4054 for (i = 0; i < 4; i++) { 4055 if (!(write_mask & (1<<i))) 4056 continue; 4057 4058 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4059 alu.op = ALU_OP3_CNDGE_INT; 4060 alu.is_op3 = 1; 4061 4062 alu.dst.sel = ctx->temp_reg; 4063 alu.dst.chan = i; 4064 alu.dst.write = 1; 4065 4066 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4067 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4068 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT; 4069 4070 if (i == last_inst) 4071 alu.last = 1; 4072 r = r600_bytecode_add_alu(ctx->bc, &alu); 4073 if (r) 4074 return r; 4075 } 4076 4077 /* dst = (tmp > 0 ? 
1 : tmp) */ 4078 for (i = 0; i < 4; i++) { 4079 if (!(write_mask & (1<<i))) 4080 continue; 4081 4082 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4083 alu.op = ALU_OP3_CNDGT_INT; 4084 alu.is_op3 = 1; 4085 alu.dst.write = 1; 4086 4087 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4088 4089 alu.src[0].sel = ctx->temp_reg; 4090 alu.src[0].chan = i; 4091 4092 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 4093 4094 alu.src[2].sel = ctx->temp_reg; 4095 alu.src[2].chan = i; 4096 4097 if (i == last_inst) 4098 alu.last = 1; 4099 r = r600_bytecode_add_alu(ctx->bc, &alu); 4100 if (r) 4101 return r; 4102 } 4103 return 0; 4104} 4105 4106 4107 4108static int tgsi_ssg(struct r600_shader_ctx *ctx) 4109{ 4110 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4111 struct r600_bytecode_alu alu; 4112 int i, r; 4113 4114 /* tmp = (src > 0 ? 1 : src) */ 4115 for (i = 0; i < 4; i++) { 4116 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4117 alu.op = ALU_OP3_CNDGT; 4118 alu.is_op3 = 1; 4119 4120 alu.dst.sel = ctx->temp_reg; 4121 alu.dst.chan = i; 4122 4123 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4124 alu.src[1].sel = V_SQ_ALU_SRC_1; 4125 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 4126 4127 if (i == 3) 4128 alu.last = 1; 4129 r = r600_bytecode_add_alu(ctx->bc, &alu); 4130 if (r) 4131 return r; 4132 } 4133 4134 /* dst = (-tmp > 0 ? 
-1 : tmp) */ 4135 for (i = 0; i < 4; i++) { 4136 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4137 alu.op = ALU_OP3_CNDGT; 4138 alu.is_op3 = 1; 4139 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4140 4141 alu.src[0].sel = ctx->temp_reg; 4142 alu.src[0].chan = i; 4143 alu.src[0].neg = 1; 4144 4145 alu.src[1].sel = V_SQ_ALU_SRC_1; 4146 alu.src[1].neg = 1; 4147 4148 alu.src[2].sel = ctx->temp_reg; 4149 alu.src[2].chan = i; 4150 4151 if (i == 3) 4152 alu.last = 1; 4153 r = r600_bytecode_add_alu(ctx->bc, &alu); 4154 if (r) 4155 return r; 4156 } 4157 return 0; 4158} 4159 4160static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 4161{ 4162 struct r600_bytecode_alu alu; 4163 int i, r; 4164 4165 for (i = 0; i < 4; i++) { 4166 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4167 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 4168 alu.op = ALU_OP0_NOP; 4169 alu.dst.chan = i; 4170 } else { 4171 alu.op = ALU_OP1_MOV; 4172 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4173 alu.src[0].sel = ctx->temp_reg; 4174 alu.src[0].chan = i; 4175 } 4176 if (i == 3) { 4177 alu.last = 1; 4178 } 4179 r = r600_bytecode_add_alu(ctx->bc, &alu); 4180 if (r) 4181 return r; 4182 } 4183 return 0; 4184} 4185 4186static int tgsi_op3(struct r600_shader_ctx *ctx) 4187{ 4188 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4189 struct r600_bytecode_alu alu; 4190 int i, j, r; 4191 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4192 4193 for (i = 0; i < lasti + 1; i++) { 4194 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4195 continue; 4196 4197 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4198 alu.op = ctx->inst_info->op; 4199 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4200 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 4201 } 4202 4203 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4204 alu.dst.chan = i; 4205 alu.dst.write = 1; 4206 alu.is_op3 = 1; 4207 if (i == lasti) { 4208 alu.last = 1; 
4209 } 4210 r = r600_bytecode_add_alu(ctx->bc, &alu); 4211 if (r) 4212 return r; 4213 } 4214 return 0; 4215} 4216 4217static int tgsi_dp(struct r600_shader_ctx *ctx) 4218{ 4219 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4220 struct r600_bytecode_alu alu; 4221 int i, j, r; 4222 4223 for (i = 0; i < 4; i++) { 4224 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4225 alu.op = ctx->inst_info->op; 4226 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4227 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 4228 } 4229 4230 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4231 alu.dst.chan = i; 4232 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4233 /* handle some special cases */ 4234 switch (ctx->inst_info->tgsi_opcode) { 4235 case TGSI_OPCODE_DP2: 4236 if (i > 1) { 4237 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 4238 alu.src[0].chan = alu.src[1].chan = 0; 4239 } 4240 break; 4241 case TGSI_OPCODE_DP3: 4242 if (i > 2) { 4243 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 4244 alu.src[0].chan = alu.src[1].chan = 0; 4245 } 4246 break; 4247 case TGSI_OPCODE_DPH: 4248 if (i == 3) { 4249 alu.src[0].sel = V_SQ_ALU_SRC_1; 4250 alu.src[0].chan = 0; 4251 alu.src[0].neg = 0; 4252 } 4253 break; 4254 default: 4255 break; 4256 } 4257 if (i == 3) { 4258 alu.last = 1; 4259 } 4260 r = r600_bytecode_add_alu(ctx->bc, &alu); 4261 if (r) 4262 return r; 4263 } 4264 return 0; 4265} 4266 4267static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 4268 unsigned index) 4269{ 4270 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4271 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 4272 inst->Src[index].Register.File != TGSI_FILE_INPUT && 4273 inst->Src[index].Register.File != TGSI_FILE_OUTPUT) || 4274 ctx->src[index].neg || ctx->src[index].abs; 4275} 4276 4277static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 4278 unsigned index) 4279{ 4280 struct 
tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4281 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 4282} 4283 4284static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading) 4285{ 4286 struct r600_bytecode_vtx vtx; 4287 struct r600_bytecode_alu alu; 4288 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4289 int src_gpr, r, i; 4290 int id = tgsi_tex_get_src_gpr(ctx, 1); 4291 4292 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 4293 if (src_requires_loading) { 4294 for (i = 0; i < 4; i++) { 4295 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4296 alu.op = ALU_OP1_MOV; 4297 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4298 alu.dst.sel = ctx->temp_reg; 4299 alu.dst.chan = i; 4300 if (i == 3) 4301 alu.last = 1; 4302 alu.dst.write = 1; 4303 r = r600_bytecode_add_alu(ctx->bc, &alu); 4304 if (r) 4305 return r; 4306 } 4307 src_gpr = ctx->temp_reg; 4308 } 4309 4310 memset(&vtx, 0, sizeof(vtx)); 4311 vtx.op = FETCH_OP_VFETCH; 4312 vtx.buffer_id = id + R600_MAX_CONST_BUFFERS; 4313 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 4314 vtx.src_gpr = src_gpr; 4315 vtx.mega_fetch_count = 16; 4316 vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 4317 vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */ 4318 vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */ 4319 vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */ 4320 vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; /* SEL_W */ 4321 vtx.use_const_fields = 1; 4322 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 4323 4324 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 4325 return r; 4326 4327 if (ctx->bc->chip_class >= EVERGREEN) 4328 return 0; 4329 4330 for (i = 0; i < 4; i++) { 4331 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4332 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4333 continue; 4334 4335 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4336 alu.op = ALU_OP2_AND_INT; 4337 4338 alu.dst.chan = i; 4339 alu.dst.sel = vtx.dst_gpr; 4340 alu.dst.write = 1; 4341 4342 alu.src[0].sel = vtx.dst_gpr; 4343 alu.src[0].chan = i; 4344 4345 alu.src[1].sel = 512 + (id * 2); 4346 alu.src[1].chan = i % 4; 4347 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 4348 4349 if (i == lasti) 4350 alu.last = 1; 4351 r = r600_bytecode_add_alu(ctx->bc, &alu); 4352 if (r) 4353 return r; 4354 } 4355 4356 if (inst->Dst[0].Register.WriteMask & 3) { 4357 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4358 alu.op = ALU_OP2_OR_INT; 4359 4360 alu.dst.chan = 3; 4361 alu.dst.sel = vtx.dst_gpr; 4362 alu.dst.write = 1; 4363 4364 alu.src[0].sel = vtx.dst_gpr; 4365 alu.src[0].chan = 3; 4366 4367 alu.src[1].sel = 512 + (id * 2) + 1; 4368 alu.src[1].chan = 0; 4369 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 4370 4371 alu.last = 1; 4372 r = r600_bytecode_add_alu(ctx->bc, &alu); 4373 if (r) 4374 return r; 4375 } 4376 return 0; 4377} 4378 4379static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) 4380{ 4381 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4382 struct r600_bytecode_alu alu; 4383 int r; 4384 int id = tgsi_tex_get_src_gpr(ctx, 1); 4385 4386 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4387 alu.op = ALU_OP1_MOV; 4388 4389 if (ctx->bc->chip_class >= EVERGREEN) { 4390 alu.src[0].sel = 512 + (id / 4); 4391 alu.src[0].chan = id % 4; 4392 } else { 4393 /* r600 we have them at channel 2 of the second dword */ 4394 
alu.src[0].sel = 512 + (id * 2) + 1; 4395 alu.src[0].chan = 1; 4396 } 4397 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 4398 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4399 alu.last = 1; 4400 r = r600_bytecode_add_alu(ctx->bc, &alu); 4401 if (r) 4402 return r; 4403 return 0; 4404} 4405 4406static int tgsi_tex(struct r600_shader_ctx *ctx) 4407{ 4408 static float one_point_five = 1.5f; 4409 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4410 struct r600_bytecode_tex tex; 4411 struct r600_bytecode_alu alu; 4412 unsigned src_gpr; 4413 int r, i, j; 4414 int opcode; 4415 bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing && 4416 inst->Instruction.Opcode == TGSI_OPCODE_TXF && 4417 (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA || 4418 inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); 4419 4420 bool txf_add_offsets = inst->Texture.NumOffsets && 4421 inst->Instruction.Opcode == TGSI_OPCODE_TXF && 4422 inst->Texture.Texture != TGSI_TEXTURE_BUFFER; 4423 4424 /* Texture fetch instructions can only use gprs as source. 4425 * Also they cannot negate the source or take the absolute value */ 4426 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 4427 tgsi_tex_src_requires_loading(ctx, 0)) || 4428 read_compressed_msaa || txf_add_offsets; 4429 4430 boolean src_loaded = FALSE; 4431 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 
0 : 1; 4432 int8_t offset_x = 0, offset_y = 0, offset_z = 0; 4433 boolean has_txq_cube_array_z = false; 4434 4435 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 4436 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 4437 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 4438 if (inst->Dst[0].Register.WriteMask & 4) { 4439 ctx->shader->has_txq_cube_array_z_comp = true; 4440 has_txq_cube_array_z = true; 4441 } 4442 4443 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 4444 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 4445 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 4446 sampler_src_reg = 2; 4447 4448 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 4449 4450 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 4451 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 4452 ctx->shader->uses_tex_buffers = true; 4453 return r600_do_buffer_txq(ctx); 4454 } 4455 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 4456 if (ctx->bc->chip_class < EVERGREEN) 4457 ctx->shader->uses_tex_buffers = true; 4458 return do_vtx_fetch_inst(ctx, src_requires_loading); 4459 } 4460 } 4461 4462 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 4463 /* TGSI moves the sampler to src reg 3 for TXD */ 4464 sampler_src_reg = 3; 4465 4466 for (i = 1; i < 3; i++) { 4467 /* set gradients h/v */ 4468 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 4469 tex.op = (i == 1) ? 
FETCH_OP_SET_GRADIENTS_H : 4470 FETCH_OP_SET_GRADIENTS_V; 4471 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 4472 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 4473 4474 if (tgsi_tex_src_requires_loading(ctx, i)) { 4475 tex.src_gpr = r600_get_temp(ctx); 4476 tex.src_sel_x = 0; 4477 tex.src_sel_y = 1; 4478 tex.src_sel_z = 2; 4479 tex.src_sel_w = 3; 4480 4481 for (j = 0; j < 4; j++) { 4482 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4483 alu.op = ALU_OP1_MOV; 4484 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 4485 alu.dst.sel = tex.src_gpr; 4486 alu.dst.chan = j; 4487 if (j == 3) 4488 alu.last = 1; 4489 alu.dst.write = 1; 4490 r = r600_bytecode_add_alu(ctx->bc, &alu); 4491 if (r) 4492 return r; 4493 } 4494 4495 } else { 4496 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 4497 tex.src_sel_x = ctx->src[i].swizzle[0]; 4498 tex.src_sel_y = ctx->src[i].swizzle[1]; 4499 tex.src_sel_z = ctx->src[i].swizzle[2]; 4500 tex.src_sel_w = ctx->src[i].swizzle[3]; 4501 tex.src_rel = ctx->src[i].rel; 4502 } 4503 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 4504 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 4505 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 4506 tex.coord_type_x = 1; 4507 tex.coord_type_y = 1; 4508 tex.coord_type_z = 1; 4509 tex.coord_type_w = 1; 4510 } 4511 r = r600_bytecode_add_tex(ctx->bc, &tex); 4512 if (r) 4513 return r; 4514 } 4515 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 4516 int out_chan; 4517 /* Add perspective divide */ 4518 if (ctx->bc->chip_class == CAYMAN) { 4519 out_chan = 2; 4520 for (i = 0; i < 3; i++) { 4521 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4522 alu.op = ALU_OP1_RECIP_IEEE; 4523 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4524 4525 alu.dst.sel = ctx->temp_reg; 4526 alu.dst.chan = i; 4527 if (i == 2) 4528 alu.last = 1; 4529 if (out_chan == i) 4530 alu.dst.write = 1; 4531 r = r600_bytecode_add_alu(ctx->bc, &alu); 4532 if (r) 
4533 return r; 4534 } 4535 4536 } else { 4537 out_chan = 3; 4538 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4539 alu.op = ALU_OP1_RECIP_IEEE; 4540 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4541 4542 alu.dst.sel = ctx->temp_reg; 4543 alu.dst.chan = out_chan; 4544 alu.last = 1; 4545 alu.dst.write = 1; 4546 r = r600_bytecode_add_alu(ctx->bc, &alu); 4547 if (r) 4548 return r; 4549 } 4550 4551 for (i = 0; i < 3; i++) { 4552 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4553 alu.op = ALU_OP2_MUL; 4554 alu.src[0].sel = ctx->temp_reg; 4555 alu.src[0].chan = out_chan; 4556 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4557 alu.dst.sel = ctx->temp_reg; 4558 alu.dst.chan = i; 4559 alu.dst.write = 1; 4560 r = r600_bytecode_add_alu(ctx->bc, &alu); 4561 if (r) 4562 return r; 4563 } 4564 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4565 alu.op = ALU_OP1_MOV; 4566 alu.src[0].sel = V_SQ_ALU_SRC_1; 4567 alu.src[0].chan = 0; 4568 alu.dst.sel = ctx->temp_reg; 4569 alu.dst.chan = 3; 4570 alu.last = 1; 4571 alu.dst.write = 1; 4572 r = r600_bytecode_add_alu(ctx->bc, &alu); 4573 if (r) 4574 return r; 4575 src_loaded = TRUE; 4576 src_gpr = ctx->temp_reg; 4577 } 4578 4579 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 4580 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 4581 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 4582 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 4583 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 4584 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 4585 4586 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 4587 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 4588 4589 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 4590 for (i = 0; i < 4; i++) { 4591 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4592 alu.op = ALU_OP2_CUBE; 4593 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 4594 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 4595 alu.dst.sel = ctx->temp_reg; 4596 
alu.dst.chan = i; 4597 if (i == 3) 4598 alu.last = 1; 4599 alu.dst.write = 1; 4600 r = r600_bytecode_add_alu(ctx->bc, &alu); 4601 if (r) 4602 return r; 4603 } 4604 4605 /* tmp1.z = RCP_e(|tmp1.z|) */ 4606 if (ctx->bc->chip_class == CAYMAN) { 4607 for (i = 0; i < 3; i++) { 4608 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4609 alu.op = ALU_OP1_RECIP_IEEE; 4610 alu.src[0].sel = ctx->temp_reg; 4611 alu.src[0].chan = 2; 4612 alu.src[0].abs = 1; 4613 alu.dst.sel = ctx->temp_reg; 4614 alu.dst.chan = i; 4615 if (i == 2) 4616 alu.dst.write = 1; 4617 if (i == 2) 4618 alu.last = 1; 4619 r = r600_bytecode_add_alu(ctx->bc, &alu); 4620 if (r) 4621 return r; 4622 } 4623 } else { 4624 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4625 alu.op = ALU_OP1_RECIP_IEEE; 4626 alu.src[0].sel = ctx->temp_reg; 4627 alu.src[0].chan = 2; 4628 alu.src[0].abs = 1; 4629 alu.dst.sel = ctx->temp_reg; 4630 alu.dst.chan = 2; 4631 alu.dst.write = 1; 4632 alu.last = 1; 4633 r = r600_bytecode_add_alu(ctx->bc, &alu); 4634 if (r) 4635 return r; 4636 } 4637 4638 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 4639 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 4640 * muladd has no writemask, have to use another temp 4641 */ 4642 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4643 alu.op = ALU_OP3_MULADD; 4644 alu.is_op3 = 1; 4645 4646 alu.src[0].sel = ctx->temp_reg; 4647 alu.src[0].chan = 0; 4648 alu.src[1].sel = ctx->temp_reg; 4649 alu.src[1].chan = 2; 4650 4651 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 4652 alu.src[2].chan = 0; 4653 alu.src[2].value = *(uint32_t *)&one_point_five; 4654 4655 alu.dst.sel = ctx->temp_reg; 4656 alu.dst.chan = 0; 4657 alu.dst.write = 1; 4658 4659 r = r600_bytecode_add_alu(ctx->bc, &alu); 4660 if (r) 4661 return r; 4662 4663 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4664 alu.op = ALU_OP3_MULADD; 4665 alu.is_op3 = 1; 4666 4667 alu.src[0].sel = ctx->temp_reg; 4668 alu.src[0].chan = 1; 4669 alu.src[1].sel = ctx->temp_reg; 4670 alu.src[1].chan = 2; 4671 4672 
alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 4673 alu.src[2].chan = 0; 4674 alu.src[2].value = *(uint32_t *)&one_point_five; 4675 4676 alu.dst.sel = ctx->temp_reg; 4677 alu.dst.chan = 1; 4678 alu.dst.write = 1; 4679 4680 alu.last = 1; 4681 r = r600_bytecode_add_alu(ctx->bc, &alu); 4682 if (r) 4683 return r; 4684 /* write initial compare value into Z component 4685 - W src 0 for shadow cube 4686 - X src 1 for shadow cube array */ 4687 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 4688 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 4689 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4690 alu.op = ALU_OP1_MOV; 4691 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 4692 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 4693 else 4694 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4695 alu.dst.sel = ctx->temp_reg; 4696 alu.dst.chan = 2; 4697 alu.dst.write = 1; 4698 alu.last = 1; 4699 r = r600_bytecode_add_alu(ctx->bc, &alu); 4700 if (r) 4701 return r; 4702 } 4703 4704 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 4705 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 4706 if (ctx->bc->chip_class >= EVERGREEN) { 4707 int mytmp = r600_get_temp(ctx); 4708 static const float eight = 8.0f; 4709 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4710 alu.op = ALU_OP1_MOV; 4711 alu.src[0].sel = ctx->temp_reg; 4712 alu.src[0].chan = 3; 4713 alu.dst.sel = mytmp; 4714 alu.dst.chan = 0; 4715 alu.dst.write = 1; 4716 alu.last = 1; 4717 r = r600_bytecode_add_alu(ctx->bc, &alu); 4718 if (r) 4719 return r; 4720 4721 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 4722 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4723 alu.op = ALU_OP3_MULADD; 4724 alu.is_op3 = 1; 4725 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4726 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4727 alu.src[1].chan = 0; 4728 alu.src[1].value = *(uint32_t *)&eight; 4729 alu.src[2].sel = mytmp; 4730 alu.src[2].chan = 0; 4731 alu.dst.sel = 
ctx->temp_reg; 4732 alu.dst.chan = 3; 4733 alu.dst.write = 1; 4734 alu.last = 1; 4735 r = r600_bytecode_add_alu(ctx->bc, &alu); 4736 if (r) 4737 return r; 4738 } else if (ctx->bc->chip_class < EVERGREEN) { 4739 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 4740 tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 4741 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 4742 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 4743 tex.src_gpr = r600_get_temp(ctx); 4744 tex.src_sel_x = 0; 4745 tex.src_sel_y = 0; 4746 tex.src_sel_z = 0; 4747 tex.src_sel_w = 0; 4748 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 4749 tex.coord_type_x = 1; 4750 tex.coord_type_y = 1; 4751 tex.coord_type_z = 1; 4752 tex.coord_type_w = 1; 4753 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4754 alu.op = ALU_OP1_MOV; 4755 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4756 alu.dst.sel = tex.src_gpr; 4757 alu.dst.chan = 0; 4758 alu.last = 1; 4759 alu.dst.write = 1; 4760 r = r600_bytecode_add_alu(ctx->bc, &alu); 4761 if (r) 4762 return r; 4763 4764 r = r600_bytecode_add_tex(ctx->bc, &tex); 4765 if (r) 4766 return r; 4767 } 4768 4769 } 4770 4771 /* for cube forms of lod and bias we need to route things */ 4772 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 4773 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 4774 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 4775 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 4776 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4777 alu.op = ALU_OP1_MOV; 4778 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 4779 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 4780 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 4781 else 4782 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 4783 alu.dst.sel = ctx->temp_reg; 4784 alu.dst.chan = 2; 4785 alu.last = 1; 4786 alu.dst.write = 1; 4787 r = r600_bytecode_add_alu(ctx->bc, &alu); 4788 if (r) 4789 return r; 4790 } 4791 4792 src_loaded = TRUE; 4793 src_gpr = ctx->temp_reg; 4794 } 
4795 4796 if (src_requires_loading && !src_loaded) { 4797 for (i = 0; i < 4; i++) { 4798 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4799 alu.op = ALU_OP1_MOV; 4800 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4801 alu.dst.sel = ctx->temp_reg; 4802 alu.dst.chan = i; 4803 if (i == 3) 4804 alu.last = 1; 4805 alu.dst.write = 1; 4806 r = r600_bytecode_add_alu(ctx->bc, &alu); 4807 if (r) 4808 return r; 4809 } 4810 src_loaded = TRUE; 4811 src_gpr = ctx->temp_reg; 4812 } 4813 4814 /* get offset values */ 4815 if (inst->Texture.NumOffsets) { 4816 assert(inst->Texture.NumOffsets == 1); 4817 4818 /* The texture offset feature doesn't work with the TXF instruction 4819 * and must be emulated by adding the offset to the texture coordinates. */ 4820 if (txf_add_offsets) { 4821 const struct tgsi_texture_offset *off = inst->TexOffsets; 4822 4823 switch (inst->Texture.Texture) { 4824 case TGSI_TEXTURE_3D: 4825 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4826 alu.op = ALU_OP2_ADD_INT; 4827 alu.src[0].sel = src_gpr; 4828 alu.src[0].chan = 2; 4829 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4830 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 4831 alu.dst.sel = src_gpr; 4832 alu.dst.chan = 2; 4833 alu.dst.write = 1; 4834 alu.last = 1; 4835 r = r600_bytecode_add_alu(ctx->bc, &alu); 4836 if (r) 4837 return r; 4838 /* fall through */ 4839 4840 case TGSI_TEXTURE_2D: 4841 case TGSI_TEXTURE_SHADOW2D: 4842 case TGSI_TEXTURE_RECT: 4843 case TGSI_TEXTURE_SHADOWRECT: 4844 case TGSI_TEXTURE_2D_ARRAY: 4845 case TGSI_TEXTURE_SHADOW2D_ARRAY: 4846 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4847 alu.op = ALU_OP2_ADD_INT; 4848 alu.src[0].sel = src_gpr; 4849 alu.src[0].chan = 1; 4850 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4851 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 4852 alu.dst.sel = src_gpr; 4853 alu.dst.chan = 1; 4854 alu.dst.write = 1; 4855 alu.last = 1; 4856 r = r600_bytecode_add_alu(ctx->bc, &alu); 4857 if (r) 4858 return 
r; 4859 /* fall through */ 4860 4861 case TGSI_TEXTURE_1D: 4862 case TGSI_TEXTURE_SHADOW1D: 4863 case TGSI_TEXTURE_1D_ARRAY: 4864 case TGSI_TEXTURE_SHADOW1D_ARRAY: 4865 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4866 alu.op = ALU_OP2_ADD_INT; 4867 alu.src[0].sel = src_gpr; 4868 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4869 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 4870 alu.dst.sel = src_gpr; 4871 alu.dst.write = 1; 4872 alu.last = 1; 4873 r = r600_bytecode_add_alu(ctx->bc, &alu); 4874 if (r) 4875 return r; 4876 break; 4877 /* texture offsets do not apply to other texture targets */ 4878 } 4879 } else { 4880 offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 4881 offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 4882 offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 4883 } 4884 } 4885 4886 /* Obtain the sample index for reading a compressed MSAA color texture. 4887 * To read the FMASK, we use the ldfptr instruction, which tells us 4888 * where the samples are stored. 4889 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 4890 * which is the identity mapping. Each nibble says which physical sample 4891 * should be fetched to get that sample. 4892 * 4893 * Assume src.z contains the sample index. It should be modified like this: 4894 * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 4895 * Then fetch the texel with src. 
4896 */ 4897 if (read_compressed_msaa) { 4898 unsigned sample_chan = 3; 4899 unsigned temp = r600_get_temp(ctx); 4900 assert(src_loaded); 4901 4902 /* temp.w = ldfptr() */ 4903 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 4904 tex.op = FETCH_OP_LD; 4905 tex.inst_mod = 1; /* to indicate this is ldfptr */ 4906 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 4907 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 4908 tex.src_gpr = src_gpr; 4909 tex.dst_gpr = temp; 4910 tex.dst_sel_x = 7; /* mask out these components */ 4911 tex.dst_sel_y = 7; 4912 tex.dst_sel_z = 7; 4913 tex.dst_sel_w = 0; /* store X */ 4914 tex.src_sel_x = 0; 4915 tex.src_sel_y = 1; 4916 tex.src_sel_z = 2; 4917 tex.src_sel_w = 3; 4918 tex.offset_x = offset_x; 4919 tex.offset_y = offset_y; 4920 tex.offset_z = offset_z; 4921 r = r600_bytecode_add_tex(ctx->bc, &tex); 4922 if (r) 4923 return r; 4924 4925 /* temp.x = sample_index*4 */ 4926 if (ctx->bc->chip_class == CAYMAN) { 4927 for (i = 0 ; i < 4; i++) { 4928 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4929 alu.op = ALU_OP2_MULLO_INT; 4930 alu.src[0].sel = src_gpr; 4931 alu.src[0].chan = sample_chan; 4932 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4933 alu.src[1].value = 4; 4934 alu.dst.sel = temp; 4935 alu.dst.chan = i; 4936 alu.dst.write = i == 0; 4937 if (i == 3) 4938 alu.last = 1; 4939 r = r600_bytecode_add_alu(ctx->bc, &alu); 4940 if (r) 4941 return r; 4942 } 4943 } else { 4944 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4945 alu.op = ALU_OP2_MULLO_INT; 4946 alu.src[0].sel = src_gpr; 4947 alu.src[0].chan = sample_chan; 4948 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4949 alu.src[1].value = 4; 4950 alu.dst.sel = temp; 4951 alu.dst.chan = 0; 4952 alu.dst.write = 1; 4953 alu.last = 1; 4954 r = r600_bytecode_add_alu(ctx->bc, &alu); 4955 if (r) 4956 return r; 4957 } 4958 4959 /* sample_index = temp.w >> temp.x */ 4960 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4961 alu.op = ALU_OP2_LSHR_INT; 4962 alu.src[0].sel 
= temp; 4963 alu.src[0].chan = 3; 4964 alu.src[1].sel = temp; 4965 alu.src[1].chan = 0; 4966 alu.dst.sel = src_gpr; 4967 alu.dst.chan = sample_chan; 4968 alu.dst.write = 1; 4969 alu.last = 1; 4970 r = r600_bytecode_add_alu(ctx->bc, &alu); 4971 if (r) 4972 return r; 4973 4974 /* sample_index & 0xF */ 4975 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4976 alu.op = ALU_OP2_AND_INT; 4977 alu.src[0].sel = src_gpr; 4978 alu.src[0].chan = sample_chan; 4979 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4980 alu.src[1].value = 0xF; 4981 alu.dst.sel = src_gpr; 4982 alu.dst.chan = sample_chan; 4983 alu.dst.write = 1; 4984 alu.last = 1; 4985 r = r600_bytecode_add_alu(ctx->bc, &alu); 4986 if (r) 4987 return r; 4988#if 0 4989 /* visualize the FMASK */ 4990 for (i = 0; i < 4; i++) { 4991 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4992 alu.op = ALU_OP1_INT_TO_FLT; 4993 alu.src[0].sel = src_gpr; 4994 alu.src[0].chan = sample_chan; 4995 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 4996 alu.dst.chan = i; 4997 alu.dst.write = 1; 4998 alu.last = 1; 4999 r = r600_bytecode_add_alu(ctx->bc, &alu); 5000 if (r) 5001 return r; 5002 } 5003 return 0; 5004#endif 5005 } 5006 5007 /* does this shader want a num layers from TXQ for a cube array? 
*/ 5008 if (has_txq_cube_array_z) { 5009 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 5010 5011 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5012 alu.op = ALU_OP1_MOV; 5013 5014 alu.src[0].sel = 512 + (id / 4); 5015 alu.src[0].kc_bank = R600_TXQ_CONST_BUFFER; 5016 alu.src[0].chan = id % 4; 5017 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 5018 alu.last = 1; 5019 r = r600_bytecode_add_alu(ctx->bc, &alu); 5020 if (r) 5021 return r; 5022 /* disable writemask from texture instruction */ 5023 inst->Dst[0].Register.WriteMask &= ~4; 5024 } 5025 5026 opcode = ctx->inst_info->op; 5027 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 5028 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 5029 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 5030 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 5031 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 5032 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 5033 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 5034 switch (opcode) { 5035 case FETCH_OP_SAMPLE: 5036 opcode = FETCH_OP_SAMPLE_C; 5037 break; 5038 case FETCH_OP_SAMPLE_L: 5039 opcode = FETCH_OP_SAMPLE_C_L; 5040 break; 5041 case FETCH_OP_SAMPLE_LB: 5042 opcode = FETCH_OP_SAMPLE_C_LB; 5043 break; 5044 case FETCH_OP_SAMPLE_G: 5045 opcode = FETCH_OP_SAMPLE_C_G; 5046 break; 5047 } 5048 } 5049 5050 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 5051 tex.op = opcode; 5052 5053 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 5054 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 5055 tex.src_gpr = src_gpr; 5056 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 5057 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 5058 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 5059 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 5060 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; 5061 5062 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ) { 5063 tex.src_sel_x = 4; 5064 tex.src_sel_y = 4; 5065 tex.src_sel_z = 4; 5066 tex.src_sel_w = 4; 5067 } else if (src_loaded) { 5068 tex.src_sel_x = 0; 5069 tex.src_sel_y = 1; 5070 tex.src_sel_z = 2; 5071 tex.src_sel_w = 3; 5072 } else { 5073 tex.src_sel_x = ctx->src[0].swizzle[0]; 5074 tex.src_sel_y = ctx->src[0].swizzle[1]; 5075 tex.src_sel_z = ctx->src[0].swizzle[2]; 5076 tex.src_sel_w = ctx->src[0].swizzle[3]; 5077 tex.src_rel = ctx->src[0].rel; 5078 } 5079 5080 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 5081 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 5082 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 5083 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 5084 tex.src_sel_x = 1; 5085 tex.src_sel_y = 0; 5086 tex.src_sel_z = 3; 5087 tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 5088 } 5089 5090 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 5091 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 5092 tex.coord_type_x = 1; 5093 tex.coord_type_y = 1; 5094 } 5095 tex.coord_type_z = 1; 5096 tex.coord_type_w = 1; 5097 5098 tex.offset_x = offset_x; 5099 tex.offset_y = offset_y; 5100 tex.offset_z = offset_z; 5101 5102 /* Put the depth for comparison in W. 5103 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 5104 * Some instructions expect the depth in Z. 
*/ 5105 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 5106 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 5107 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 5108 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 5109 opcode != FETCH_OP_SAMPLE_C_L && 5110 opcode != FETCH_OP_SAMPLE_C_LB) { 5111 tex.src_sel_w = tex.src_sel_z; 5112 } 5113 5114 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 5115 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 5116 if (opcode == FETCH_OP_SAMPLE_C_L || 5117 opcode == FETCH_OP_SAMPLE_C_LB) { 5118 /* the array index is read from Y */ 5119 tex.coord_type_y = 0; 5120 } else { 5121 /* the array index is read from Z */ 5122 tex.coord_type_z = 0; 5123 tex.src_sel_z = tex.src_sel_y; 5124 } 5125 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 5126 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 5127 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 5128 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 5129 (ctx->bc->chip_class >= EVERGREEN))) 5130 /* the array index is read from Z */ 5131 tex.coord_type_z = 0; 5132 5133 /* mask unused source components */ 5134 if (opcode == FETCH_OP_SAMPLE) { 5135 switch (inst->Texture.Texture) { 5136 case TGSI_TEXTURE_2D: 5137 case TGSI_TEXTURE_RECT: 5138 tex.src_sel_z = 7; 5139 tex.src_sel_w = 7; 5140 break; 5141 case TGSI_TEXTURE_1D_ARRAY: 5142 tex.src_sel_y = 7; 5143 tex.src_sel_w = 7; 5144 break; 5145 case TGSI_TEXTURE_1D: 5146 tex.src_sel_y = 7; 5147 tex.src_sel_z = 7; 5148 tex.src_sel_w = 7; 5149 break; 5150 } 5151 } 5152 5153 r = r600_bytecode_add_tex(ctx->bc, &tex); 5154 if (r) 5155 return r; 5156 5157 /* add shadow ambient support - gallium doesn't do it yet */ 5158 return 0; 5159}

/*
 * TGSI LRP: dst = src0 * src1 + (1 - src0) * src2, for every channel
 * enabled in the destination write mask.
 *
 * Fast path: when src0 is the inline constant V_SQ_ALU_SRC_0_5 the blend
 * is emitted as a single ADD of src1 and src2 with omod = 3 (presumably
 * the "divide result by 2" output modifier -- confirm against the ISA
 * documentation).
 *
 * General path, three ALU groups, using ctx->temp_reg as scratch:
 *   temp = 1 - src0
 *   temp = temp * src2
 *   dst  = src0 * src1 + temp
 *
 * Returns 0 on success or the first error from r600_bytecode_add_alu().
 */
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const unsigned writemask = inst->Dst[0].Register.WriteMask;
	const int lasti = tgsi_last_instruction(writemask);
	struct r600_bytecode_alu alu;
	int chan, r;

	/* optimize if it's just an equal balance */
	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
		for (chan = 0; chan <= lasti; chan++) {
			if (!(writemask & (1 << chan)))
				continue;

			memset(&alu, 0, sizeof(alu));
			alu.op = ALU_OP2_ADD;
			alu.omod = 3;
			alu.last = (chan == lasti);
			r600_bytecode_src(&alu.src[0], &ctx->src[1], chan);
			r600_bytecode_src(&alu.src[1], &ctx->src[2], chan);
			tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
			alu.dst.chan = chan;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		return 0;
	}

	/* temp = 1 - src0 */
	for (chan = 0; chan <= lasti; chan++) {
		if (!(writemask & (1 << chan)))
			continue;

		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP2_ADD;
		alu.last = (chan == lasti);

		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		r600_bytecode_src(&alu.src[1], &ctx->src[0], chan);
		r600_bytecode_src_toggle_neg(&alu.src[1]);

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = chan;
		alu.dst.write = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* temp = (1 - src0) * src2 */
	for (chan = 0; chan <= lasti; chan++) {
		if (!(writemask & (1 << chan)))
			continue;

		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP2_MUL;
		alu.last = (chan == lasti);

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = chan;
		r600_bytecode_src(&alu.src[1], &ctx->src[2], chan);

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = chan;
		alu.dst.write = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* dst = src0 * src1 + (1 - src0) * src2 */
	for (chan = 0; chan <= lasti; chan++) {
		if (!(writemask & (1 << chan)))
			continue;

		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP3_MULADD;
		alu.is_op3 = 1;
		alu.last = (chan == lasti);

		r600_bytecode_src(&alu.src[0], &ctx->src[0], chan);
		r600_bytecode_src(&alu.src[1], &ctx->src[1], chan);
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = chan;

		tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
		alu.dst.chan = chan;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/*
 * Shared body of tgsi_cmp() and tgsi_ucmp(): emit one three-operand
 * conditional select per enabled destination channel.
 *
 * Operand routing: ALU src0 = TGSI src0, ALU src1 = TGSI src2,
 * ALU src2 = TGSI src1 (the last two TGSI operands are swapped).
 *
 * op is the hardware opcode to emit (ALU_OP3_CNDGE or ALU_OP3_CNDGE_INT).
 * Returns 0 on success or the first error from r600_bytecode_add_alu().
 */
static int tgsi_emit_cndge(struct r600_shader_ctx *ctx, unsigned op)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const unsigned writemask = inst->Dst[0].Register.WriteMask;
	const int lasti = tgsi_last_instruction(writemask);
	struct r600_bytecode_alu alu;
	int chan, r;

	for (chan = 0; chan <= lasti; chan++) {
		if (!(writemask & (1 << chan)))
			continue;

		memset(&alu, 0, sizeof(alu));
		alu.op = op;
		alu.is_op3 = 1;
		alu.last = (chan == lasti);

		r600_bytecode_src(&alu.src[0], &ctx->src[0], chan);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], chan);
		r600_bytecode_src(&alu.src[2], &ctx->src[1], chan);

		tgsi_dst(ctx, &inst->Dst[0], chan, &alu.dst);
		alu.dst.chan = chan;
		alu.dst.write = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI CMP: per-channel float select, emitted as ALU_OP3_CNDGE. */
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
	return tgsi_emit_cndge(ctx, ALU_OP3_CNDGE);
}

/* TGSI UCMP: per-channel integer select, emitted as ALU_OP3_CNDGE_INT. */
static int tgsi_ucmp(struct r600_shader_ctx *ctx)
{
	return tgsi_emit_cndge(ctx, ALU_OP3_CNDGE_INT);
}

/*
 * TGSI XPD: cross product of src0 and src1.
 *
 * Two four-lane ALU groups are emitted:
 *   temp.xyz = src0.zxy * src1.yzx
 *   out.xyz  = src0.yzx * src1.zxy - temp.xyz
 * The W lane of both groups is fed the inline constant zero.
 *
 * All four lanes are always computed.  When the destination write mask is
 * not 0xf the second group targets ctx->temp_reg instead of the real
 * destination, and tgsi_helper_copy() then moves the result to the
 * destination.
 *
 * Returns 0 on success or the first error from the emit helpers.
 */
static int tgsi_xpd(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	static const unsigned int swz_zxy[] = {2, 0, 1};
	static const unsigned int swz_yzx[] = {1, 2, 0};
	const int via_temp = inst->Dst[0].Register.WriteMask != 0xf;
	struct r600_bytecode_alu alu;
	int lane, r;

	/* temp.xyz = src0.zxy * src1.yzx */
	for (lane = 0; lane < 4; lane++) {
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP2_MUL;
		alu.last = (lane == 3);

		if (lane < 3) {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], swz_zxy[lane]);
			r600_bytecode_src(&alu.src[1], &ctx->src[1], swz_yzx[lane]);
		} else {
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = lane;
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = lane;
		}

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = lane;
		alu.dst.write = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* out.xyz = src0.yzx * src1.zxy - temp.xyz */
	for (lane = 0; lane < 4; lane++) {
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP3_MULADD;
		alu.is_op3 = 1;
		alu.last = (lane == 3);

		if (lane < 3) {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], swz_yzx[lane]);
			r600_bytecode_src(&alu.src[1], &ctx->src[1], swz_zxy[lane]);
		} else {
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = lane;
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = lane;
		}

		/* subtract the first product */
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].neg = 1;
		alu.src[2].chan = lane;

		if (via_temp)
			alu.dst.sel = ctx->temp_reg;
		else
			tgsi_dst(ctx, &inst->Dst[0], lane, &alu.dst);
		alu.dst.chan = lane;
		alu.dst.write = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return via_temp ? tgsi_helper_copy(ctx, inst) : 0;
}

/*
 * TGSI EXP.  Each enabled destination channel is computed into
 * ctx->temp_reg and the result is then moved to the real destination by
 * tgsi_helper_copy():
 *   x = 2 ^ floor(src.x)
 *   y = fract(src.x)
 *   z = 2 ^ src.x
 *   w = 1.0
 *
 * On CAYMAN, EXP_IEEE is issued in three vector lanes with only the
 * wanted lane's write enabled; other chips issue it once.
 *
 * Every instruction starts from a zeroed r600_bytecode_alu.  The previous
 * code reused the FLOOR instruction's struct for the following EXP_IEEE,
 * so anything r600_bytecode_src() had copied from the TGSI operand other
 * than sel/chan (presumably negate/abs/relative-addressing/kcache/literal
 * fields -- confirm against that helper) was applied a second time to the
 * temp register.
 *
 * Returns 0 on success or the first error from the emit helpers.
 */
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const unsigned writemask = inst->Dst[0].Register.WriteMask;
	const bool is_cayman = ctx->bc->chip_class == CAYMAN;
	struct r600_bytecode_alu alu;
	int r;
	int i;

	/* result.x = 2^floor(src); */
	if (writemask & 1) {
		/* temp.x = floor(src.x) */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLOOR;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 0;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* temp.x = 2^temp.x */
		if (is_cayman) {
			for (i = 0; i < 3; i++) {
				/* fresh struct: do not inherit the FLOOR's source modifiers */
				memset(&alu, 0, sizeof(alu));
				alu.op = ALU_OP1_EXP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 0;

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				alu.dst.write = (i == 0);
				alu.last = (i == 2);
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			/* fresh struct: do not inherit the FLOOR's source modifiers */
			memset(&alu, 0, sizeof(alu));
			alu.op = ALU_OP1_EXP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 0;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 0;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.y = src.x - floor(src.x); */
	if (writemask & 2) {
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FRACT;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 1;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* result.z = RoughApprox2ToX(src.x); */
	if (writemask & 4) {
		if (is_cayman) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(alu));
				alu.op = ALU_OP1_EXP_IEEE;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 2) {
					alu.dst.write = 1;
					alu.last = 1;
				}
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(alu));
			alu.op = ALU_OP1_EXP_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 2;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.w = 1.0; */
	if (writemask & 8) {
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return tgsi_helper_copy(ctx, inst);
}

5519static int tgsi_log(struct r600_shader_ctx *ctx) 5520{ 5521 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5522 struct r600_bytecode_alu alu; 5523 int r; 5524 int i; 5525 5526 /* result.x = floor(log2(|src|)); */ 5527 if (inst->Dst[0].Register.WriteMask & 1) { 5528 if (ctx->bc->chip_class == CAYMAN) { 5529 for (i = 0; i < 3; i++) { 5530 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5531 5532 alu.op = ALU_OP1_LOG_IEEE; 5533 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5534 
r600_bytecode_src_set_abs(&alu.src[0]); 5535 5536 alu.dst.sel = ctx->temp_reg; 5537 alu.dst.chan = i; 5538 if (i == 0) 5539 alu.dst.write = 1; 5540 if (i == 2) 5541 alu.last = 1; 5542 r = r600_bytecode_add_alu(ctx->bc, &alu); 5543 if (r) 5544 return r; 5545 } 5546 5547 } else { 5548 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5549 5550 alu.op = ALU_OP1_LOG_IEEE; 5551 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5552 r600_bytecode_src_set_abs(&alu.src[0]); 5553 5554 alu.dst.sel = ctx->temp_reg; 5555 alu.dst.chan = 0; 5556 alu.dst.write = 1; 5557 alu.last = 1; 5558 r = r600_bytecode_add_alu(ctx->bc, &alu); 5559 if (r) 5560 return r; 5561 } 5562 5563 alu.op = ALU_OP1_FLOOR; 5564 alu.src[0].sel = ctx->temp_reg; 5565 alu.src[0].chan = 0; 5566 5567 alu.dst.sel = ctx->temp_reg; 5568 alu.dst.chan = 0; 5569 alu.dst.write = 1; 5570 alu.last = 1; 5571 5572 r = r600_bytecode_add_alu(ctx->bc, &alu); 5573 if (r) 5574 return r; 5575 } 5576 5577 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 5578 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 5579 5580 if (ctx->bc->chip_class == CAYMAN) { 5581 for (i = 0; i < 3; i++) { 5582 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5583 5584 alu.op = ALU_OP1_LOG_IEEE; 5585 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5586 r600_bytecode_src_set_abs(&alu.src[0]); 5587 5588 alu.dst.sel = ctx->temp_reg; 5589 alu.dst.chan = i; 5590 if (i == 1) 5591 alu.dst.write = 1; 5592 if (i == 2) 5593 alu.last = 1; 5594 5595 r = r600_bytecode_add_alu(ctx->bc, &alu); 5596 if (r) 5597 return r; 5598 } 5599 } else { 5600 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5601 5602 alu.op = ALU_OP1_LOG_IEEE; 5603 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5604 r600_bytecode_src_set_abs(&alu.src[0]); 5605 5606 alu.dst.sel = ctx->temp_reg; 5607 alu.dst.chan = 1; 5608 alu.dst.write = 1; 5609 alu.last = 1; 5610 5611 r = r600_bytecode_add_alu(ctx->bc, &alu); 5612 if (r) 5613 return r; 5614 } 5615 5616 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 5617 5618 alu.op = ALU_OP1_FLOOR; 5619 alu.src[0].sel = ctx->temp_reg; 5620 alu.src[0].chan = 1; 5621 5622 alu.dst.sel = ctx->temp_reg; 5623 alu.dst.chan = 1; 5624 alu.dst.write = 1; 5625 alu.last = 1; 5626 5627 r = r600_bytecode_add_alu(ctx->bc, &alu); 5628 if (r) 5629 return r; 5630 5631 if (ctx->bc->chip_class == CAYMAN) { 5632 for (i = 0; i < 3; i++) { 5633 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5634 alu.op = ALU_OP1_EXP_IEEE; 5635 alu.src[0].sel = ctx->temp_reg; 5636 alu.src[0].chan = 1; 5637 5638 alu.dst.sel = ctx->temp_reg; 5639 alu.dst.chan = i; 5640 if (i == 1) 5641 alu.dst.write = 1; 5642 if (i == 2) 5643 alu.last = 1; 5644 5645 r = r600_bytecode_add_alu(ctx->bc, &alu); 5646 if (r) 5647 return r; 5648 } 5649 } else { 5650 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5651 alu.op = ALU_OP1_EXP_IEEE; 5652 alu.src[0].sel = ctx->temp_reg; 5653 alu.src[0].chan = 1; 5654 5655 alu.dst.sel = ctx->temp_reg; 5656 alu.dst.chan = 1; 5657 alu.dst.write = 1; 5658 alu.last = 1; 5659 5660 r = r600_bytecode_add_alu(ctx->bc, &alu); 5661 if (r) 5662 return r; 5663 } 5664 5665 if (ctx->bc->chip_class == CAYMAN) { 5666 for (i = 0; i < 3; i++) { 5667 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5668 alu.op = ALU_OP1_RECIP_IEEE; 5669 alu.src[0].sel = ctx->temp_reg; 5670 alu.src[0].chan = 1; 5671 5672 alu.dst.sel = ctx->temp_reg; 5673 alu.dst.chan = i; 5674 if (i == 1) 5675 alu.dst.write = 1; 5676 if (i == 2) 5677 alu.last = 1; 5678 5679 r = r600_bytecode_add_alu(ctx->bc, &alu); 5680 if (r) 5681 return r; 5682 } 5683 } else { 5684 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5685 alu.op = ALU_OP1_RECIP_IEEE; 5686 alu.src[0].sel = ctx->temp_reg; 5687 alu.src[0].chan = 1; 5688 5689 alu.dst.sel = ctx->temp_reg; 5690 alu.dst.chan = 1; 5691 alu.dst.write = 1; 5692 alu.last = 1; 5693 5694 r = r600_bytecode_add_alu(ctx->bc, &alu); 5695 if (r) 5696 return r; 5697 } 5698 5699 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5700 5701 
alu.op = ALU_OP2_MUL; 5702 5703 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5704 r600_bytecode_src_set_abs(&alu.src[0]); 5705 5706 alu.src[1].sel = ctx->temp_reg; 5707 alu.src[1].chan = 1; 5708 5709 alu.dst.sel = ctx->temp_reg; 5710 alu.dst.chan = 1; 5711 alu.dst.write = 1; 5712 alu.last = 1; 5713 5714 r = r600_bytecode_add_alu(ctx->bc, &alu); 5715 if (r) 5716 return r; 5717 } 5718 5719 /* result.z = log2(|src|);*/ 5720 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 5721 if (ctx->bc->chip_class == CAYMAN) { 5722 for (i = 0; i < 3; i++) { 5723 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5724 5725 alu.op = ALU_OP1_LOG_IEEE; 5726 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5727 r600_bytecode_src_set_abs(&alu.src[0]); 5728 5729 alu.dst.sel = ctx->temp_reg; 5730 if (i == 2) 5731 alu.dst.write = 1; 5732 alu.dst.chan = i; 5733 if (i == 2) 5734 alu.last = 1; 5735 5736 r = r600_bytecode_add_alu(ctx->bc, &alu); 5737 if (r) 5738 return r; 5739 } 5740 } else { 5741 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5742 5743 alu.op = ALU_OP1_LOG_IEEE; 5744 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5745 r600_bytecode_src_set_abs(&alu.src[0]); 5746 5747 alu.dst.sel = ctx->temp_reg; 5748 alu.dst.write = 1; 5749 alu.dst.chan = 2; 5750 alu.last = 1; 5751 5752 r = r600_bytecode_add_alu(ctx->bc, &alu); 5753 if (r) 5754 return r; 5755 } 5756 } 5757 5758 /* result.w = 1.0; */ 5759 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 5760 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5761 5762 alu.op = ALU_OP1_MOV; 5763 alu.src[0].sel = V_SQ_ALU_SRC_1; 5764 alu.src[0].chan = 0; 5765 5766 alu.dst.sel = ctx->temp_reg; 5767 alu.dst.chan = 3; 5768 alu.dst.write = 1; 5769 alu.last = 1; 5770 5771 r = r600_bytecode_add_alu(ctx->bc, &alu); 5772 if (r) 5773 return r; 5774 } 5775 5776 return tgsi_helper_copy(ctx, inst); 5777} 5778 5779static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 5780{ 5781 struct tgsi_full_instruction *inst = 
&ctx->parse.FullToken.FullInstruction; 5782 struct r600_bytecode_alu alu; 5783 int r; 5784 int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 5785 5786 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5787 5788 switch (inst->Instruction.Opcode) { 5789 case TGSI_OPCODE_ARL: 5790 alu.op = ALU_OP1_FLT_TO_INT_FLOOR; 5791 break; 5792 case TGSI_OPCODE_ARR: 5793 alu.op = ALU_OP1_FLT_TO_INT; 5794 break; 5795 case TGSI_OPCODE_UARL: 5796 alu.op = ALU_OP1_MOV; 5797 break; 5798 default: 5799 assert(0); 5800 return -1; 5801 } 5802 5803 for (i = 0; i <= lasti; ++i) { 5804 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 5805 continue; 5806 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5807 alu.last = i == lasti; 5808 alu.dst.sel = ctx->bc->ar_reg; 5809 alu.dst.chan = i; 5810 alu.dst.write = 1; 5811 r = r600_bytecode_add_alu(ctx->bc, &alu); 5812 if (r) 5813 return r; 5814 } 5815 5816 ctx->bc->ar_loaded = 0; 5817 return 0; 5818} 5819static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 5820{ 5821 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5822 struct r600_bytecode_alu alu; 5823 int r; 5824 int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 5825 5826 switch (inst->Instruction.Opcode) { 5827 case TGSI_OPCODE_ARL: 5828 memset(&alu, 0, sizeof(alu)); 5829 alu.op = ALU_OP1_FLOOR; 5830 alu.dst.sel = ctx->bc->ar_reg; 5831 alu.dst.write = 1; 5832 for (i = 0; i <= lasti; ++i) { 5833 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 5834 alu.dst.chan = i; 5835 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5836 alu.last = i == lasti; 5837 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5838 return r; 5839 } 5840 } 5841 5842 memset(&alu, 0, sizeof(alu)); 5843 alu.op = ALU_OP1_FLT_TO_INT; 5844 alu.src[0].sel = ctx->bc->ar_reg; 5845 alu.dst.sel = ctx->bc->ar_reg; 5846 alu.dst.write = 1; 5847 /* FLT_TO_INT is trans-only on r600/r700 */ 5848 alu.last = TRUE; 5849 for (i = 0; i <= lasti; ++i) { 5850 alu.dst.chan = i; 
5851 alu.src[0].chan = i; 5852 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5853 return r; 5854 } 5855 break; 5856 case TGSI_OPCODE_ARR: 5857 memset(&alu, 0, sizeof(alu)); 5858 alu.op = ALU_OP1_FLT_TO_INT; 5859 alu.dst.sel = ctx->bc->ar_reg; 5860 alu.dst.write = 1; 5861 /* FLT_TO_INT is trans-only on r600/r700 */ 5862 alu.last = TRUE; 5863 for (i = 0; i <= lasti; ++i) { 5864 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 5865 alu.dst.chan = i; 5866 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5867 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5868 return r; 5869 } 5870 } 5871 break; 5872 case TGSI_OPCODE_UARL: 5873 memset(&alu, 0, sizeof(alu)); 5874 alu.op = ALU_OP1_MOV; 5875 alu.dst.sel = ctx->bc->ar_reg; 5876 alu.dst.write = 1; 5877 for (i = 0; i <= lasti; ++i) { 5878 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 5879 alu.dst.chan = i; 5880 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5881 alu.last = i == lasti; 5882 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 5883 return r; 5884 } 5885 } 5886 break; 5887 default: 5888 assert(0); 5889 return -1; 5890 } 5891 5892 ctx->bc->ar_loaded = 0; 5893 return 0; 5894} 5895 5896static int tgsi_opdst(struct r600_shader_ctx *ctx) 5897{ 5898 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5899 struct r600_bytecode_alu alu; 5900 int i, r = 0; 5901 5902 for (i = 0; i < 4; i++) { 5903 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5904 5905 alu.op = ALU_OP2_MUL; 5906 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5907 5908 if (i == 0 || i == 3) { 5909 alu.src[0].sel = V_SQ_ALU_SRC_1; 5910 } else { 5911 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5912 } 5913 5914 if (i == 0 || i == 2) { 5915 alu.src[1].sel = V_SQ_ALU_SRC_1; 5916 } else { 5917 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 5918 } 5919 if (i == 3) 5920 alu.last = 1; 5921 r = r600_bytecode_add_alu(ctx->bc, &alu); 5922 if (r) 5923 return r; 5924 } 5925 return 0; 5926} 5927 5928static int emit_logic_pred(struct 
r600_shader_ctx *ctx, int opcode, int alu_type) 5929{ 5930 struct r600_bytecode_alu alu; 5931 int r; 5932 5933 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5934 alu.op = opcode; 5935 alu.execute_mask = 1; 5936 alu.update_pred = 1; 5937 5938 alu.dst.sel = ctx->temp_reg; 5939 alu.dst.write = 1; 5940 alu.dst.chan = 0; 5941 5942 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5943 alu.src[1].sel = V_SQ_ALU_SRC_0; 5944 alu.src[1].chan = 0; 5945 5946 alu.last = 1; 5947 5948 r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type); 5949 if (r) 5950 return r; 5951 return 0; 5952} 5953 5954static int pops(struct r600_shader_ctx *ctx, int pops) 5955{ 5956 unsigned force_pop = ctx->bc->force_add_cf; 5957 5958 if (!force_pop) { 5959 int alu_pop = 3; 5960 if (ctx->bc->cf_last) { 5961 if (ctx->bc->cf_last->op == CF_OP_ALU) 5962 alu_pop = 0; 5963 else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER) 5964 alu_pop = 1; 5965 } 5966 alu_pop += pops; 5967 if (alu_pop == 1) { 5968 ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER; 5969 ctx->bc->force_add_cf = 1; 5970 } else if (alu_pop == 2) { 5971 ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER; 5972 ctx->bc->force_add_cf = 1; 5973 } else { 5974 force_pop = 1; 5975 } 5976 } 5977 5978 if (force_pop) { 5979 r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP); 5980 ctx->bc->cf_last->pop_count = pops; 5981 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 5982 } 5983 5984 return 0; 5985} 5986 5987static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx, 5988 unsigned reason) 5989{ 5990 struct r600_stack_info *stack = &ctx->bc->stack; 5991 unsigned elements, entries; 5992 5993 unsigned entry_size = stack->entry_size; 5994 5995 elements = (stack->loop + stack->push_wqm ) * entry_size; 5996 elements += stack->push; 5997 5998 switch (ctx->bc->chip_class) { 5999 case R600: 6000 case R700: 6001 /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on 6002 * the stack must be reserved to hold the current active/continue 
6003 * masks */ 6004 if (reason == FC_PUSH_VPM) { 6005 elements += 2; 6006 } 6007 break; 6008 6009 case CAYMAN: 6010 /* r9xx: any stack operation on empty stack consumes 2 additional 6011 * elements */ 6012 elements += 2; 6013 6014 /* fallthrough */ 6015 /* FIXME: do the two elements added above cover the cases for the 6016 * r8xx+ below? */ 6017 6018 case EVERGREEN: 6019 /* r8xx+: 2 extra elements are not always required, but one extra 6020 * element must be added for each of the following cases: 6021 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest 6022 * stack usage. 6023 * (Currently we don't use ALU_ELSE_AFTER.) 6024 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM 6025 * PUSH instruction executed. 6026 * 6027 * NOTE: it seems we also need to reserve additional element in some 6028 * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader, 6029 * then STACK_SIZE should be 2 instead of 1 */ 6030 if (reason == FC_PUSH_VPM) { 6031 elements += 1; 6032 } 6033 break; 6034 6035 default: 6036 assert(0); 6037 break; 6038 } 6039 6040 /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 6041 * for all chips, so we use 4 in the final formula, not the real entry_size 6042 * for the chip */ 6043 entry_size = 4; 6044 6045 entries = (elements + (entry_size - 1)) / entry_size; 6046 6047 if (entries > stack->max_entries) 6048 stack->max_entries = entries; 6049} 6050 6051static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 6052{ 6053 switch(reason) { 6054 case FC_PUSH_VPM: 6055 --ctx->bc->stack.push; 6056 assert(ctx->bc->stack.push >= 0); 6057 break; 6058 case FC_PUSH_WQM: 6059 --ctx->bc->stack.push_wqm; 6060 assert(ctx->bc->stack.push_wqm >= 0); 6061 break; 6062 case FC_LOOP: 6063 --ctx->bc->stack.loop; 6064 assert(ctx->bc->stack.loop >= 0); 6065 break; 6066 default: 6067 assert(0); 6068 break; 6069 } 6070} 6071 6072static inline void callstack_push(struct r600_shader_ctx *ctx, 
unsigned reason) 6073{ 6074 switch (reason) { 6075 case FC_PUSH_VPM: 6076 ++ctx->bc->stack.push; 6077 break; 6078 case FC_PUSH_WQM: 6079 ++ctx->bc->stack.push_wqm; 6080 case FC_LOOP: 6081 ++ctx->bc->stack.loop; 6082 break; 6083 default: 6084 assert(0); 6085 } 6086 6087 callstack_update_max_depth(ctx, reason); 6088} 6089 6090static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 6091{ 6092 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 6093 6094 sp->mid = realloc((void *)sp->mid, 6095 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 6096 sp->mid[sp->num_mid] = ctx->bc->cf_last; 6097 sp->num_mid++; 6098} 6099 6100static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 6101{ 6102 ctx->bc->fc_sp++; 6103 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 6104 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 6105} 6106 6107static void fc_poplevel(struct r600_shader_ctx *ctx) 6108{ 6109 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 6110 free(sp->mid); 6111 sp->mid = NULL; 6112 sp->num_mid = 0; 6113 sp->start = NULL; 6114 sp->type = 0; 6115 ctx->bc->fc_sp--; 6116} 6117 6118#if 0 6119static int emit_return(struct r600_shader_ctx *ctx) 6120{ 6121 r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN)); 6122 return 0; 6123} 6124 6125static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 6126{ 6127 6128 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP)); 6129 ctx->bc->cf_last->pop_count = pops; 6130 /* XXX work out offset */ 6131 return 0; 6132} 6133 6134static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 6135{ 6136 return 0; 6137} 6138 6139static void emit_testflag(struct r600_shader_ctx *ctx) 6140{ 6141 6142} 6143 6144static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 6145{ 6146 emit_testflag(ctx); 6147 emit_jump_to_offset(ctx, 1, 4); 6148 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 6149 pops(ctx, ifidx + 1); 6150 emit_return(ctx); 
6151} 6152 6153static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 6154{ 6155 emit_testflag(ctx); 6156 6157 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 6158 ctx->bc->cf_last->pop_count = 1; 6159 6160 fc_set_mid(ctx, fc_sp); 6161 6162 pops(ctx, 1); 6163} 6164#endif 6165 6166static int emit_if(struct r600_shader_ctx *ctx, int opcode) 6167{ 6168 int alu_type = CF_OP_ALU_PUSH_BEFORE; 6169 6170 /* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by 6171 * LOOP_STARTxxx for nested loops may put the branch stack into a state 6172 * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this 6173 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */ 6174 if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) { 6175 r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH); 6176 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 6177 alu_type = CF_OP_ALU; 6178 } 6179 6180 emit_logic_pred(ctx, opcode, alu_type); 6181 6182 r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP); 6183 6184 fc_pushlevel(ctx, FC_IF); 6185 6186 callstack_push(ctx, FC_PUSH_VPM); 6187 return 0; 6188} 6189 6190static int tgsi_if(struct r600_shader_ctx *ctx) 6191{ 6192 return emit_if(ctx, ALU_OP2_PRED_SETNE); 6193} 6194 6195static int tgsi_uif(struct r600_shader_ctx *ctx) 6196{ 6197 return emit_if(ctx, ALU_OP2_PRED_SETNE_INT); 6198} 6199 6200static int tgsi_else(struct r600_shader_ctx *ctx) 6201{ 6202 r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE); 6203 ctx->bc->cf_last->pop_count = 1; 6204 6205 fc_set_mid(ctx, ctx->bc->fc_sp); 6206 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 6207 return 0; 6208} 6209 6210static int tgsi_endif(struct r600_shader_ctx *ctx) 6211{ 6212 pops(ctx, 1); 6213 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 6214 R600_ERR("if/endif unbalanced in shader\n"); 6215 return -1; 6216 } 6217 6218 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 6219 
ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 6220 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 6221 } else { 6222 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 6223 } 6224 fc_poplevel(ctx); 6225 6226 callstack_pop(ctx, FC_PUSH_VPM); 6227 return 0; 6228} 6229 6230static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 6231{ 6232 /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not 6233 * limited to 4096 iterations, like the other LOOP_* instructions. */ 6234 r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10); 6235 6236 fc_pushlevel(ctx, FC_LOOP); 6237 6238 /* check stack depth */ 6239 callstack_push(ctx, FC_LOOP); 6240 return 0; 6241} 6242 6243static int tgsi_endloop(struct r600_shader_ctx *ctx) 6244{ 6245 int i; 6246 6247 r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END); 6248 6249 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 6250 R600_ERR("loop/endloop in shader code are not paired.\n"); 6251 return -EINVAL; 6252 } 6253 6254 /* fixup loop pointers - from r600isa 6255 LOOP END points to CF after LOOP START, 6256 LOOP START point to CF after LOOP END 6257 BRK/CONT point to LOOP END CF 6258 */ 6259 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 6260 6261 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 6262 6263 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 6264 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 6265 } 6266 /* XXX add LOOPRET support */ 6267 fc_poplevel(ctx); 6268 callstack_pop(ctx, FC_LOOP); 6269 return 0; 6270} 6271 6272static int tgsi_loop_breakc(struct r600_shader_ctx *ctx) 6273{ 6274 int r; 6275 unsigned int fscp; 6276 6277 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 6278 { 6279 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 6280 break; 6281 } 6282 if (fscp == 0) { 6283 R600_ERR("BREAKC not inside loop/endloop pair\n"); 6284 return -EINVAL; 
6285 } 6286 6287 if (ctx->bc->chip_class == EVERGREEN && 6288 ctx->bc->family != CHIP_CYPRESS && 6289 ctx->bc->family != CHIP_JUNIPER) { 6290 /* HW bug: ALU_BREAK does not save the active mask correctly */ 6291 r = tgsi_uif(ctx); 6292 if (r) 6293 return r; 6294 6295 r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK); 6296 if (r) 6297 return r; 6298 fc_set_mid(ctx, fscp); 6299 6300 return tgsi_endif(ctx); 6301 } else { 6302 r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK); 6303 if (r) 6304 return r; 6305 fc_set_mid(ctx, fscp); 6306 } 6307 6308 return 0; 6309} 6310 6311static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 6312{ 6313 unsigned int fscp; 6314 6315 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 6316 { 6317 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 6318 break; 6319 } 6320 6321 if (fscp == 0) { 6322 R600_ERR("Break not inside loop/endloop pair\n"); 6323 return -EINVAL; 6324 } 6325 6326 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 6327 6328 fc_set_mid(ctx, fscp); 6329 6330 return 0; 6331} 6332 6333static int tgsi_gs_emit(struct r600_shader_ctx *ctx) 6334{ 6335 if (ctx->inst_info->op == CF_OP_EMIT_VERTEX) 6336 emit_gs_ring_writes(ctx, TRUE); 6337 6338 return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); 6339} 6340 6341static int tgsi_umad(struct r600_shader_ctx *ctx) 6342{ 6343 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6344 struct r600_bytecode_alu alu; 6345 int i, j, k, r; 6346 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 6347 6348 /* src0 * src1 */ 6349 for (i = 0; i < lasti + 1; i++) { 6350 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6351 continue; 6352 6353 if (ctx->bc->chip_class == CAYMAN) { 6354 for (j = 0 ; j < 4; j++) { 6355 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6356 6357 alu.op = ALU_OP2_MULLO_UINT; 6358 for (k = 0; k < inst->Instruction.NumSrcRegs; k++) { 6359 r600_bytecode_src(&alu.src[k], &ctx->src[k], i); 6360 } 6361 
tgsi_dst(ctx, &inst->Dst[0], j, &alu.dst); 6362 alu.dst.sel = ctx->temp_reg; 6363 alu.dst.write = (j == i); 6364 if (j == 3) 6365 alu.last = 1; 6366 r = r600_bytecode_add_alu(ctx->bc, &alu); 6367 if (r) 6368 return r; 6369 } 6370 } else { 6371 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6372 6373 alu.dst.chan = i; 6374 alu.dst.sel = ctx->temp_reg; 6375 alu.dst.write = 1; 6376 6377 alu.op = ALU_OP2_MULLO_UINT; 6378 for (j = 0; j < 2; j++) { 6379 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 6380 } 6381 6382 alu.last = 1; 6383 r = r600_bytecode_add_alu(ctx->bc, &alu); 6384 if (r) 6385 return r; 6386 } 6387 } 6388 6389 6390 for (i = 0; i < lasti + 1; i++) { 6391 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 6392 continue; 6393 6394 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6395 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6396 6397 alu.op = ALU_OP2_ADD_INT; 6398 6399 alu.src[0].sel = ctx->temp_reg; 6400 alu.src[0].chan = i; 6401 6402 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6403 if (i == lasti) { 6404 alu.last = 1; 6405 } 6406 r = r600_bytecode_add_alu(ctx->bc, &alu); 6407 if (r) 6408 return r; 6409 } 6410 return 0; 6411} 6412 6413static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 6414 {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_r600_arl}, 6415 {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2}, 6416 {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit}, 6417 6418 /* XXX: 6419 * For state trackers other than OpenGL, we'll want to use 6420 * _RECIP_IEEE instead. 
6421 */ 6422 {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 6423 6424 {TGSI_OPCODE_RSQ, 0, ALU_OP0_NOP, tgsi_rsq}, 6425 {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp}, 6426 {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log}, 6427 {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2}, 6428 {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2}, 6429 {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp}, 6430 {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp}, 6431 {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst}, 6432 {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2}, 6433 {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2}, 6434 {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap}, 6435 {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2}, 6436 {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, 6437 {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, 6438 {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, 6439 {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, 6440 /* gap */ 6441 {20, 0, ALU_OP0_NOP, tgsi_unsupported}, 6442 {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, 6443 /* gap */ 6444 {22, 0, ALU_OP0_NOP, tgsi_unsupported}, 6445 {23, 0, ALU_OP0_NOP, tgsi_unsupported}, 6446 {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, 6447 {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported}, 6448 {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2}, 6449 {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2}, 6450 {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 6451 {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 6452 {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow}, 6453 {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, 6454 /* gap */ 6455 {32, 0, ALU_OP0_NOP, tgsi_unsupported}, 6456 {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, 6457 {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported}, 6458 {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp}, 6459 {TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig}, 6460 {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 6461 {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 6462 
{TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 6463 {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported}, 6464 {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported}, 6465 {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported}, 6466 {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6467 {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported}, 6468 {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2}, 6469 {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported}, 6470 {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2}, 6471 {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig}, 6472 {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap}, 6473 {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2}, 6474 {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported}, 6475 {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex}, 6476 {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex}, 6477 {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex}, 6478 {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported}, 6479 {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported}, 6480 {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported}, 6481 {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6482 {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported}, 6483 {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6484 {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_r600_arl}, 6485 {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6486 {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported}, 6487 {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported}, 6488 {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg}, 6489 {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp}, 6490 {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs}, 6491 {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, 6492 {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported}, 6493 {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported}, 6494 {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp}, 6495 {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, 6496 {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 6497 
{TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if}, 6498 {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif}, 6499 {76, 0, ALU_OP0_NOP, tgsi_unsupported}, 6500 {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else}, 6501 {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif}, 6502 /* gap */ 6503 {79, 0, ALU_OP0_NOP, tgsi_unsupported}, 6504 {80, 0, ALU_OP0_NOP, tgsi_unsupported}, 6505 {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6506 {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6507 {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2}, 6508 {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 6509 {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2}, 6510 {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, 6511 {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2_trans}, 6512 /* gap */ 6513 {88, 0, ALU_OP0_NOP, tgsi_unsupported}, 6514 {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2}, 6515 {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2}, 6516 {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod}, 6517 {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2}, 6518 {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported}, 6519 {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, 6520 {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 6521 {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 6522 {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 6523 {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit}, 6524 {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, 6525 {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6526 {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, 6527 {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6528 {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 6529 /* gap */ 6530 {104, 0, ALU_OP0_NOP, tgsi_unsupported}, 6531 {105, 0, ALU_OP0_NOP, tgsi_unsupported}, 6532 {106, 0, ALU_OP0_NOP, tgsi_unsupported}, 6533 {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, 6534 {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2}, 6535 {TGSI_OPCODE_FSGE, 0, 
ALU_OP2_SETGE_DX10, tgsi_op2}, 6536 {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 6537 {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 6538 {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, 6539 {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, 6540 /* gap */ 6541 {114, 0, ALU_OP0_NOP, tgsi_unsupported}, 6542 {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_loop_breakc}, 6543 {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 6544 {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 6545 /* gap */ 6546 {118, 0, ALU_OP0_NOP, tgsi_unsupported}, 6547 {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, 6548 {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv}, 6549 {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2}, 6550 {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2}, 6551 {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg}, 6552 {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2}, 6553 {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2_trans}, 6554 {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap}, 6555 {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_op2_trans}, 6556 {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 6557 {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2}, 6558 {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv}, 6559 {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad}, 6560 {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2}, 6561 {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2}, 6562 {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod}, 6563 {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, tgsi_op2_trans}, 6564 {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2}, 6565 {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2}, 6566 {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2_trans}, 6567 {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 6568 {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2_swap}, 6569 {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, 6570 {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, 
tgsi_unsupported}, 6571 {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported}, 6572 {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, 6573 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 6574 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, 6575 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, 6576 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 6577 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 6578 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 6579 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 6580 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 6581 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 6582 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, 6583 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 6584 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 6585 {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_r600_arl}, 6586 {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp}, 6587 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 6588 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 6589 {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported}, 6590 {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6591 {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6592 {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6593 {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6594 {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported}, 6595 {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported}, 6596 {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported}, 6597 {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported}, 6598 {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported}, 6599 {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported}, 6600 {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported}, 6601 {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, 6602 {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, 6603 {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, 6604 {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, 6605 {TGSI_OPCODE_TEX2, 
0, FETCH_OP_SAMPLE, tgsi_tex}, 6606 {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, 6607 {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, 6608 {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported}, 6609}; 6610 6611static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 6612 {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_eg_arl}, 6613 {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2}, 6614 {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit}, 6615 {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate}, 6616 {TGSI_OPCODE_RSQ, 0, ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, 6617 {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp}, 6618 {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log}, 6619 {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2}, 6620 {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2}, 6621 {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp}, 6622 {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp}, 6623 {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst}, 6624 {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2}, 6625 {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2}, 6626 {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap}, 6627 {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2}, 6628 {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, 6629 {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, 6630 {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, 6631 {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, 6632 /* gap */ 6633 {20, 0, ALU_OP0_NOP, tgsi_unsupported}, 6634 {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, 6635 /* gap */ 6636 {22, 0, ALU_OP0_NOP, tgsi_unsupported}, 6637 {23, 0, ALU_OP0_NOP, tgsi_unsupported}, 6638 {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, 6639 {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported}, 6640 {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2}, 6641 {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2}, 6642 {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, 6643 {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, 6644 {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow}, 6645 
{TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, 6646 /* gap */ 6647 {32, 0, ALU_OP0_NOP, tgsi_unsupported}, 6648 {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, 6649 {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported}, 6650 {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp}, 6651 {TGSI_OPCODE_COS, 0, ALU_OP1_COS, tgsi_trig}, 6652 {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 6653 {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 6654 {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 6655 {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported}, 6656 {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported}, 6657 {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported}, 6658 {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6659 {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported}, 6660 {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2}, 6661 {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported}, 6662 {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2}, 6663 {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, tgsi_trig}, 6664 {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap}, 6665 {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2}, 6666 {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported}, 6667 {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex}, 6668 {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex}, 6669 {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex}, 6670 {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported}, 6671 {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported}, 6672 {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported}, 6673 {TGSI_OPCODE_UP4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6674 {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported}, 6675 {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6676 {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl}, 6677 {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6678 {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported}, 6679 {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported}, 6680 {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg}, 6681 
{TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp}, 6682 {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs}, 6683 {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, 6684 {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported}, 6685 {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported}, 6686 {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp}, 6687 {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, 6688 {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 6689 {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if}, 6690 {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif}, 6691 {76, 0, ALU_OP0_NOP, tgsi_unsupported}, 6692 {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else}, 6693 {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif}, 6694 /* gap */ 6695 {79, 0, ALU_OP0_NOP, tgsi_unsupported}, 6696 {80, 0, ALU_OP0_NOP, tgsi_unsupported}, 6697 {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6698 {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6699 {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2}, 6700 {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2_trans}, 6701 {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2}, 6702 {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, 6703 {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2}, 6704 /* gap */ 6705 {88, 0, ALU_OP0_NOP, tgsi_unsupported}, 6706 {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2}, 6707 {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2}, 6708 {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod}, 6709 {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2}, 6710 {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported}, 6711 {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, 6712 {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 6713 {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 6714 {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 6715 {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit}, 6716 {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, 6717 {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6718 {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, 
tgsi_endloop}, 6719 {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6720 {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 6721 /* gap */ 6722 {104, 0, ALU_OP0_NOP, tgsi_unsupported}, 6723 {105, 0, ALU_OP0_NOP, tgsi_unsupported}, 6724 {106, 0, ALU_OP0_NOP, tgsi_unsupported}, 6725 {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, 6726 {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2}, 6727 {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2}, 6728 {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 6729 {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 6730 {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, 6731 {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, 6732 /* gap */ 6733 {114, 0, ALU_OP0_NOP, tgsi_unsupported}, 6734 {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported}, 6735 {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 6736 {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 6737 /* gap */ 6738 {118, 0, ALU_OP0_NOP, tgsi_unsupported}, 6739 {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_f2i}, 6740 {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv}, 6741 {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2}, 6742 {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2}, 6743 {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg}, 6744 {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2}, 6745 {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2}, 6746 {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap}, 6747 {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_f2i}, 6748 {TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2_trans}, 6749 {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2}, 6750 {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv}, 6751 {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad}, 6752 {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2}, 6753 {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2}, 6754 {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod}, 6755 {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, 
tgsi_op2_trans}, 6756 {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2}, 6757 {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2}, 6758 {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2}, 6759 {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 6760 {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2}, 6761 {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, 6762 {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6763 {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported}, 6764 {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, 6765 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 6766 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, 6767 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, 6768 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 6769 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 6770 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 6771 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 6772 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 6773 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 6774 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, 6775 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 6776 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 6777 {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_eg_arl}, 6778 {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp}, 6779 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 6780 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 6781 {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported}, 6782 {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6783 {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6784 {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6785 {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6786 {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported}, 6787 {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported}, 6788 {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported}, 6789 {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported}, 6790 {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, 
tgsi_unsupported}, 6791 {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported}, 6792 {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported}, 6793 {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, 6794 {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, 6795 {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, 6796 {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, 6797 {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex}, 6798 {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, 6799 {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, 6800 {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported}, 6801}; 6802 6803static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 6804 {TGSI_OPCODE_ARL, 0, ALU_OP0_NOP, tgsi_eg_arl}, 6805 {TGSI_OPCODE_MOV, 0, ALU_OP1_MOV, tgsi_op2}, 6806 {TGSI_OPCODE_LIT, 0, ALU_OP0_NOP, tgsi_lit}, 6807 {TGSI_OPCODE_RCP, 0, ALU_OP1_RECIP_IEEE, cayman_emit_float_instr}, 6808 {TGSI_OPCODE_RSQ, 0, ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr}, 6809 {TGSI_OPCODE_EXP, 0, ALU_OP0_NOP, tgsi_exp}, 6810 {TGSI_OPCODE_LOG, 0, ALU_OP0_NOP, tgsi_log}, 6811 {TGSI_OPCODE_MUL, 0, ALU_OP2_MUL, tgsi_op2}, 6812 {TGSI_OPCODE_ADD, 0, ALU_OP2_ADD, tgsi_op2}, 6813 {TGSI_OPCODE_DP3, 0, ALU_OP2_DOT4, tgsi_dp}, 6814 {TGSI_OPCODE_DP4, 0, ALU_OP2_DOT4, tgsi_dp}, 6815 {TGSI_OPCODE_DST, 0, ALU_OP0_NOP, tgsi_opdst}, 6816 {TGSI_OPCODE_MIN, 0, ALU_OP2_MIN, tgsi_op2}, 6817 {TGSI_OPCODE_MAX, 0, ALU_OP2_MAX, tgsi_op2}, 6818 {TGSI_OPCODE_SLT, 0, ALU_OP2_SETGT, tgsi_op2_swap}, 6819 {TGSI_OPCODE_SGE, 0, ALU_OP2_SETGE, tgsi_op2}, 6820 {TGSI_OPCODE_MAD, 1, ALU_OP3_MULADD, tgsi_op3}, 6821 {TGSI_OPCODE_SUB, 0, ALU_OP2_ADD, tgsi_op2}, 6822 {TGSI_OPCODE_LRP, 0, ALU_OP0_NOP, tgsi_lrp}, 6823 {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, 6824 /* gap */ 6825 {20, 0, ALU_OP0_NOP, tgsi_unsupported}, 6826 {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, 6827 /* gap */ 6828 {22, 0, ALU_OP0_NOP, tgsi_unsupported}, 6829 {23, 0, ALU_OP0_NOP, 
tgsi_unsupported}, 6830 {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, 6831 {TGSI_OPCODE_CLAMP, 0, ALU_OP0_NOP, tgsi_unsupported}, 6832 {TGSI_OPCODE_FLR, 0, ALU_OP1_FLOOR, tgsi_op2}, 6833 {TGSI_OPCODE_ROUND, 0, ALU_OP1_RNDNE, tgsi_op2}, 6834 {TGSI_OPCODE_EX2, 0, ALU_OP1_EXP_IEEE, cayman_emit_float_instr}, 6835 {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, cayman_emit_float_instr}, 6836 {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, cayman_pow}, 6837 {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, 6838 /* gap */ 6839 {32, 0, ALU_OP0_NOP, tgsi_unsupported}, 6840 {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, 6841 {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported}, 6842 {TGSI_OPCODE_DPH, 0, ALU_OP2_DOT4, tgsi_dp}, 6843 {TGSI_OPCODE_COS, 0, ALU_OP1_COS, cayman_trig}, 6844 {TGSI_OPCODE_DDX, 0, FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 6845 {TGSI_OPCODE_DDY, 0, FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 6846 {TGSI_OPCODE_KILL, 0, ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ 6847 {TGSI_OPCODE_PK2H, 0, ALU_OP0_NOP, tgsi_unsupported}, 6848 {TGSI_OPCODE_PK2US, 0, ALU_OP0_NOP, tgsi_unsupported}, 6849 {TGSI_OPCODE_PK4B, 0, ALU_OP0_NOP, tgsi_unsupported}, 6850 {TGSI_OPCODE_PK4UB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6851 {TGSI_OPCODE_RFL, 0, ALU_OP0_NOP, tgsi_unsupported}, 6852 {TGSI_OPCODE_SEQ, 0, ALU_OP2_SETE, tgsi_op2}, 6853 {TGSI_OPCODE_SFL, 0, ALU_OP0_NOP, tgsi_unsupported}, 6854 {TGSI_OPCODE_SGT, 0, ALU_OP2_SETGT, tgsi_op2}, 6855 {TGSI_OPCODE_SIN, 0, ALU_OP1_SIN, cayman_trig}, 6856 {TGSI_OPCODE_SLE, 0, ALU_OP2_SETGE, tgsi_op2_swap}, 6857 {TGSI_OPCODE_SNE, 0, ALU_OP2_SETNE, tgsi_op2}, 6858 {TGSI_OPCODE_STR, 0, ALU_OP0_NOP, tgsi_unsupported}, 6859 {TGSI_OPCODE_TEX, 0, FETCH_OP_SAMPLE, tgsi_tex}, 6860 {TGSI_OPCODE_TXD, 0, FETCH_OP_SAMPLE_G, tgsi_tex}, 6861 {TGSI_OPCODE_TXP, 0, FETCH_OP_SAMPLE, tgsi_tex}, 6862 {TGSI_OPCODE_UP2H, 0, ALU_OP0_NOP, tgsi_unsupported}, 6863 {TGSI_OPCODE_UP2US, 0, ALU_OP0_NOP, tgsi_unsupported}, 6864 {TGSI_OPCODE_UP4B, 0, ALU_OP0_NOP, tgsi_unsupported}, 6865 {TGSI_OPCODE_UP4UB, 
0, ALU_OP0_NOP, tgsi_unsupported}, 6866 {TGSI_OPCODE_X2D, 0, ALU_OP0_NOP, tgsi_unsupported}, 6867 {TGSI_OPCODE_ARA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6868 {TGSI_OPCODE_ARR, 0, ALU_OP0_NOP, tgsi_eg_arl}, 6869 {TGSI_OPCODE_BRA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6870 {TGSI_OPCODE_CAL, 0, ALU_OP0_NOP, tgsi_unsupported}, 6871 {TGSI_OPCODE_RET, 0, ALU_OP0_NOP, tgsi_unsupported}, 6872 {TGSI_OPCODE_SSG, 0, ALU_OP0_NOP, tgsi_ssg}, 6873 {TGSI_OPCODE_CMP, 0, ALU_OP0_NOP, tgsi_cmp}, 6874 {TGSI_OPCODE_SCS, 0, ALU_OP0_NOP, tgsi_scs}, 6875 {TGSI_OPCODE_TXB, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, 6876 {TGSI_OPCODE_NRM, 0, ALU_OP0_NOP, tgsi_unsupported}, 6877 {TGSI_OPCODE_DIV, 0, ALU_OP0_NOP, tgsi_unsupported}, 6878 {TGSI_OPCODE_DP2, 0, ALU_OP2_DOT4, tgsi_dp}, 6879 {TGSI_OPCODE_TXL, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, 6880 {TGSI_OPCODE_BRK, 0, CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 6881 {TGSI_OPCODE_IF, 0, ALU_OP0_NOP, tgsi_if}, 6882 {TGSI_OPCODE_UIF, 0, ALU_OP0_NOP, tgsi_uif}, 6883 {76, 0, ALU_OP0_NOP, tgsi_unsupported}, 6884 {TGSI_OPCODE_ELSE, 0, ALU_OP0_NOP, tgsi_else}, 6885 {TGSI_OPCODE_ENDIF, 0, ALU_OP0_NOP, tgsi_endif}, 6886 /* gap */ 6887 {79, 0, ALU_OP0_NOP, tgsi_unsupported}, 6888 {80, 0, ALU_OP0_NOP, tgsi_unsupported}, 6889 {TGSI_OPCODE_PUSHA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6890 {TGSI_OPCODE_POPA, 0, ALU_OP0_NOP, tgsi_unsupported}, 6891 {TGSI_OPCODE_CEIL, 0, ALU_OP1_CEIL, tgsi_op2}, 6892 {TGSI_OPCODE_I2F, 0, ALU_OP1_INT_TO_FLT, tgsi_op2}, 6893 {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2}, 6894 {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, 6895 {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2}, 6896 /* gap */ 6897 {88, 0, ALU_OP0_NOP, tgsi_unsupported}, 6898 {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2}, 6899 {TGSI_OPCODE_OR, 0, ALU_OP2_OR_INT, tgsi_op2}, 6900 {TGSI_OPCODE_MOD, 0, ALU_OP0_NOP, tgsi_imod}, 6901 {TGSI_OPCODE_XOR, 0, ALU_OP2_XOR_INT, tgsi_op2}, 6902 {TGSI_OPCODE_SAD, 0, ALU_OP0_NOP, tgsi_unsupported}, 6903 {TGSI_OPCODE_TXF, 0, FETCH_OP_LD, tgsi_tex}, 
6904 {TGSI_OPCODE_TXQ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 6905 {TGSI_OPCODE_CONT, 0, CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 6906 {TGSI_OPCODE_EMIT, 0, CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 6907 {TGSI_OPCODE_ENDPRIM, 0, CF_OP_CUT_VERTEX, tgsi_gs_emit}, 6908 {TGSI_OPCODE_BGNLOOP, 0, ALU_OP0_NOP, tgsi_bgnloop}, 6909 {TGSI_OPCODE_BGNSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6910 {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, 6911 {TGSI_OPCODE_ENDSUB, 0, ALU_OP0_NOP, tgsi_unsupported}, 6912 {TGSI_OPCODE_TXQ_LZ, 0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 6913 /* gap */ 6914 {104, 0, ALU_OP0_NOP, tgsi_unsupported}, 6915 {105, 0, ALU_OP0_NOP, tgsi_unsupported}, 6916 {106, 0, ALU_OP0_NOP, tgsi_unsupported}, 6917 {TGSI_OPCODE_NOP, 0, ALU_OP0_NOP, tgsi_unsupported}, 6918 /* gap */ 6919 {TGSI_OPCODE_FSEQ, 0, ALU_OP2_SETE_DX10, tgsi_op2}, 6920 {TGSI_OPCODE_FSGE, 0, ALU_OP2_SETGE_DX10, tgsi_op2}, 6921 {TGSI_OPCODE_FSLT, 0, ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 6922 {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 6923 {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, 6924 {TGSI_OPCODE_CALLNZ, 0, ALU_OP0_NOP, tgsi_unsupported}, 6925 /* gap */ 6926 {114, 0, ALU_OP0_NOP, tgsi_unsupported}, 6927 {TGSI_OPCODE_BREAKC, 0, ALU_OP0_NOP, tgsi_unsupported}, 6928 {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 6929 {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 6930 /* gap */ 6931 {118, 0, ALU_OP0_NOP, tgsi_unsupported}, 6932 {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2}, 6933 {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv}, 6934 {TGSI_OPCODE_IMAX, 0, ALU_OP2_MAX_INT, tgsi_op2}, 6935 {TGSI_OPCODE_IMIN, 0, ALU_OP2_MIN_INT, tgsi_op2}, 6936 {TGSI_OPCODE_INEG, 0, ALU_OP2_SUB_INT, tgsi_ineg}, 6937 {TGSI_OPCODE_ISGE, 0, ALU_OP2_SETGE_INT, tgsi_op2}, 6938 {TGSI_OPCODE_ISHR, 0, ALU_OP2_ASHR_INT, tgsi_op2}, 6939 {TGSI_OPCODE_ISLT, 0, ALU_OP2_SETGT_INT, tgsi_op2_swap}, 6940 {TGSI_OPCODE_F2U, 0, ALU_OP1_FLT_TO_UINT, tgsi_op2}, 6941 
{TGSI_OPCODE_U2F, 0, ALU_OP1_UINT_TO_FLT, tgsi_op2}, 6942 {TGSI_OPCODE_UADD, 0, ALU_OP2_ADD_INT, tgsi_op2}, 6943 {TGSI_OPCODE_UDIV, 0, ALU_OP0_NOP, tgsi_udiv}, 6944 {TGSI_OPCODE_UMAD, 0, ALU_OP0_NOP, tgsi_umad}, 6945 {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2}, 6946 {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2}, 6947 {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod}, 6948 {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_INT, cayman_mul_int_instr}, 6949 {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2}, 6950 {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2}, 6951 {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2}, 6952 {TGSI_OPCODE_USLT, 0, ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 6953 {TGSI_OPCODE_USNE, 0, ALU_OP2_SETNE_INT, tgsi_op2}, 6954 {TGSI_OPCODE_SWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, 6955 {TGSI_OPCODE_CASE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6956 {TGSI_OPCODE_DEFAULT, 0, ALU_OP0_NOP, tgsi_unsupported}, 6957 {TGSI_OPCODE_ENDSWITCH, 0, ALU_OP0_NOP, tgsi_unsupported}, 6958 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 6959 {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, 6960 {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, 6961 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 6962 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 6963 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 6964 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 6965 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 6966 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 6967 {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, 6968 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 6969 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 6970 {TGSI_OPCODE_UARL, 0, ALU_OP1_MOVA_INT, tgsi_eg_arl}, 6971 {TGSI_OPCODE_UCMP, 0, ALU_OP0_NOP, tgsi_ucmp}, 6972 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 6973 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 6974 {TGSI_OPCODE_LOAD, 0, ALU_OP0_NOP, tgsi_unsupported}, 6975 {TGSI_OPCODE_STORE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6976 {TGSI_OPCODE_MFENCE, 0, ALU_OP0_NOP, 
tgsi_unsupported}, 6977 {TGSI_OPCODE_LFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6978 {TGSI_OPCODE_SFENCE, 0, ALU_OP0_NOP, tgsi_unsupported}, 6979 {TGSI_OPCODE_BARRIER, 0, ALU_OP0_NOP, tgsi_unsupported}, 6980 {TGSI_OPCODE_ATOMUADD, 0, ALU_OP0_NOP, tgsi_unsupported}, 6981 {TGSI_OPCODE_ATOMXCHG, 0, ALU_OP0_NOP, tgsi_unsupported}, 6982 {TGSI_OPCODE_ATOMCAS, 0, ALU_OP0_NOP, tgsi_unsupported}, 6983 {TGSI_OPCODE_ATOMAND, 0, ALU_OP0_NOP, tgsi_unsupported}, 6984 {TGSI_OPCODE_ATOMOR, 0, ALU_OP0_NOP, tgsi_unsupported}, 6985 {TGSI_OPCODE_ATOMXOR, 0, ALU_OP0_NOP, tgsi_unsupported}, 6986 {TGSI_OPCODE_ATOMUMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, 6987 {TGSI_OPCODE_ATOMUMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, 6988 {TGSI_OPCODE_ATOMIMIN, 0, ALU_OP0_NOP, tgsi_unsupported}, 6989 {TGSI_OPCODE_ATOMIMAX, 0, ALU_OP0_NOP, tgsi_unsupported}, 6990 {TGSI_OPCODE_TEX2, 0, FETCH_OP_SAMPLE, tgsi_tex}, 6991 {TGSI_OPCODE_TXB2, 0, FETCH_OP_SAMPLE_LB, tgsi_tex}, 6992 {TGSI_OPCODE_TXL2, 0, FETCH_OP_SAMPLE_L, tgsi_tex}, 6993 {TGSI_OPCODE_LAST, 0, ALU_OP0_NOP, tgsi_unsupported}, 6994}; 6995