r600_shader.c revision 21c5607e64ca4ef68730d8e846d8e7744ecdd024
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_info.h" 25#include "tgsi/tgsi_parse.h" 26#include "tgsi/tgsi_scan.h" 27#include "tgsi/tgsi_dump.h" 28#include "util/u_format.h" 29#include "r600_pipe.h" 30#include "r600_asm.h" 31#include "r600_sq.h" 32#include "r600_formats.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37#include <byteswap.h> 38 39/* CAYMAN notes 40Why CAYMAN got loops for lots of instructions is explained here. 41 42-These 8xx t-slot only ops are implemented in all vector slots. 43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 44These 8xx t-slot only opcodes become vector ops, with all four 45slots expecting the arguments on sources a and b. Result is 46broadcast to all channels. 47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 48These 8xx t-slot only opcodes become vector ops in the z, y, and 49x slots. 50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 52SQRT_IEEE/_64 53SIN/COS 54The w slot may have an independent co-issued operation, or if the 55result is required to be in the w slot, the opcode above may be 56issued in the w slot as well. 57The compiler must issue the source argument to slots z, y, and x 58*/ 59 60 61int r600_find_vs_semantic_index(struct r600_shader *vs, 62 struct r600_shader *ps, int id) 63{ 64 struct r600_shader_io *input = &ps->input[id]; 65 66 for (int i = 0; i < vs->noutput; i++) { 67 if (input->name == vs->output[i].name && 68 input->sid == vs->output[i].sid) { 69 return i - 1; 70 } 71 } 72 return 0; 73} 74 75static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 76{ 77 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 78 struct r600_shader *rshader = &shader->shader; 79 uint32_t *ptr; 80 int i; 81 82 /* copy new shader */ 83 if (shader->bo == NULL) { 84 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ 85 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); 86 if (shader->bo == NULL) { 87 return -ENOMEM; 88 } 89 ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, rctx->ctx.cs, PIPE_TRANSFER_WRITE); 90 if (R600_BIG_ENDIAN) { 91 for (i = 0; i < rshader->bc.ndw; ++i) { 92 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 93 } 94 } else { 95 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 96 } 97 r600_bo_unmap(rctx->radeon, shader->bo); 98 } 99 /* build state */ 100 switch (rshader->processor_type) { 101 case TGSI_PROCESSOR_VERTEX: 102 if (rctx->chip_class >= EVERGREEN) { 103 evergreen_pipe_shader_vs(ctx, shader); 104 } else { 105 r600_pipe_shader_vs(ctx, shader); 106 } 107 break; 108 case TGSI_PROCESSOR_FRAGMENT: 109 if (rctx->chip_class >= EVERGREEN) { 110 evergreen_pipe_shader_ps(ctx, shader); 111 } else { 112 r600_pipe_shader_ps(ctx, shader); 113 } 114 break; 115 default: 116 return -EINVAL; 117 } 118 return 0; 119} 120 121static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader); 122 123int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader) 124{ 125 static int dump_shaders = -1; 126 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 127 int r; 128 129 /* Would like some magic "get_bool_option_once" routine. 130 */ 131 if (dump_shaders == -1) 132 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 133 134 if (dump_shaders) { 135 fprintf(stderr, "--------------------------------------------------------------\n"); 136 tgsi_dump(shader->tokens, 0); 137 } 138 r = r600_shader_from_tgsi(rctx, shader); 139 if (r) { 140 R600_ERR("translation from TGSI failed !\n"); 141 return r; 142 } 143 r = r600_bytecode_build(&shader->shader.bc); 144 if (r) { 145 R600_ERR("building bytecode failed !\n"); 146 return r; 147 } 148 if (dump_shaders) { 149 r600_bytecode_dump(&shader->shader.bc); 150 fprintf(stderr, "______________________________________________________________\n"); 151 } 152 return r600_pipe_shader(ctx, shader); 153} 154 155void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 156{ 157 r600_bo_reference(&shader->bo, NULL); 158 r600_bytecode_clear(&shader->shader.bc); 159 160 memset(&shader->shader,0,sizeof(struct r600_shader)); 161} 162 163/* 164 * tgsi -> r600 shader 165 */ 166struct r600_shader_tgsi_instruction; 167 168struct r600_shader_src { 169 unsigned sel; 170 unsigned swizzle[4]; 171 unsigned neg; 172 unsigned abs; 173 unsigned rel; 174 uint32_t value[4]; 175}; 176 177struct r600_shader_ctx { 178 struct tgsi_shader_info info; 179 struct tgsi_parse_context parse; 180 const struct tgsi_token *tokens; 181 unsigned type; 182 unsigned file_offset[TGSI_FILE_COUNT]; 183 unsigned temp_reg; 184 unsigned ar_reg; 185 struct r600_shader_tgsi_instruction *inst_info; 186 struct r600_bytecode *bc; 187 struct r600_shader *shader; 188 struct r600_shader_src src[4]; 189 u32 *literals; 190 u32 nliterals; 191 u32 max_driver_temp_used; 192 /* needed for evergreen interpolation */ 193 boolean input_centroid; 194 boolean input_linear; 195 boolean input_perspective; 196 int num_interp_gpr; 197}; 198 199struct r600_shader_tgsi_instruction { 200 unsigned tgsi_opcode; 201 unsigned is_op3; 202 unsigned r600_opcode; 203 int (*process)(struct r600_shader_ctx *ctx); 204}; 205 206static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 207static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 208 209static int tgsi_is_supported(struct r600_shader_ctx *ctx) 210{ 211 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 212 int j; 213 214 if (i->Instruction.NumDstRegs > 1) { 215 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 216 return -EINVAL; 217 } 218 if (i->Instruction.Predicate) { 219 R600_ERR("predicate unsupported\n"); 220 return -EINVAL; 221 } 222#if 0 223 if (i->Instruction.Label) { 224 R600_ERR("label unsupported\n"); 225 return -EINVAL; 226 } 227#endif 228 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 229 if (i->Src[j].Register.Dimension) { 230 R600_ERR("unsupported src %d (dimension %d)\n", j, 231 i->Src[j].Register.Dimension); 232 return -EINVAL; 233 } 234 } 235 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 236 if (i->Dst[j].Register.Dimension) { 237 R600_ERR("unsupported dst (dimension)\n"); 238 return -EINVAL; 239 } 240 } 241 return 0; 242} 243 244static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 245{ 246 int i, r; 247 struct r600_bytecode_alu alu; 248 int gpr = 0, base_chan = 0; 249 int ij_index = 0; 250 251 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 252 ij_index = 0; 253 if (ctx->shader->input[input].centroid) 254 ij_index++; 255 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 256 ij_index = 0; 257 /* if we have perspective add one */ 258 if (ctx->input_perspective) { 259 ij_index++; 260 /* if we have perspective centroid */ 261 if (ctx->input_centroid) 262 ij_index++; 263 } 264 if (ctx->shader->input[input].centroid) 265 ij_index++; 266 } 267 268 /* work out gpr and base_chan from index */ 269 gpr = ij_index / 2; 270 base_chan = (2 * (ij_index % 2)) + 1; 271 272 for (i = 0; i < 8; i++) { 273 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 274 275 if (i < 4) 276 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 277 else 278 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 279 280 if ((i > 1) && (i < 6)) { 281 alu.dst.sel = ctx->shader->input[input].gpr; 282 alu.dst.write = 1; 283 } 284 285 alu.dst.chan = i % 4; 286 287 alu.src[0].sel = gpr; 288 alu.src[0].chan = (base_chan - (i % 2)); 289 290 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 291 292 alu.bank_swizzle_force = SQ_ALU_VEC_210; 293 if ((i % 4) == 3) 294 alu.last = 1; 295 r = r600_bytecode_add_alu(ctx->bc, &alu); 296 if (r) 297 return r; 298 } 299 return 0; 300} 301 302static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 303{ 304 int i, r; 305 struct r600_bytecode_alu alu; 306 307 for (i = 0; i < 4; i++) { 308 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 309 310 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0; 311 312 alu.dst.sel = ctx->shader->input[input].gpr; 313 alu.dst.write = 1; 314 315 alu.dst.chan = i; 316 317 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 318 alu.src[0].chan = i; 319 320 if (i == 3) 321 alu.last = 1; 322 r = r600_bytecode_add_alu(ctx->bc, &alu); 323 if (r) 324 return r; 325 } 326 return 0; 327} 328 329static int tgsi_declaration(struct r600_shader_ctx *ctx) 330{ 331 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 332 unsigned i; 333 int r; 334 335 switch (d->Declaration.File) { 336 case TGSI_FILE_INPUT: 337 i = ctx->shader->ninput++; 338 ctx->shader->input[i].name = d->Semantic.Name; 339 ctx->shader->input[i].sid = d->Semantic.Index; 340 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 341 ctx->shader->input[i].centroid = d->Declaration.Centroid; 342 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 343 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) { 344 /* turn input into interpolate on EG */ 345 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION && 346 ctx->shader->input[i].name != TGSI_SEMANTIC_FACE) { 347 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 348 if (ctx->shader->input[i].interpolate > 0) { 349 evergreen_interp_alu(ctx, i); 350 } else { 351 evergreen_interp_flat(ctx, i); 352 } 353 } 354 } 355 break; 356 case TGSI_FILE_OUTPUT: 357 i = ctx->shader->noutput++; 358 ctx->shader->output[i].name = d->Semantic.Name; 359 ctx->shader->output[i].sid = d->Semantic.Index; 360 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 361 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 362 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 363 /* these don't count as vertex param exports */ 364 if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) || 365 (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE)) 366 ctx->shader->npos++; 367 } 368 break; 369 case TGSI_FILE_CONSTANT: 370 case TGSI_FILE_TEMPORARY: 371 case TGSI_FILE_SAMPLER: 372 case TGSI_FILE_ADDRESS: 373 break; 374 375 case TGSI_FILE_SYSTEM_VALUE: 376 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 377 struct r600_bytecode_alu alu; 378 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 379 380 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 381 alu.src[0].sel = 0; 382 alu.src[0].chan = 3; 383 384 alu.dst.sel = 0; 385 alu.dst.chan = 3; 386 alu.dst.write = 1; 387 alu.last = 1; 388 389 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 390 return r; 391 break; 392 } 393 394 default: 395 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 396 return -EINVAL; 397 } 398 return 0; 399} 400 401static int r600_get_temp(struct r600_shader_ctx *ctx) 402{ 403 return ctx->temp_reg + ctx->max_driver_temp_used++; 404} 405 406/* 407 * for evergreen we need to scan the shader to find the number of GPRs we need to 408 * reserve for interpolation. 409 * 410 * we need to know if we are going to emit 411 * any centroid inputs 412 * if perspective and linear are required 413*/ 414static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 415{ 416 int i; 417 int num_baryc; 418 419 ctx->input_linear = FALSE; 420 ctx->input_perspective = FALSE; 421 ctx->input_centroid = FALSE; 422 ctx->num_interp_gpr = 1; 423 424 /* any centroid inputs */ 425 for (i = 0; i < ctx->info.num_inputs; i++) { 426 /* skip position/face */ 427 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 428 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 429 continue; 430 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 431 ctx->input_linear = TRUE; 432 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 433 ctx->input_perspective = TRUE; 434 if (ctx->info.input_centroid[i]) 435 ctx->input_centroid = TRUE; 436 } 437 438 num_baryc = 0; 439 /* ignoring sample for now */ 440 if (ctx->input_perspective) 441 num_baryc++; 442 if (ctx->input_linear) 443 num_baryc++; 444 if (ctx->input_centroid) 445 num_baryc *= 2; 446 447 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 448 449 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 450 return ctx->num_interp_gpr; 451} 452 453static void tgsi_src(struct r600_shader_ctx *ctx, 454 const struct tgsi_full_src_register *tgsi_src, 455 struct r600_shader_src *r600_src) 456{ 457 memset(r600_src, 0, sizeof(*r600_src)); 458 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 459 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 460 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 461 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 462 r600_src->neg = tgsi_src->Register.Negate; 463 r600_src->abs = tgsi_src->Register.Absolute; 464 465 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 466 int index; 467 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 468 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 469 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 470 471 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 472 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 473 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 474 return; 475 } 476 index = tgsi_src->Register.Index; 477 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 478 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 479 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 480 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ 481 r600_src->swizzle[0] = 3; 482 r600_src->swizzle[1] = 3; 483 r600_src->swizzle[2] = 3; 484 r600_src->swizzle[3] = 3; 485 r600_src->sel = 0; 486 } else { 487 if (tgsi_src->Register.Indirect) 488 r600_src->rel = V_SQ_REL_RELATIVE; 489 r600_src->sel = tgsi_src->Register.Index; 490 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 491 } 492} 493 494static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 495{ 496 struct r600_bytecode_vtx vtx; 497 unsigned int ar_reg; 498 int r; 499 500 if (offset) { 501 struct r600_bytecode_alu alu; 502 503 memset(&alu, 0, sizeof(alu)); 504 505 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 506 alu.src[0].sel = ctx->ar_reg; 507 508 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 509 alu.src[1].value = offset; 510 511 alu.dst.sel = dst_reg; 512 alu.dst.write = 1; 513 alu.last = 1; 514 515 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 516 return r; 517 518 ar_reg = dst_reg; 519 } else { 520 ar_reg = ctx->ar_reg; 521 } 522 523 memset(&vtx, 0, sizeof(vtx)); 524 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 525 vtx.src_gpr = ar_reg; 526 vtx.mega_fetch_count = 16; 527 vtx.dst_gpr = dst_reg; 528 vtx.dst_sel_x = 0; /* SEL_X */ 529 vtx.dst_sel_y = 1; /* SEL_Y */ 530 vtx.dst_sel_z = 2; /* SEL_Z */ 531 vtx.dst_sel_w = 3; /* SEL_W */ 532 vtx.data_format = FMT_32_32_32_32_FLOAT; 533 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 534 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 535 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 536 vtx.endian = r600_endian_swap(32); 537 538 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 539 return r; 540 541 return 0; 542} 543 544static int tgsi_split_constant(struct r600_shader_ctx *ctx) 545{ 546 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 547 struct r600_bytecode_alu alu; 548 int i, j, k, nconst, r; 549 550 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 551 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 552 nconst++; 553 } 554 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 555 } 556 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 557 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 558 continue; 559 } 560 561 if (ctx->src[i].rel) { 562 int treg = r600_get_temp(ctx); 563 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 564 return r; 565 566 ctx->src[i].sel = treg; 567 ctx->src[i].rel = 0; 568 j--; 569 } else if (j > 0) { 570 int treg = r600_get_temp(ctx); 571 for (k = 0; k < 4; k++) { 572 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 573 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 574 alu.src[0].sel = ctx->src[i].sel; 575 alu.src[0].chan = k; 576 alu.src[0].rel = ctx->src[i].rel; 577 alu.dst.sel = treg; 578 alu.dst.chan = k; 579 alu.dst.write = 1; 580 if (k == 3) 581 alu.last = 1; 582 r = r600_bytecode_add_alu(ctx->bc, &alu); 583 if (r) 584 return r; 585 } 586 ctx->src[i].sel = treg; 587 ctx->src[i].rel =0; 588 j--; 589 } 590 } 591 return 0; 592} 593 594/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 595static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 596{ 597 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 598 struct r600_bytecode_alu alu; 599 int i, j, k, nliteral, r; 600 601 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 602 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 603 nliteral++; 604 } 605 } 606 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 607 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 608 int treg = r600_get_temp(ctx); 609 for (k = 0; k < 4; k++) { 610 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 611 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 612 alu.src[0].sel = ctx->src[i].sel; 613 alu.src[0].chan = k; 614 alu.src[0].value = ctx->src[i].value[k]; 615 alu.dst.sel = treg; 616 alu.dst.chan = k; 617 alu.dst.write = 1; 618 if (k == 3) 619 alu.last = 1; 620 r = r600_bytecode_add_alu(ctx->bc, &alu); 621 if (r) 622 return r; 623 } 624 ctx->src[i].sel = treg; 625 j--; 626 } 627 } 628 return 0; 629} 630 631static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader) 632{ 633 struct r600_shader *shader = &pipeshader->shader; 634 struct tgsi_token *tokens = pipeshader->tokens; 635 struct tgsi_full_immediate *immediate; 636 struct tgsi_full_property *property; 637 struct r600_shader_ctx ctx; 638 struct r600_bytecode_output output[32]; 639 unsigned output_done, noutput; 640 unsigned opcode; 641 int i, j, r = 0, pos0; 642 643 ctx.bc = &shader->bc; 644 ctx.shader = shader; 645 r600_bytecode_init(ctx.bc, rctx->chip_class); 646 ctx.tokens = tokens; 647 tgsi_scan_shader(tokens, &ctx.info); 648 tgsi_parse_init(&ctx.parse, tokens); 649 ctx.type = ctx.parse.FullHeader.Processor.Processor; 650 shader->processor_type = ctx.type; 651 ctx.bc->type = shader->processor_type; 652 653 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) || 654 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color)); 655 656 shader->nr_cbufs = rctx->nr_cbufs; 657 658 /* register allocations */ 659 /* Values [0,127] correspond to GPR[0..127]. 660 * Values [128,159] correspond to constant buffer bank 0 661 * Values [160,191] correspond to constant buffer bank 1 662 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 663 * Values [256,287] correspond to constant buffer bank 2 (EG) 664 * Values [288,319] correspond to constant buffer bank 3 (EG) 665 * Other special values are shown in the list below. 666 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 667 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 668 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 669 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 670 * 248 SQ_ALU_SRC_0: special constant 0.0. 671 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 672 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 673 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 674 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 675 * 253 SQ_ALU_SRC_LITERAL: literal constant. 676 * 254 SQ_ALU_SRC_PV: previous vector result. 677 * 255 SQ_ALU_SRC_PS: previous scalar result. 678 */ 679 for (i = 0; i < TGSI_FILE_COUNT; i++) { 680 ctx.file_offset[i] = 0; 681 } 682 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 683 ctx.file_offset[TGSI_FILE_INPUT] = 1; 684 if (ctx.bc->chip_class >= EVERGREEN) { 685 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 686 } else { 687 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 688 } 689 } 690 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { 691 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 692 } 693 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 694 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 695 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 696 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 697 698 /* Outside the GPR range. This will be translated to one of the 699 * kcache banks later. */ 700 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 701 702 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 703 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 704 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 705 ctx.temp_reg = ctx.ar_reg + 1; 706 707 ctx.nliterals = 0; 708 ctx.literals = NULL; 709 shader->fs_write_all = FALSE; 710 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 711 tgsi_parse_token(&ctx.parse); 712 switch (ctx.parse.FullToken.Token.Type) { 713 case TGSI_TOKEN_TYPE_IMMEDIATE: 714 immediate = &ctx.parse.FullToken.FullImmediate; 715 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 716 if(ctx.literals == NULL) { 717 r = -ENOMEM; 718 goto out_err; 719 } 720 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 721 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 722 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 723 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 724 ctx.nliterals++; 725 break; 726 case TGSI_TOKEN_TYPE_DECLARATION: 727 r = tgsi_declaration(&ctx); 728 if (r) 729 goto out_err; 730 break; 731 case TGSI_TOKEN_TYPE_INSTRUCTION: 732 r = tgsi_is_supported(&ctx); 733 if (r) 734 goto out_err; 735 ctx.max_driver_temp_used = 0; 736 /* reserve first tmp for everyone */ 737 r600_get_temp(&ctx); 738 739 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 740 if ((r = tgsi_split_constant(&ctx))) 741 goto out_err; 742 if ((r = tgsi_split_literal_constant(&ctx))) 743 goto out_err; 744 if (ctx.bc->chip_class == CAYMAN) 745 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 746 else if (ctx.bc->chip_class >= EVERGREEN) 747 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 748 else 749 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 750 r = ctx.inst_info->process(&ctx); 751 if (r) 752 goto out_err; 753 break; 754 case TGSI_TOKEN_TYPE_PROPERTY: 755 property = &ctx.parse.FullToken.FullProperty; 756 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { 757 if (property->u[0].Data == 1) 758 shader->fs_write_all = TRUE; 759 } 760 break; 761 default: 762 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 763 r = -EINVAL; 764 goto out_err; 765 } 766 } 767 768 noutput = shader->noutput; 769 770 /* clamp color outputs */ 771 if (shader->clamp_color) { 772 for (i = 0; i < noutput; i++) { 773 if (shader->output[i].name == TGSI_SEMANTIC_COLOR || 774 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) { 775 776 int j; 777 for (j = 0; j < 4; j++) { 778 struct r600_bytecode_alu alu; 779 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 780 781 /* MOV_SAT R, R */ 782 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 783 alu.dst.sel = shader->output[i].gpr; 784 alu.dst.chan = j; 785 alu.dst.write = 1; 786 alu.dst.clamp = 1; 787 alu.src[0].sel = alu.dst.sel; 788 alu.src[0].chan = j; 789 790 if (j == 3) { 791 alu.last = 1; 792 } 793 r = r600_bytecode_add_alu(ctx.bc, &alu); 794 if (r) 795 return r; 796 } 797 } 798 } 799 } 800 801 /* export output */ 802 j = 0; 803 for (i = 0, pos0 = 0; i < noutput; i++) { 804 memset(&output[i], 0, sizeof(struct r600_bytecode_output)); 805 output[i + j].gpr = shader->output[i].gpr; 806 output[i + j].elem_size = 3; 807 output[i + j].swizzle_x = 0; 808 output[i + j].swizzle_y = 1; 809 output[i + j].swizzle_z = 2; 810 output[i + j].swizzle_w = 3; 811 output[i + j].burst_count = 1; 812 output[i + j].barrier = 1; 813 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 814 output[i + j].array_base = i - pos0; 815 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 816 switch (ctx.type) { 817 case TGSI_PROCESSOR_VERTEX: 818 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 819 output[i + j].array_base = 60; 820 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 821 /* position doesn't count in array_base */ 822 pos0++; 823 } 824 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 825 output[i + j].array_base = 61; 826 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 827 /* position doesn't count in array_base */ 828 pos0++; 829 } 830 break; 831 case TGSI_PROCESSOR_FRAGMENT: 832 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 833 output[i + j].array_base = shader->output[i].sid; 834 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 835 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { 836 for (j = 1; j < shader->nr_cbufs; j++) { 837 memset(&output[i + j], 0, sizeof(struct r600_bytecode_output)); 838 output[i + j].gpr = shader->output[i].gpr; 839 output[i + j].elem_size = 3; 840 output[i + j].swizzle_x = 0; 841 output[i + j].swizzle_y = 1; 842 output[i + j].swizzle_z = 2; 843 output[i + j].swizzle_w = 3; 844 output[i + j].burst_count = 1; 845 output[i + j].barrier = 1; 846 output[i + j].array_base = shader->output[i].sid + j; 847 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 848 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 849 } 850 j--; 851 } 852 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 853 output[i + j].array_base = 61; 854 output[i + j].swizzle_x = 2; 855 output[i + j].swizzle_y = 7; 856 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 857 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 858 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 859 output[i + j].array_base = 61; 860 output[i + j].swizzle_x = 7; 861 output[i + j].swizzle_y = 1; 862 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 863 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 864 } else { 865 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 866 r = -EINVAL; 867 goto out_err; 868 } 869 break; 870 default: 871 R600_ERR("unsupported processor type %d\n", ctx.type); 872 r = -EINVAL; 873 goto out_err; 874 } 875 } 876 noutput += j; 877 /* add fake param output for vertex shader if no param is exported */ 878 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 879 for (i = 0, pos0 = 0; i < noutput; i++) { 880 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 881 pos0 = 1; 882 break; 883 } 884 } 885 if (!pos0) { 886 memset(&output[i], 0, sizeof(struct r600_bytecode_output)); 887 output[i].gpr = 0; 888 output[i].elem_size = 3; 889 output[i].swizzle_x = 0; 890 output[i].swizzle_y = 1; 891 output[i].swizzle_z = 2; 892 output[i].swizzle_w = 3; 893 output[i].burst_count = 1; 894 output[i].barrier = 1; 895 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 896 output[i].array_base = 0; 897 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 898 noutput++; 899 } 900 } 901 /* add fake pixel export */ 902 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 903 memset(&output[0], 0, sizeof(struct r600_bytecode_output)); 904 output[0].gpr = 0; 905 output[0].elem_size = 3; 906 output[0].swizzle_x = 7; 907 output[0].swizzle_y = 7; 908 output[0].swizzle_z = 7; 909 output[0].swizzle_w = 7; 910 output[0].burst_count = 1; 911 output[0].barrier = 1; 912 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 913 output[0].array_base = 0; 914 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 915 noutput++; 916 } 917 /* set export done on last export of each type */ 918 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 919 if (ctx.bc->chip_class < CAYMAN) { 920 if (i == (noutput - 1)) { 921 output[i].end_of_program = 1; 922 } 923 } 924 if (!(output_done & (1 << output[i].type))) { 925 output_done |= (1 << output[i].type); 926 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 927 } 928 } 929 /* add output to bytecode */ 930 for (i = 0; i < noutput; i++) { 931 r = r600_bytecode_add_output(ctx.bc, &output[i]); 932 if (r) 933 goto out_err; 934 } 935 /* add program end */ 936 if (ctx.bc->chip_class == CAYMAN) 937 cm_bytecode_add_cf_end(ctx.bc); 938 939 free(ctx.literals); 940 tgsi_parse_free(&ctx.parse); 941 return 0; 942out_err: 943 free(ctx.literals); 944 tgsi_parse_free(&ctx.parse); 945 return r; 946} 947 948static int tgsi_unsupported(struct r600_shader_ctx *ctx) 949{ 950 R600_ERR("%s tgsi opcode unsupported\n", 951 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 952 return -EINVAL; 953} 954 955static int tgsi_end(struct r600_shader_ctx *ctx) 956{ 957 return 0; 958} 959 960static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 961 const struct r600_shader_src *shader_src, 962 unsigned chan) 963{ 964 bc_src->sel = shader_src->sel; 965 bc_src->chan = shader_src->swizzle[chan]; 966 bc_src->neg = shader_src->neg; 967 bc_src->abs = shader_src->abs; 968 bc_src->rel = shader_src->rel; 969 bc_src->value = shader_src->value[bc_src->chan]; 970} 971 972static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 973{ 974 bc_src->abs = 1; 975 bc_src->neg = 0; 976} 977 978static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 979{ 980 bc_src->neg = !bc_src->neg; 981} 982 983static void tgsi_dst(struct r600_shader_ctx *ctx, 984 const struct tgsi_full_dst_register *tgsi_dst, 985 unsigned swizzle, 986 struct r600_bytecode_alu_dst *r600_dst) 987{ 988 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 989 990 r600_dst->sel = tgsi_dst->Register.Index; 991 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 992 r600_dst->chan = swizzle; 993 r600_dst->write = 1; 994 if (tgsi_dst->Register.Indirect) 995 r600_dst->rel = V_SQ_REL_RELATIVE; 996 if (inst->Instruction.Saturate) { 997 r600_dst->clamp = 1; 998 } 999} 1000 1001static int tgsi_last_instruction(unsigned writemask) 1002{ 1003 int i, lasti = 0; 1004 1005 for (i = 0; i < 4; i++) { 1006 if (writemask & (1 << i)) { 1007 lasti = i; 1008 } 1009 } 1010 return lasti; 1011} 1012 1013static int tgsi_int_to_flt(struct r600_shader_ctx *ctx) 1014{ 1015 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1016 struct r600_bytecode_alu alu; 1017 int i, j, r; 1018 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1019 1020 for (i = 0; i < lasti + 1; i++) { 1021 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1022 continue; 1023 1024 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1025 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1026 1027 alu.inst = ctx->inst_info->r600_opcode; 1028 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1029 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1030 } 1031 alu.last = 1; 1032 r = r600_bytecode_add_alu(ctx->bc, &alu); 1033 if (r) 1034 return r; 1035 } 1036 return 0; 1037} 1038 1039static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 1040{ 1041 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1042 struct r600_bytecode_alu alu; 1043 int i, j, r; 1044 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1045 1046 for (i = 0; i < lasti + 1; i++) { 1047 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1048 continue; 1049 1050 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1051 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1052 1053 alu.inst = ctx->inst_info->r600_opcode; 1054 if (!swap) { 1055 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1056 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1057 } 1058 } else { 1059 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 1060 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1061 } 1062 /* handle some special cases */ 1063 switch (ctx->inst_info->tgsi_opcode) { 1064 case TGSI_OPCODE_SUB: 1065 r600_bytecode_src_toggle_neg(&alu.src[1]); 1066 break; 1067 case TGSI_OPCODE_ABS: 1068 r600_bytecode_src_set_abs(&alu.src[0]); 1069 break; 1070 default: 1071 break; 1072 } 1073 if (i == lasti) { 1074 alu.last = 1; 1075 } 1076 r = r600_bytecode_add_alu(ctx->bc, &alu); 1077 if (r) 1078 return r; 1079 } 1080 return 0; 1081} 1082 1083static int tgsi_op2(struct r600_shader_ctx *ctx) 1084{ 1085 return tgsi_op2_s(ctx, 0); 1086} 1087 1088static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1089{ 1090 return tgsi_op2_s(ctx, 1); 1091} 1092 1093static int tgsi_ineg(struct r600_shader_ctx *ctx) 1094{ 1095 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1096 struct r600_bytecode_alu alu; 1097 int i, r; 1098 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1099 1100 for (i = 0; i < lasti + 1; i++) { 1101 1102 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1103 continue; 1104 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1105 alu.inst = ctx->inst_info->r600_opcode; 1106 1107 alu.src[0].sel = V_SQ_ALU_SRC_0; 1108 1109 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1110 1111 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1112 1113 if (i == lasti) { 1114 alu.last = 1; 1115 } 1116 r = r600_bytecode_add_alu(ctx->bc, &alu); 1117 if (r) 1118 return r; 1119 } 1120 return 0; 1121 1122} 1123 1124static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 1125{ 1126 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1127 int i, j, r; 1128 struct r600_bytecode_alu alu; 1129 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1130 1131 for (i = 0 ; i < last_slot; i++) { 1132 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1133 alu.inst = ctx->inst_info->r600_opcode; 1134 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1135 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 1136 } 1137 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1138 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1139 1140 if (i == last_slot - 1) 1141 alu.last = 1; 1142 r = r600_bytecode_add_alu(ctx->bc, &alu); 1143 if (r) 1144 return r; 1145 } 1146 return 0; 1147} 1148 1149/* 1150 * r600 - trunc to -PI..PI range 1151 * r700 - normalize by dividing by 2PI 1152 * see fdo bug 27901 1153 */ 1154static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 1155{ 1156 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1157 static float double_pi = 3.1415926535 * 2; 1158 static float neg_pi = -3.1415926535; 1159 1160 int r; 1161 struct r600_bytecode_alu alu; 1162 1163 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1164 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1165 alu.is_op3 = 1; 1166 1167 alu.dst.chan = 0; 1168 alu.dst.sel = ctx->temp_reg; 1169 alu.dst.write = 1; 1170 1171 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1172 1173 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1174 alu.src[1].chan = 0; 1175 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1176 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1177 alu.src[2].chan = 0; 1178 alu.last = 1; 1179 r = r600_bytecode_add_alu(ctx->bc, &alu); 1180 if (r) 1181 return r; 1182 1183 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1185 1186 alu.dst.chan = 0; 1187 alu.dst.sel = ctx->temp_reg; 1188 alu.dst.write = 1; 1189 1190 alu.src[0].sel = ctx->temp_reg; 1191 alu.src[0].chan = 0; 1192 alu.last = 1; 1193 r = r600_bytecode_add_alu(ctx->bc, &alu); 1194 if (r) 1195 return r; 1196 1197 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1198 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1199 alu.is_op3 = 1; 1200 1201 alu.dst.chan = 0; 1202 alu.dst.sel = ctx->temp_reg; 1203 alu.dst.write = 1; 1204 1205 alu.src[0].sel = ctx->temp_reg; 1206 alu.src[0].chan = 0; 1207 1208 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1209 alu.src[1].chan = 0; 1210 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1211 alu.src[2].chan = 0; 1212 1213 if (ctx->bc->chip_class == R600) { 1214 alu.src[1].value = *(uint32_t *)&double_pi; 1215 alu.src[2].value = *(uint32_t *)&neg_pi; 1216 } else { 1217 alu.src[1].sel = V_SQ_ALU_SRC_1; 1218 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1219 alu.src[2].neg = 1; 1220 } 1221 1222 alu.last = 1; 1223 r = r600_bytecode_add_alu(ctx->bc, &alu); 1224 if (r) 1225 return r; 1226 return 0; 1227} 1228 1229static int cayman_trig(struct r600_shader_ctx *ctx) 1230{ 1231 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1232 struct r600_bytecode_alu alu; 1233 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1234 int i, r; 1235 1236 r = tgsi_setup_trig(ctx); 1237 if (r) 1238 return r; 1239 1240 1241 for (i = 0; i < last_slot; i++) { 1242 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1243 alu.inst = ctx->inst_info->r600_opcode; 1244 alu.dst.chan = i; 1245 1246 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1247 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1248 1249 alu.src[0].sel = ctx->temp_reg; 1250 alu.src[0].chan = 0; 1251 if (i == last_slot - 1) 1252 alu.last = 1; 1253 r = r600_bytecode_add_alu(ctx->bc, &alu); 1254 if (r) 1255 return r; 1256 } 1257 return 0; 1258} 1259 1260static int tgsi_trig(struct r600_shader_ctx *ctx) 1261{ 1262 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1263 struct r600_bytecode_alu alu; 1264 int i, r; 1265 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1266 1267 r = tgsi_setup_trig(ctx); 1268 if (r) 1269 return r; 1270 1271 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1272 alu.inst = ctx->inst_info->r600_opcode; 1273 alu.dst.chan = 0; 1274 alu.dst.sel = ctx->temp_reg; 1275 alu.dst.write = 1; 1276 1277 alu.src[0].sel = ctx->temp_reg; 1278 alu.src[0].chan = 0; 1279 alu.last = 1; 1280 r = r600_bytecode_add_alu(ctx->bc, &alu); 1281 if (r) 1282 return r; 1283 1284 /* replicate result */ 1285 for (i = 0; i < lasti + 1; i++) { 1286 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1287 continue; 1288 1289 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1290 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1291 1292 alu.src[0].sel = ctx->temp_reg; 1293 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1294 if (i == lasti) 1295 alu.last = 1; 1296 r = r600_bytecode_add_alu(ctx->bc, &alu); 1297 if (r) 1298 return r; 1299 } 1300 return 0; 1301} 1302 1303static int tgsi_scs(struct r600_shader_ctx *ctx) 1304{ 1305 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1306 struct r600_bytecode_alu alu; 1307 int i, r; 1308 1309 /* We'll only need the trig stuff if we are going to write to the 1310 * X or Y components of the destination vector. 1311 */ 1312 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1313 r = tgsi_setup_trig(ctx); 1314 if (r) 1315 return r; 1316 } 1317 1318 /* dst.x = COS */ 1319 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1320 if (ctx->bc->chip_class == CAYMAN) { 1321 for (i = 0 ; i < 3; i++) { 1322 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1323 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1324 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1325 1326 if (i == 0) 1327 alu.dst.write = 1; 1328 else 1329 alu.dst.write = 0; 1330 alu.src[0].sel = ctx->temp_reg; 1331 alu.src[0].chan = 0; 1332 if (i == 2) 1333 alu.last = 1; 1334 r = r600_bytecode_add_alu(ctx->bc, &alu); 1335 if (r) 1336 return r; 1337 } 1338 } else { 1339 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1340 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1341 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1342 1343 alu.src[0].sel = ctx->temp_reg; 1344 alu.src[0].chan = 0; 1345 alu.last = 1; 1346 r = r600_bytecode_add_alu(ctx->bc, &alu); 1347 if (r) 1348 return r; 1349 } 1350 } 1351 1352 /* dst.y = SIN */ 1353 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1354 if (ctx->bc->chip_class == CAYMAN) { 1355 for (i = 0 ; i < 3; i++) { 1356 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1357 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1358 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1359 if (i == 1) 1360 alu.dst.write = 1; 1361 else 1362 alu.dst.write = 0; 1363 alu.src[0].sel = ctx->temp_reg; 1364 alu.src[0].chan = 0; 1365 if (i == 2) 1366 alu.last = 1; 1367 r = r600_bytecode_add_alu(ctx->bc, &alu); 1368 if (r) 1369 return r; 1370 } 1371 } else { 1372 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1373 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1374 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1375 1376 alu.src[0].sel = ctx->temp_reg; 1377 alu.src[0].chan = 0; 1378 alu.last = 1; 1379 r = r600_bytecode_add_alu(ctx->bc, &alu); 1380 if (r) 1381 return r; 1382 } 1383 } 1384 1385 /* dst.z = 0.0; */ 1386 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1387 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1388 1389 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1390 1391 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1392 1393 alu.src[0].sel = V_SQ_ALU_SRC_0; 1394 alu.src[0].chan = 0; 1395 1396 alu.last = 1; 1397 1398 r = r600_bytecode_add_alu(ctx->bc, &alu); 1399 if (r) 1400 return r; 1401 } 1402 1403 /* dst.w = 1.0; */ 1404 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1405 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1406 1407 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1408 1409 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1410 1411 alu.src[0].sel = V_SQ_ALU_SRC_1; 1412 alu.src[0].chan = 0; 1413 1414 alu.last = 1; 1415 1416 r = r600_bytecode_add_alu(ctx->bc, &alu); 1417 if (r) 1418 return r; 1419 } 1420 1421 return 0; 1422} 1423 1424static int tgsi_kill(struct r600_shader_ctx *ctx) 1425{ 1426 struct r600_bytecode_alu alu; 1427 int i, r; 1428 1429 for (i = 0; i < 4; i++) { 1430 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1431 alu.inst = ctx->inst_info->r600_opcode; 1432 1433 alu.dst.chan = i; 1434 1435 alu.src[0].sel = V_SQ_ALU_SRC_0; 1436 1437 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1438 alu.src[1].sel = V_SQ_ALU_SRC_1; 1439 alu.src[1].neg = 1; 1440 } else { 1441 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1442 } 1443 if (i == 3) { 1444 alu.last = 1; 1445 } 1446 r = r600_bytecode_add_alu(ctx->bc, &alu); 1447 if (r) 1448 return r; 1449 } 1450 1451 /* kill must be last in ALU */ 1452 ctx->bc->force_add_cf = 1; 1453 ctx->shader->uses_kill = TRUE; 1454 return 0; 1455} 1456 1457static int tgsi_lit(struct r600_shader_ctx *ctx) 1458{ 1459 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1460 struct r600_bytecode_alu alu; 1461 int r; 1462 1463 /* tmp.x = max(src.y, 0.0) */ 1464 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1465 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1466 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 1467 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1468 alu.src[1].chan = 1; 1469 1470 alu.dst.sel = ctx->temp_reg; 1471 alu.dst.chan = 0; 1472 alu.dst.write = 1; 1473 1474 alu.last = 1; 1475 r = r600_bytecode_add_alu(ctx->bc, &alu); 1476 if (r) 1477 return r; 1478 1479 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1480 { 1481 int chan; 1482 int sel; 1483 int i; 1484 1485 if (ctx->bc->chip_class == CAYMAN) { 1486 for (i = 0; i < 3; i++) { 1487 /* tmp.z = log(tmp.x) */ 1488 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1489 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1490 alu.src[0].sel = ctx->temp_reg; 1491 alu.src[0].chan = 0; 1492 alu.dst.sel = ctx->temp_reg; 1493 alu.dst.chan = i; 1494 if (i == 2) { 1495 alu.dst.write = 1; 1496 alu.last = 1; 1497 } else 1498 alu.dst.write = 0; 1499 1500 r = r600_bytecode_add_alu(ctx->bc, &alu); 1501 if (r) 1502 return r; 1503 } 1504 } else { 1505 /* tmp.z = log(tmp.x) */ 1506 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1507 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1508 alu.src[0].sel = ctx->temp_reg; 1509 alu.src[0].chan = 0; 1510 alu.dst.sel = ctx->temp_reg; 1511 alu.dst.chan = 2; 1512 alu.dst.write = 1; 1513 alu.last = 1; 1514 r = r600_bytecode_add_alu(ctx->bc, &alu); 1515 if (r) 1516 return r; 1517 } 1518 1519 chan = alu.dst.chan; 1520 sel = alu.dst.sel; 1521 1522 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 1523 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1524 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1525 alu.src[0].sel = sel; 1526 alu.src[0].chan = chan; 1527 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 1528 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 1529 alu.dst.sel = ctx->temp_reg; 1530 alu.dst.chan = 0; 1531 alu.dst.write = 1; 1532 alu.is_op3 = 1; 1533 alu.last = 1; 1534 r = r600_bytecode_add_alu(ctx->bc, &alu); 1535 if (r) 1536 return r; 1537 1538 if (ctx->bc->chip_class == CAYMAN) { 1539 for (i = 0; i < 3; i++) { 1540 /* dst.z = exp(tmp.x) */ 1541 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1542 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1543 alu.src[0].sel = ctx->temp_reg; 1544 alu.src[0].chan = 0; 1545 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1546 if (i == 2) { 1547 alu.dst.write = 1; 1548 alu.last = 1; 1549 } else 1550 alu.dst.write = 0; 1551 r = r600_bytecode_add_alu(ctx->bc, &alu); 1552 if (r) 1553 return r; 1554 } 1555 } else { 1556 /* dst.z = exp(tmp.x) */ 1557 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1558 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1559 alu.src[0].sel = ctx->temp_reg; 1560 alu.src[0].chan = 0; 1561 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1562 alu.last = 1; 1563 r = r600_bytecode_add_alu(ctx->bc, &alu); 1564 if (r) 1565 return r; 1566 } 1567 } 1568 1569 /* dst.x, <- 1.0 */ 1570 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1571 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1572 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1573 alu.src[0].chan = 0; 1574 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1575 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1576 r = r600_bytecode_add_alu(ctx->bc, &alu); 1577 if (r) 1578 return r; 1579 1580 /* dst.y = max(src.x, 0.0) */ 1581 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1582 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1583 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1584 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1585 alu.src[1].chan = 0; 1586 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1587 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1588 r = r600_bytecode_add_alu(ctx->bc, &alu); 1589 if (r) 1590 return r; 1591 1592 /* dst.w, <- 1.0 */ 1593 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1594 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1595 alu.src[0].sel = V_SQ_ALU_SRC_1; 1596 alu.src[0].chan = 0; 1597 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1598 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1599 alu.last = 1; 1600 r = r600_bytecode_add_alu(ctx->bc, &alu); 1601 if (r) 1602 return r; 1603 1604 return 0; 1605} 1606 1607static int tgsi_rsq(struct r600_shader_ctx *ctx) 1608{ 1609 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1610 struct r600_bytecode_alu alu; 1611 int i, r; 1612 1613 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1614 1615 /* FIXME: 1616 * For state trackers other than OpenGL, we'll want to use 1617 * _RECIPSQRT_IEEE instead. 1618 */ 1619 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1620 1621 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1622 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1623 r600_bytecode_src_set_abs(&alu.src[i]); 1624 } 1625 alu.dst.sel = ctx->temp_reg; 1626 alu.dst.write = 1; 1627 alu.last = 1; 1628 r = r600_bytecode_add_alu(ctx->bc, &alu); 1629 if (r) 1630 return r; 1631 /* replicate result */ 1632 return tgsi_helper_tempx_replicate(ctx); 1633} 1634 1635static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1636{ 1637 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1638 struct r600_bytecode_alu alu; 1639 int i, r; 1640 1641 for (i = 0; i < 4; i++) { 1642 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1643 alu.src[0].sel = ctx->temp_reg; 1644 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1645 alu.dst.chan = i; 1646 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1647 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1648 if (i == 3) 1649 alu.last = 1; 1650 r = r600_bytecode_add_alu(ctx->bc, &alu); 1651 if (r) 1652 return r; 1653 } 1654 return 0; 1655} 1656 1657static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1658{ 1659 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1660 struct r600_bytecode_alu alu; 1661 int i, r; 1662 1663 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1664 alu.inst = ctx->inst_info->r600_opcode; 1665 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1666 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1667 } 1668 alu.dst.sel = ctx->temp_reg; 1669 alu.dst.write = 1; 1670 alu.last = 1; 1671 r = r600_bytecode_add_alu(ctx->bc, &alu); 1672 if (r) 1673 return r; 1674 /* replicate result */ 1675 return tgsi_helper_tempx_replicate(ctx); 1676} 1677 1678static int cayman_pow(struct r600_shader_ctx *ctx) 1679{ 1680 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1681 int i, r; 1682 struct r600_bytecode_alu alu; 1683 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1684 1685 for (i = 0; i < 3; i++) { 1686 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1687 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1688 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1689 alu.dst.sel = ctx->temp_reg; 1690 alu.dst.chan = i; 1691 alu.dst.write = 1; 1692 if (i == 2) 1693 alu.last = 1; 1694 r = r600_bytecode_add_alu(ctx->bc, &alu); 1695 if (r) 1696 return r; 1697 } 1698 1699 /* b * LOG2(a) */ 1700 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1701 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1702 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 1703 alu.src[1].sel = ctx->temp_reg; 1704 alu.dst.sel = ctx->temp_reg; 1705 alu.dst.write = 1; 1706 alu.last = 1; 1707 r = r600_bytecode_add_alu(ctx->bc, &alu); 1708 if (r) 1709 return r; 1710 1711 for (i = 0; i < last_slot; i++) { 1712 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1713 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1714 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1715 alu.src[0].sel = ctx->temp_reg; 1716 1717 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1718 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1719 if (i == last_slot - 1) 1720 alu.last = 1; 1721 r = r600_bytecode_add_alu(ctx->bc, &alu); 1722 if (r) 1723 return r; 1724 } 1725 return 0; 1726} 1727 1728static int tgsi_pow(struct r600_shader_ctx *ctx) 1729{ 1730 struct r600_bytecode_alu alu; 1731 int r; 1732 1733 /* LOG2(a) */ 1734 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1735 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1736 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1737 alu.dst.sel = ctx->temp_reg; 1738 alu.dst.write = 1; 1739 alu.last = 1; 1740 r = r600_bytecode_add_alu(ctx->bc, &alu); 1741 if (r) 1742 return r; 1743 /* b * LOG2(a) */ 1744 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1745 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1746 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 1747 alu.src[1].sel = ctx->temp_reg; 1748 alu.dst.sel = ctx->temp_reg; 1749 alu.dst.write = 1; 1750 alu.last = 1; 1751 r = r600_bytecode_add_alu(ctx->bc, &alu); 1752 if (r) 1753 return r; 1754 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1755 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1756 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1757 alu.src[0].sel = ctx->temp_reg; 1758 alu.dst.sel = ctx->temp_reg; 1759 alu.dst.write = 1; 1760 alu.last = 1; 1761 r = r600_bytecode_add_alu(ctx->bc, &alu); 1762 if (r) 1763 return r; 1764 return tgsi_helper_tempx_replicate(ctx); 1765} 1766 1767static int tgsi_ssg(struct r600_shader_ctx *ctx) 1768{ 1769 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1770 struct r600_bytecode_alu alu; 1771 int i, r; 1772 1773 /* tmp = (src > 0 ? 1 : src) */ 1774 for (i = 0; i < 4; i++) { 1775 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1776 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1777 alu.is_op3 = 1; 1778 1779 alu.dst.sel = ctx->temp_reg; 1780 alu.dst.chan = i; 1781 1782 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 1783 alu.src[1].sel = V_SQ_ALU_SRC_1; 1784 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 1785 1786 if (i == 3) 1787 alu.last = 1; 1788 r = r600_bytecode_add_alu(ctx->bc, &alu); 1789 if (r) 1790 return r; 1791 } 1792 1793 /* dst = (-tmp > 0 ? -1 : tmp) */ 1794 for (i = 0; i < 4; i++) { 1795 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1796 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1797 alu.is_op3 = 1; 1798 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1799 1800 alu.src[0].sel = ctx->temp_reg; 1801 alu.src[0].chan = i; 1802 alu.src[0].neg = 1; 1803 1804 alu.src[1].sel = V_SQ_ALU_SRC_1; 1805 alu.src[1].neg = 1; 1806 1807 alu.src[2].sel = ctx->temp_reg; 1808 alu.src[2].chan = i; 1809 1810 if (i == 3) 1811 alu.last = 1; 1812 r = r600_bytecode_add_alu(ctx->bc, &alu); 1813 if (r) 1814 return r; 1815 } 1816 return 0; 1817} 1818 1819static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1820{ 1821 struct r600_bytecode_alu alu; 1822 int i, r; 1823 1824 for (i = 0; i < 4; i++) { 1825 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1826 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1827 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1828 alu.dst.chan = i; 1829 } else { 1830 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1831 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1832 alu.src[0].sel = ctx->temp_reg; 1833 alu.src[0].chan = i; 1834 } 1835 if (i == 3) { 1836 alu.last = 1; 1837 } 1838 r = r600_bytecode_add_alu(ctx->bc, &alu); 1839 if (r) 1840 return r; 1841 } 1842 return 0; 1843} 1844 1845static int tgsi_op3(struct r600_shader_ctx *ctx) 1846{ 1847 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1848 struct r600_bytecode_alu alu; 1849 int i, j, r; 1850 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1851 1852 for (i = 0; i < lasti + 1; i++) { 1853 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1854 continue; 1855 1856 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1857 alu.inst = ctx->inst_info->r600_opcode; 1858 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1859 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1860 } 1861 1862 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1863 alu.dst.chan = i; 1864 alu.dst.write = 1; 1865 alu.is_op3 = 1; 1866 if (i == lasti) { 1867 alu.last = 1; 1868 } 1869 r = r600_bytecode_add_alu(ctx->bc, &alu); 1870 if (r) 1871 return r; 1872 } 1873 return 0; 1874} 1875 1876static int tgsi_dp(struct r600_shader_ctx *ctx) 1877{ 1878 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1879 struct r600_bytecode_alu alu; 1880 int i, j, r; 1881 1882 for (i = 0; i < 4; i++) { 1883 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1884 alu.inst = ctx->inst_info->r600_opcode; 1885 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1886 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1887 } 1888 1889 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1890 alu.dst.chan = i; 1891 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1892 /* handle some special cases */ 1893 switch (ctx->inst_info->tgsi_opcode) { 1894 case TGSI_OPCODE_DP2: 1895 if (i > 1) { 1896 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1897 alu.src[0].chan = alu.src[1].chan = 0; 1898 } 1899 break; 1900 case TGSI_OPCODE_DP3: 1901 if (i > 2) { 1902 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1903 alu.src[0].chan = alu.src[1].chan = 0; 1904 } 1905 break; 1906 case TGSI_OPCODE_DPH: 1907 if (i == 3) { 1908 alu.src[0].sel = V_SQ_ALU_SRC_1; 1909 alu.src[0].chan = 0; 1910 alu.src[0].neg = 0; 1911 } 1912 break; 1913 default: 1914 break; 1915 } 1916 if (i == 3) { 1917 alu.last = 1; 1918 } 1919 r = r600_bytecode_add_alu(ctx->bc, &alu); 1920 if (r) 1921 return r; 1922 } 1923 return 0; 1924} 1925 1926static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 1927 unsigned index) 1928{ 1929 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1930 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 1931 inst->Src[index].Register.File != TGSI_FILE_INPUT) || 1932 ctx->src[index].neg || ctx->src[index].abs; 1933} 1934 1935static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 1936 unsigned index) 1937{ 1938 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1939 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 1940} 1941 1942static int tgsi_tex(struct r600_shader_ctx *ctx) 1943{ 1944 static float one_point_five = 1.5f; 1945 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1946 struct r600_bytecode_tex tex; 1947 struct r600_bytecode_alu alu; 1948 unsigned src_gpr; 1949 int r, i, j; 1950 int opcode; 1951 /* Texture fetch instructions can only use gprs as source. 1952 * Also they cannot negate the source or take the absolute value */ 1953 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); 1954 boolean src_loaded = FALSE; 1955 unsigned sampler_src_reg = 1; 1956 u8 offset_x = 0, offset_y = 0, offset_z = 0; 1957 1958 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 1959 1960 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 1961 /* get offset values */ 1962 if (inst->Texture.NumOffsets) { 1963 assert(inst->Texture.NumOffsets == 1); 1964 1965 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 1966 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 1967 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 1968 } 1969 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 1970 /* TGSI moves the sampler to src reg 3 for TXD */ 1971 sampler_src_reg = 3; 1972 1973 for (i = 1; i < 3; i++) { 1974 /* set gradients h/v */ 1975 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 1976 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H : 1977 SQ_TEX_INST_SET_GRADIENTS_V; 1978 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 1979 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 1980 1981 if (tgsi_tex_src_requires_loading(ctx, i)) { 1982 tex.src_gpr = r600_get_temp(ctx); 1983 tex.src_sel_x = 0; 1984 tex.src_sel_y = 1; 1985 tex.src_sel_z = 2; 1986 tex.src_sel_w = 3; 1987 1988 for (j = 0; j < 4; j++) { 1989 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1990 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1991 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 1992 alu.dst.sel = tex.src_gpr; 1993 alu.dst.chan = j; 1994 if (j == 3) 1995 alu.last = 1; 1996 alu.dst.write = 1; 1997 r = r600_bytecode_add_alu(ctx->bc, &alu); 1998 if (r) 1999 return r; 2000 } 2001 2002 } else { 2003 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 2004 tex.src_sel_x = ctx->src[i].swizzle[0]; 2005 tex.src_sel_y = ctx->src[i].swizzle[1]; 2006 tex.src_sel_z = ctx->src[i].swizzle[2]; 2007 tex.src_sel_w = ctx->src[i].swizzle[3]; 2008 tex.src_rel = ctx->src[i].rel; 2009 } 2010 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 2011 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 2012 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 2013 tex.coord_type_x = 1; 2014 tex.coord_type_y = 1; 2015 tex.coord_type_z = 1; 2016 tex.coord_type_w = 1; 2017 } 2018 r = r600_bytecode_add_tex(ctx->bc, &tex); 2019 if (r) 2020 return r; 2021 } 2022 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 2023 int out_chan; 2024 /* Add perspective divide */ 2025 if (ctx->bc->chip_class == CAYMAN) { 2026 out_chan = 2; 2027 for (i = 0; i < 3; i++) { 2028 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2029 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2030 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 2031 2032 alu.dst.sel = ctx->temp_reg; 2033 alu.dst.chan = i; 2034 if (i == 2) 2035 alu.last = 1; 2036 if (out_chan == i) 2037 alu.dst.write = 1; 2038 r = r600_bytecode_add_alu(ctx->bc, &alu); 2039 if (r) 2040 return r; 2041 } 2042 2043 } else { 2044 out_chan = 3; 2045 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2046 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2047 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 2048 2049 alu.dst.sel = ctx->temp_reg; 2050 alu.dst.chan = out_chan; 2051 alu.last = 1; 2052 alu.dst.write = 1; 2053 r = r600_bytecode_add_alu(ctx->bc, &alu); 2054 if (r) 2055 return r; 2056 } 2057 2058 for (i = 0; i < 3; i++) { 2059 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2060 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2061 alu.src[0].sel = ctx->temp_reg; 2062 alu.src[0].chan = out_chan; 2063 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2064 alu.dst.sel = ctx->temp_reg; 2065 alu.dst.chan = i; 2066 alu.dst.write = 1; 2067 r = r600_bytecode_add_alu(ctx->bc, &alu); 2068 if (r) 2069 return r; 2070 } 2071 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2072 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2073 alu.src[0].sel = V_SQ_ALU_SRC_1; 2074 alu.src[0].chan = 0; 2075 alu.dst.sel = ctx->temp_reg; 2076 alu.dst.chan = 3; 2077 alu.last = 1; 2078 alu.dst.write = 1; 2079 r = r600_bytecode_add_alu(ctx->bc, &alu); 2080 if (r) 2081 return r; 2082 src_loaded = TRUE; 2083 src_gpr = ctx->temp_reg; 2084 } 2085 2086 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2087 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 2088 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 2089 2090 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 2091 for (i = 0; i < 4; i++) { 2092 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2093 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 2094 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2095 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 2096 alu.dst.sel = ctx->temp_reg; 2097 alu.dst.chan = i; 2098 if (i == 3) 2099 alu.last = 1; 2100 alu.dst.write = 1; 2101 r = r600_bytecode_add_alu(ctx->bc, &alu); 2102 if (r) 2103 return r; 2104 } 2105 2106 /* tmp1.z = RCP_e(|tmp1.z|) */ 2107 if (ctx->bc->chip_class == CAYMAN) { 2108 for (i = 0; i < 3; i++) { 2109 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2110 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2111 alu.src[0].sel = ctx->temp_reg; 2112 alu.src[0].chan = 2; 2113 alu.src[0].abs = 1; 2114 alu.dst.sel = ctx->temp_reg; 2115 alu.dst.chan = i; 2116 if (i == 2) 2117 alu.dst.write = 1; 2118 if (i == 2) 2119 alu.last = 1; 2120 r = r600_bytecode_add_alu(ctx->bc, &alu); 2121 if (r) 2122 return r; 2123 } 2124 } else { 2125 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2126 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2127 alu.src[0].sel = ctx->temp_reg; 2128 alu.src[0].chan = 2; 2129 alu.src[0].abs = 1; 2130 alu.dst.sel = ctx->temp_reg; 2131 alu.dst.chan = 2; 2132 alu.dst.write = 1; 2133 alu.last = 1; 2134 r = r600_bytecode_add_alu(ctx->bc, &alu); 2135 if (r) 2136 return r; 2137 } 2138 2139 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 2140 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 2141 * muladd has no writemask, have to use another temp 2142 */ 2143 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2144 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2145 alu.is_op3 = 1; 2146 2147 alu.src[0].sel = ctx->temp_reg; 2148 alu.src[0].chan = 0; 2149 alu.src[1].sel = ctx->temp_reg; 2150 alu.src[1].chan = 2; 2151 2152 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2153 alu.src[2].chan = 0; 2154 alu.src[2].value = *(uint32_t *)&one_point_five; 2155 2156 alu.dst.sel = ctx->temp_reg; 2157 alu.dst.chan = 0; 2158 alu.dst.write = 1; 2159 2160 r = r600_bytecode_add_alu(ctx->bc, &alu); 2161 if (r) 2162 return r; 2163 2164 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2165 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2166 alu.is_op3 = 1; 2167 2168 alu.src[0].sel = ctx->temp_reg; 2169 alu.src[0].chan = 1; 2170 alu.src[1].sel = ctx->temp_reg; 2171 alu.src[1].chan = 2; 2172 2173 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2174 alu.src[2].chan = 0; 2175 alu.src[2].value = *(uint32_t *)&one_point_five; 2176 2177 alu.dst.sel = ctx->temp_reg; 2178 alu.dst.chan = 1; 2179 alu.dst.write = 1; 2180 2181 alu.last = 1; 2182 r = r600_bytecode_add_alu(ctx->bc, &alu); 2183 if (r) 2184 return r; 2185 2186 src_loaded = TRUE; 2187 src_gpr = ctx->temp_reg; 2188 } 2189 2190 if (src_requires_loading && !src_loaded) { 2191 for (i = 0; i < 4; i++) { 2192 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2193 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2194 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2195 alu.dst.sel = ctx->temp_reg; 2196 alu.dst.chan = i; 2197 if (i == 3) 2198 alu.last = 1; 2199 alu.dst.write = 1; 2200 r = r600_bytecode_add_alu(ctx->bc, &alu); 2201 if (r) 2202 return r; 2203 } 2204 src_loaded = TRUE; 2205 src_gpr = ctx->temp_reg; 2206 } 2207 2208 opcode = ctx->inst_info->r600_opcode; 2209 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 2210 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 2211 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 2212 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 2213 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) { 2214 switch (opcode) { 2215 case SQ_TEX_INST_SAMPLE: 2216 opcode = SQ_TEX_INST_SAMPLE_C; 2217 break; 2218 case SQ_TEX_INST_SAMPLE_L: 2219 opcode = SQ_TEX_INST_SAMPLE_C_L; 2220 break; 2221 case SQ_TEX_INST_SAMPLE_LB: 2222 opcode = SQ_TEX_INST_SAMPLE_C_LB; 2223 break; 2224 case SQ_TEX_INST_SAMPLE_G: 2225 opcode = SQ_TEX_INST_SAMPLE_C_G; 2226 break; 2227 } 2228 } 2229 2230 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 2231 tex.inst = opcode; 2232 2233 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 2234 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 2235 tex.src_gpr = src_gpr; 2236 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 2237 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 2238 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 2239 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 2240 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 2241 if (src_loaded) { 2242 tex.src_sel_x = 0; 2243 tex.src_sel_y = 1; 2244 tex.src_sel_z = 2; 2245 tex.src_sel_w = 3; 2246 } else { 2247 tex.src_sel_x = ctx->src[0].swizzle[0]; 2248 tex.src_sel_y = ctx->src[0].swizzle[1]; 2249 tex.src_sel_z = ctx->src[0].swizzle[2]; 2250 tex.src_sel_w = ctx->src[0].swizzle[3]; 2251 tex.src_rel = ctx->src[0].rel; 2252 } 2253 2254 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2255 tex.src_sel_x = 1; 2256 tex.src_sel_y = 0; 2257 tex.src_sel_z = 3; 2258 tex.src_sel_w = 1; 2259 } 2260 2261 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 2262 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 2263 tex.coord_type_x = 1; 2264 tex.coord_type_y = 1; 2265 } 2266 tex.coord_type_z = 1; 2267 tex.coord_type_w = 1; 2268 2269 tex.offset_x = offset_x; 2270 tex.offset_y = offset_y; 2271 tex.offset_z = offset_z; 2272 2273 /* Put the depth for comparison in W. 2274 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 2275 * Some instructions expect the depth in Z. */ 2276 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 2277 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 2278 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 2279 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 2280 opcode != SQ_TEX_INST_SAMPLE_C_L && 2281 opcode != SQ_TEX_INST_SAMPLE_C_LB) { 2282 tex.src_sel_w = tex.src_sel_z; 2283 } 2284 2285 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 2286 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 2287 if (opcode == SQ_TEX_INST_SAMPLE_C_L || 2288 opcode == SQ_TEX_INST_SAMPLE_C_LB) { 2289 /* the array index is read from Y */ 2290 tex.coord_type_y = 0; 2291 } else { 2292 /* the array index is read from Z */ 2293 tex.coord_type_z = 0; 2294 tex.src_sel_z = tex.src_sel_y; 2295 } 2296 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 2297 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) 2298 /* the array index is read from Z */ 2299 tex.coord_type_z = 0; 2300 2301 r = r600_bytecode_add_tex(ctx->bc, &tex); 2302 if (r) 2303 return r; 2304 2305 /* add shadow ambient support - gallium doesn't do it yet */ 2306 return 0; 2307} 2308 2309static int tgsi_lrp(struct r600_shader_ctx *ctx) 2310{ 2311 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2312 struct r600_bytecode_alu alu; 2313 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2314 unsigned i; 2315 int r; 2316 2317 /* optimize if it's just an equal balance */ 2318 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 2319 for (i = 0; i < lasti + 1; i++) { 2320 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2321 continue; 2322 2323 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2324 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2325 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2326 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2327 alu.omod = 3; 2328 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2329 alu.dst.chan = i; 2330 if (i == lasti) { 2331 alu.last = 1; 2332 } 2333 r = r600_bytecode_add_alu(ctx->bc, &alu); 2334 if (r) 2335 return r; 2336 } 2337 return 0; 2338 } 2339 2340 /* 1 - src0 */ 2341 for (i = 0; i < lasti + 1; i++) { 2342 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2343 continue; 2344 2345 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2346 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2347 alu.src[0].sel = V_SQ_ALU_SRC_1; 2348 alu.src[0].chan = 0; 2349 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2350 r600_bytecode_src_toggle_neg(&alu.src[1]); 2351 alu.dst.sel = ctx->temp_reg; 2352 alu.dst.chan = i; 2353 if (i == lasti) { 2354 alu.last = 1; 2355 } 2356 alu.dst.write = 1; 2357 r = r600_bytecode_add_alu(ctx->bc, &alu); 2358 if (r) 2359 return r; 2360 } 2361 2362 /* (1 - src0) * src2 */ 2363 for (i = 0; i < lasti + 1; i++) { 2364 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2365 continue; 2366 2367 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2368 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2369 alu.src[0].sel = ctx->temp_reg; 2370 alu.src[0].chan = i; 2371 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2372 alu.dst.sel = ctx->temp_reg; 2373 alu.dst.chan = i; 2374 if (i == lasti) { 2375 alu.last = 1; 2376 } 2377 alu.dst.write = 1; 2378 r = r600_bytecode_add_alu(ctx->bc, &alu); 2379 if (r) 2380 return r; 2381 } 2382 2383 /* src0 * src1 + (1 - src0) * src2 */ 2384 for (i = 0; i < lasti + 1; i++) { 2385 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2386 continue; 2387 2388 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2389 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2390 alu.is_op3 = 1; 2391 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2392 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2393 alu.src[2].sel = ctx->temp_reg; 2394 alu.src[2].chan = i; 2395 2396 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2397 alu.dst.chan = i; 2398 if (i == lasti) { 2399 alu.last = 1; 2400 } 2401 r = r600_bytecode_add_alu(ctx->bc, &alu); 2402 if (r) 2403 return r; 2404 } 2405 return 0; 2406} 2407 2408static int tgsi_cmp(struct r600_shader_ctx *ctx) 2409{ 2410 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2411 struct r600_bytecode_alu alu; 2412 int i, r; 2413 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2414 2415 for (i = 0; i < lasti + 1; i++) { 2416 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2417 continue; 2418 2419 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2420 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2421 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2422 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2423 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 2424 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2425 alu.dst.chan = i; 2426 alu.dst.write = 1; 2427 alu.is_op3 = 1; 2428 if (i == lasti) 2429 alu.last = 1; 2430 r = r600_bytecode_add_alu(ctx->bc, &alu); 2431 if (r) 2432 return r; 2433 } 2434 return 0; 2435} 2436 2437static int tgsi_xpd(struct r600_shader_ctx *ctx) 2438{ 2439 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2440 static const unsigned int src0_swizzle[] = {2, 0, 1}; 2441 static const unsigned int src1_swizzle[] = {1, 2, 0}; 2442 struct r600_bytecode_alu alu; 2443 uint32_t use_temp = 0; 2444 int i, r; 2445 2446 if (inst->Dst[0].Register.WriteMask != 0xf) 2447 use_temp = 1; 2448 2449 for (i = 0; i < 4; i++) { 2450 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2451 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2452 if (i < 3) { 2453 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2454 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 2455 } else { 2456 alu.src[0].sel = V_SQ_ALU_SRC_0; 2457 alu.src[0].chan = i; 2458 alu.src[1].sel = V_SQ_ALU_SRC_0; 2459 alu.src[1].chan = i; 2460 } 2461 2462 alu.dst.sel = ctx->temp_reg; 2463 alu.dst.chan = i; 2464 alu.dst.write = 1; 2465 2466 if (i == 3) 2467 alu.last = 1; 2468 r = r600_bytecode_add_alu(ctx->bc, &alu); 2469 if (r) 2470 return r; 2471 } 2472 2473 for (i = 0; i < 4; i++) { 2474 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2475 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2476 2477 if (i < 3) { 2478 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 2479 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 2480 } else { 2481 alu.src[0].sel = V_SQ_ALU_SRC_0; 2482 alu.src[0].chan = i; 2483 alu.src[1].sel = V_SQ_ALU_SRC_0; 2484 alu.src[1].chan = i; 2485 } 2486 2487 alu.src[2].sel = ctx->temp_reg; 2488 alu.src[2].neg = 1; 2489 alu.src[2].chan = i; 2490 2491 if (use_temp) 2492 alu.dst.sel = ctx->temp_reg; 2493 else 2494 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2495 alu.dst.chan = i; 2496 alu.dst.write = 1; 2497 alu.is_op3 = 1; 2498 if (i == 3) 2499 alu.last = 1; 2500 r = r600_bytecode_add_alu(ctx->bc, &alu); 2501 if (r) 2502 return r; 2503 } 2504 if (use_temp) 2505 return tgsi_helper_copy(ctx, inst); 2506 return 0; 2507} 2508 2509static int tgsi_exp(struct r600_shader_ctx *ctx) 2510{ 2511 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2512 struct r600_bytecode_alu alu; 2513 int r; 2514 int i; 2515 2516 /* result.x = 2^floor(src); */ 2517 if (inst->Dst[0].Register.WriteMask & 1) { 2518 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2519 2520 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2521 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2522 2523 alu.dst.sel = ctx->temp_reg; 2524 alu.dst.chan = 0; 2525 alu.dst.write = 1; 2526 alu.last = 1; 2527 r = r600_bytecode_add_alu(ctx->bc, &alu); 2528 if (r) 2529 return r; 2530 2531 if (ctx->bc->chip_class == CAYMAN) { 2532 for (i = 0; i < 3; i++) { 2533 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2534 alu.src[0].sel = ctx->temp_reg; 2535 alu.src[0].chan = 0; 2536 2537 alu.dst.sel = ctx->temp_reg; 2538 alu.dst.chan = i; 2539 if (i == 0) 2540 alu.dst.write = 1; 2541 if (i == 2) 2542 alu.last = 1; 2543 r = r600_bytecode_add_alu(ctx->bc, &alu); 2544 if (r) 2545 return r; 2546 } 2547 } else { 2548 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2549 alu.src[0].sel = ctx->temp_reg; 2550 alu.src[0].chan = 0; 2551 2552 alu.dst.sel = ctx->temp_reg; 2553 alu.dst.chan = 0; 2554 alu.dst.write = 1; 2555 alu.last = 1; 2556 r = r600_bytecode_add_alu(ctx->bc, &alu); 2557 if (r) 2558 return r; 2559 } 2560 } 2561 2562 /* result.y = tmp - floor(tmp); */ 2563 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2564 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2565 2566 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2567 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2568 2569 alu.dst.sel = ctx->temp_reg; 2570#if 0 2571 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2572 if (r) 2573 return r; 2574#endif 2575 alu.dst.write = 1; 2576 alu.dst.chan = 1; 2577 2578 alu.last = 1; 2579 2580 r = r600_bytecode_add_alu(ctx->bc, &alu); 2581 if (r) 2582 return r; 2583 } 2584 2585 /* result.z = RoughApprox2ToX(tmp);*/ 2586 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2587 if (ctx->bc->chip_class == CAYMAN) { 2588 for (i = 0; i < 3; i++) { 2589 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2590 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2591 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2592 2593 alu.dst.sel = ctx->temp_reg; 2594 alu.dst.chan = i; 2595 if (i == 2) { 2596 alu.dst.write = 1; 2597 alu.last = 1; 2598 } 2599 2600 r = r600_bytecode_add_alu(ctx->bc, &alu); 2601 if (r) 2602 return r; 2603 } 2604 } else { 2605 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2606 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2607 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2608 2609 alu.dst.sel = ctx->temp_reg; 2610 alu.dst.write = 1; 2611 alu.dst.chan = 2; 2612 2613 alu.last = 1; 2614 2615 r = r600_bytecode_add_alu(ctx->bc, &alu); 2616 if (r) 2617 return r; 2618 } 2619 } 2620 2621 /* result.w = 1.0;*/ 2622 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2623 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2624 2625 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2626 alu.src[0].sel = V_SQ_ALU_SRC_1; 2627 alu.src[0].chan = 0; 2628 2629 alu.dst.sel = ctx->temp_reg; 2630 alu.dst.chan = 3; 2631 alu.dst.write = 1; 2632 alu.last = 1; 2633 r = r600_bytecode_add_alu(ctx->bc, &alu); 2634 if (r) 2635 return r; 2636 } 2637 return tgsi_helper_copy(ctx, inst); 2638} 2639 2640static int tgsi_log(struct r600_shader_ctx *ctx) 2641{ 2642 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2643 struct r600_bytecode_alu alu; 2644 int r; 2645 int i; 2646 2647 /* result.x = floor(log2(|src|)); */ 2648 if (inst->Dst[0].Register.WriteMask & 1) { 2649 if (ctx->bc->chip_class == CAYMAN) { 2650 for (i = 0; i < 3; i++) { 2651 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2652 2653 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2654 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2655 r600_bytecode_src_set_abs(&alu.src[0]); 2656 2657 alu.dst.sel = ctx->temp_reg; 2658 alu.dst.chan = i; 2659 if (i == 0) 2660 alu.dst.write = 1; 2661 if (i == 2) 2662 alu.last = 1; 2663 r = r600_bytecode_add_alu(ctx->bc, &alu); 2664 if (r) 2665 return r; 2666 } 2667 2668 } else { 2669 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2670 2671 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2672 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2673 r600_bytecode_src_set_abs(&alu.src[0]); 2674 2675 alu.dst.sel = ctx->temp_reg; 2676 alu.dst.chan = 0; 2677 alu.dst.write = 1; 2678 alu.last = 1; 2679 r = r600_bytecode_add_alu(ctx->bc, &alu); 2680 if (r) 2681 return r; 2682 } 2683 2684 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2685 alu.src[0].sel = ctx->temp_reg; 2686 alu.src[0].chan = 0; 2687 2688 alu.dst.sel = ctx->temp_reg; 2689 alu.dst.chan = 0; 2690 alu.dst.write = 1; 2691 alu.last = 1; 2692 2693 r = r600_bytecode_add_alu(ctx->bc, &alu); 2694 if (r) 2695 return r; 2696 } 2697 2698 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 2699 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2700 2701 if (ctx->bc->chip_class == CAYMAN) { 2702 for (i = 0; i < 3; i++) { 2703 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2704 2705 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2706 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2707 r600_bytecode_src_set_abs(&alu.src[0]); 2708 2709 alu.dst.sel = ctx->temp_reg; 2710 alu.dst.chan = i; 2711 if (i == 1) 2712 alu.dst.write = 1; 2713 if (i == 2) 2714 alu.last = 1; 2715 2716 r = r600_bytecode_add_alu(ctx->bc, &alu); 2717 if (r) 2718 return r; 2719 } 2720 } else { 2721 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2722 2723 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2724 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2725 r600_bytecode_src_set_abs(&alu.src[0]); 2726 2727 alu.dst.sel = ctx->temp_reg; 2728 alu.dst.chan = 1; 2729 alu.dst.write = 1; 2730 alu.last = 1; 2731 2732 r = r600_bytecode_add_alu(ctx->bc, &alu); 2733 if (r) 2734 return r; 2735 } 2736 2737 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2738 2739 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2740 alu.src[0].sel = ctx->temp_reg; 2741 alu.src[0].chan = 1; 2742 2743 alu.dst.sel = ctx->temp_reg; 2744 alu.dst.chan = 1; 2745 alu.dst.write = 1; 2746 alu.last = 1; 2747 2748 r = r600_bytecode_add_alu(ctx->bc, &alu); 2749 if (r) 2750 return r; 2751 2752 if (ctx->bc->chip_class == CAYMAN) { 2753 for (i = 0; i < 3; i++) { 2754 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2755 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2756 alu.src[0].sel = ctx->temp_reg; 2757 alu.src[0].chan = 1; 2758 2759 alu.dst.sel = ctx->temp_reg; 2760 alu.dst.chan = i; 2761 if (i == 1) 2762 alu.dst.write = 1; 2763 if (i == 2) 2764 alu.last = 1; 2765 2766 r = r600_bytecode_add_alu(ctx->bc, &alu); 2767 if (r) 2768 return r; 2769 } 2770 } else { 2771 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2772 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2773 alu.src[0].sel = ctx->temp_reg; 2774 alu.src[0].chan = 1; 2775 2776 alu.dst.sel = ctx->temp_reg; 2777 alu.dst.chan = 1; 2778 alu.dst.write = 1; 2779 alu.last = 1; 2780 2781 r = r600_bytecode_add_alu(ctx->bc, &alu); 2782 if (r) 2783 return r; 2784 } 2785 2786 if (ctx->bc->chip_class == CAYMAN) { 2787 for (i = 0; i < 3; i++) { 2788 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2789 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2790 alu.src[0].sel = ctx->temp_reg; 2791 alu.src[0].chan = 1; 2792 2793 alu.dst.sel = ctx->temp_reg; 2794 alu.dst.chan = i; 2795 if (i == 1) 2796 alu.dst.write = 1; 2797 if (i == 2) 2798 alu.last = 1; 2799 2800 r = r600_bytecode_add_alu(ctx->bc, &alu); 2801 if (r) 2802 return r; 2803 } 2804 } else { 2805 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2806 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2807 alu.src[0].sel = ctx->temp_reg; 2808 alu.src[0].chan = 1; 2809 2810 alu.dst.sel = ctx->temp_reg; 2811 alu.dst.chan = 1; 2812 alu.dst.write = 1; 2813 alu.last = 1; 2814 2815 r = r600_bytecode_add_alu(ctx->bc, &alu); 2816 if (r) 2817 return r; 2818 } 2819 2820 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2821 2822 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2823 2824 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2825 r600_bytecode_src_set_abs(&alu.src[0]); 2826 2827 alu.src[1].sel = ctx->temp_reg; 2828 alu.src[1].chan = 1; 2829 2830 alu.dst.sel = ctx->temp_reg; 2831 alu.dst.chan = 1; 2832 alu.dst.write = 1; 2833 alu.last = 1; 2834 2835 r = r600_bytecode_add_alu(ctx->bc, &alu); 2836 if (r) 2837 return r; 2838 } 2839 2840 /* result.z = log2(|src|);*/ 2841 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2842 if (ctx->bc->chip_class == CAYMAN) { 2843 for (i = 0; i < 3; i++) { 2844 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2845 2846 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2847 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2848 r600_bytecode_src_set_abs(&alu.src[0]); 2849 2850 alu.dst.sel = ctx->temp_reg; 2851 if (i == 2) 2852 alu.dst.write = 1; 2853 alu.dst.chan = i; 2854 if (i == 2) 2855 alu.last = 1; 2856 2857 r = r600_bytecode_add_alu(ctx->bc, &alu); 2858 if (r) 2859 return r; 2860 } 2861 } else { 2862 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2863 2864 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2865 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2866 r600_bytecode_src_set_abs(&alu.src[0]); 2867 2868 alu.dst.sel = ctx->temp_reg; 2869 alu.dst.write = 1; 2870 alu.dst.chan = 2; 2871 alu.last = 1; 2872 2873 r = r600_bytecode_add_alu(ctx->bc, &alu); 2874 if (r) 2875 return r; 2876 } 2877 } 2878 2879 /* result.w = 1.0; */ 2880 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2881 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2882 2883 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2884 alu.src[0].sel = V_SQ_ALU_SRC_1; 2885 alu.src[0].chan = 0; 2886 2887 alu.dst.sel = ctx->temp_reg; 2888 alu.dst.chan = 3; 2889 alu.dst.write = 1; 2890 alu.last = 1; 2891 2892 r = r600_bytecode_add_alu(ctx->bc, &alu); 2893 if (r) 2894 return r; 2895 } 2896 2897 return tgsi_helper_copy(ctx, inst); 2898} 2899 2900static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2901{ 2902 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2903 struct r600_bytecode_alu alu; 2904 int r; 2905 2906 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2907 2908 switch (inst->Instruction.Opcode) { 2909 case TGSI_OPCODE_ARL: 2910 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2911 break; 2912 case TGSI_OPCODE_ARR: 2913 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2914 break; 2915 default: 2916 assert(0); 2917 return -1; 2918 } 2919 2920 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2921 alu.last = 1; 2922 alu.dst.sel = ctx->ar_reg; 2923 alu.dst.write = 1; 2924 r = r600_bytecode_add_alu(ctx->bc, &alu); 2925 if (r) 2926 return r; 2927 2928 /* TODO: Note that the MOVA can be avoided if we never use AR for 2929 * indexing non-CB registers in the current ALU clause. Similarly, we 2930 * need to load AR from ar_reg again if we started a new clause 2931 * between ARL and AR usage. The easy way to do that is to remove 2932 * the MOVA here, and load it for the first AR access after ar_reg 2933 * has been modified in each clause. */ 2934 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2935 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2936 alu.src[0].sel = ctx->ar_reg; 2937 alu.src[0].chan = 0; 2938 alu.last = 1; 2939 r = r600_bytecode_add_alu(ctx->bc, &alu); 2940 if (r) 2941 return r; 2942 return 0; 2943} 2944static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2945{ 2946 /* TODO from r600c, ar values don't persist between clauses */ 2947 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2948 struct r600_bytecode_alu alu; 2949 int r; 2950 2951 switch (inst->Instruction.Opcode) { 2952 case TGSI_OPCODE_ARL: 2953 memset(&alu, 0, sizeof(alu)); 2954 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 2955 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2956 alu.dst.sel = ctx->ar_reg; 2957 alu.dst.write = 1; 2958 alu.last = 1; 2959 2960 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2961 return r; 2962 2963 memset(&alu, 0, sizeof(alu)); 2964 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2965 alu.src[0].sel = ctx->ar_reg; 2966 alu.dst.sel = ctx->ar_reg; 2967 alu.dst.write = 1; 2968 alu.last = 1; 2969 2970 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2971 return r; 2972 break; 2973 case TGSI_OPCODE_ARR: 2974 memset(&alu, 0, sizeof(alu)); 2975 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2976 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2977 alu.dst.sel = ctx->ar_reg; 2978 alu.dst.write = 1; 2979 alu.last = 1; 2980 2981 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2982 return r; 2983 break; 2984 default: 2985 assert(0); 2986 return -1; 2987 } 2988 2989 memset(&alu, 0, sizeof(alu)); 2990 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2991 alu.src[0].sel = ctx->ar_reg; 2992 alu.last = 1; 2993 2994 r = r600_bytecode_add_alu(ctx->bc, &alu); 2995 if (r) 2996 return r; 2997 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2998 return 0; 2999} 3000 3001static int tgsi_opdst(struct r600_shader_ctx *ctx) 3002{ 3003 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3004 struct r600_bytecode_alu alu; 3005 int i, r = 0; 3006 3007 for (i = 0; i < 4; i++) { 3008 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3009 3010 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3011 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3012 3013 if (i == 0 || i == 3) { 3014 alu.src[0].sel = V_SQ_ALU_SRC_1; 3015 } else { 3016 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3017 } 3018 3019 if (i == 0 || i == 2) { 3020 alu.src[1].sel = V_SQ_ALU_SRC_1; 3021 } else { 3022 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3023 } 3024 if (i == 3) 3025 alu.last = 1; 3026 r = r600_bytecode_add_alu(ctx->bc, &alu); 3027 if (r) 3028 return r; 3029 } 3030 return 0; 3031} 3032 3033static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 3034{ 3035 struct r600_bytecode_alu alu; 3036 int r; 3037 3038 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3039 alu.inst = opcode; 3040 alu.predicate = 1; 3041 3042 alu.dst.sel = ctx->temp_reg; 3043 alu.dst.write = 1; 3044 alu.dst.chan = 0; 3045 3046 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3047 alu.src[1].sel = V_SQ_ALU_SRC_0; 3048 alu.src[1].chan = 0; 3049 3050 alu.last = 1; 3051 3052 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 3053 if (r) 3054 return r; 3055 return 0; 3056} 3057 3058static int pops(struct r600_shader_ctx *ctx, int pops) 3059{ 3060 unsigned force_pop = ctx->bc->force_add_cf; 3061 3062 if (!force_pop) { 3063 int alu_pop = 3; 3064 if (ctx->bc->cf_last) { 3065 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) 3066 alu_pop = 0; 3067 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) 3068 alu_pop = 1; 3069 } 3070 alu_pop += pops; 3071 if (alu_pop == 1) { 3072 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; 3073 ctx->bc->force_add_cf = 1; 3074 } else if (alu_pop == 2) { 3075 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; 3076 ctx->bc->force_add_cf = 1; 3077 } else { 3078 force_pop = 1; 3079 } 3080 } 3081 3082 if (force_pop) { 3083 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 3084 ctx->bc->cf_last->pop_count = pops; 3085 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 3086 } 3087 3088 return 0; 3089} 3090 3091static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 3092{ 3093 switch(reason) { 3094 case FC_PUSH_VPM: 3095 ctx->bc->callstack[ctx->bc->call_sp].current--; 3096 break; 3097 case FC_PUSH_WQM: 3098 case FC_LOOP: 3099 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 3100 break; 3101 case FC_REP: 3102 /* TOODO : for 16 vp asic should -= 2; */ 3103 ctx->bc->callstack[ctx->bc->call_sp].current --; 3104 break; 3105 } 3106} 3107 3108static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 3109{ 3110 if (check_max_only) { 3111 int diff; 3112 switch (reason) { 3113 case FC_PUSH_VPM: 3114 diff = 1; 3115 break; 3116 case FC_PUSH_WQM: 3117 diff = 4; 3118 break; 3119 default: 3120 assert(0); 3121 diff = 0; 3122 } 3123 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 3124 ctx->bc->callstack[ctx->bc->call_sp].max) { 3125 ctx->bc->callstack[ctx->bc->call_sp].max = 3126 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 3127 } 3128 return; 3129 } 3130 switch (reason) { 3131 case FC_PUSH_VPM: 3132 ctx->bc->callstack[ctx->bc->call_sp].current++; 3133 break; 3134 case FC_PUSH_WQM: 3135 case FC_LOOP: 3136 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 3137 break; 3138 case FC_REP: 3139 ctx->bc->callstack[ctx->bc->call_sp].current++; 3140 break; 3141 } 3142 3143 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 3144 ctx->bc->callstack[ctx->bc->call_sp].max) { 3145 ctx->bc->callstack[ctx->bc->call_sp].max = 3146 ctx->bc->callstack[ctx->bc->call_sp].current; 3147 } 3148} 3149 3150static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 3151{ 3152 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 3153 3154 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, 3155 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 3156 sp->mid[sp->num_mid] = ctx->bc->cf_last; 3157 sp->num_mid++; 3158} 3159 3160static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 3161{ 3162 ctx->bc->fc_sp++; 3163 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 3164 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 3165} 3166 3167static void fc_poplevel(struct r600_shader_ctx *ctx) 3168{ 3169 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 3170 if (sp->mid) { 3171 free(sp->mid); 3172 sp->mid = NULL; 3173 } 3174 sp->num_mid = 0; 3175 sp->start = NULL; 3176 sp->type = 0; 3177 ctx->bc->fc_sp--; 3178} 3179 3180#if 0 3181static int emit_return(struct r600_shader_ctx *ctx) 3182{ 3183 r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 3184 return 0; 3185} 3186 3187static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 3188{ 3189 3190 r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 3191 ctx->bc->cf_last->pop_count = pops; 3192 /* TODO work out offset */ 3193 return 0; 3194} 3195 3196static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 3197{ 3198 return 0; 3199} 3200 3201static void emit_testflag(struct r600_shader_ctx *ctx) 3202{ 3203 3204} 3205 3206static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 3207{ 3208 emit_testflag(ctx); 3209 emit_jump_to_offset(ctx, 1, 4); 3210 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 3211 pops(ctx, ifidx + 1); 3212 emit_return(ctx); 3213} 3214 3215static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 3216{ 3217 emit_testflag(ctx); 3218 3219 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3220 ctx->bc->cf_last->pop_count = 1; 3221 3222 fc_set_mid(ctx, fc_sp); 3223 3224 pops(ctx, 1); 3225} 3226#endif 3227 3228static int tgsi_if(struct r600_shader_ctx *ctx) 3229{ 3230 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 3231 3232 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 3233 3234 fc_pushlevel(ctx, FC_IF); 3235 3236 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 3237 return 0; 3238} 3239 3240static int tgsi_else(struct r600_shader_ctx *ctx) 3241{ 3242 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 3243 ctx->bc->cf_last->pop_count = 1; 3244 3245 fc_set_mid(ctx, ctx->bc->fc_sp); 3246 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 3247 return 0; 3248} 3249 3250static int tgsi_endif(struct r600_shader_ctx *ctx) 3251{ 3252 pops(ctx, 1); 3253 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 3254 R600_ERR("if/endif unbalanced in shader\n"); 3255 return -1; 3256 } 3257 3258 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 3259 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3260 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 3261 } else { 3262 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 3263 } 3264 fc_poplevel(ctx); 3265 3266 callstack_decrease_current(ctx, FC_PUSH_VPM); 3267 return 0; 3268} 3269 3270static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 3271{ 3272 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 3273 3274 fc_pushlevel(ctx, FC_LOOP); 3275 3276 /* check stack depth */ 3277 callstack_check_depth(ctx, FC_LOOP, 0); 3278 return 0; 3279} 3280 3281static int tgsi_endloop(struct r600_shader_ctx *ctx) 3282{ 3283 int i; 3284 3285 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 3286 3287 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 3288 R600_ERR("loop/endloop in shader code are not paired.\n"); 3289 return -EINVAL; 3290 } 3291 3292 /* fixup loop pointers - from r600isa 3293 LOOP END points to CF after LOOP START, 3294 LOOP START point to CF after LOOP END 3295 BRK/CONT point to LOOP END CF 3296 */ 3297 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 3298 3299 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3300 3301 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 3302 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 3303 } 3304 /* TODO add LOOPRET support */ 3305 fc_poplevel(ctx); 3306 callstack_decrease_current(ctx, FC_LOOP); 3307 return 0; 3308} 3309 3310static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 3311{ 3312 unsigned int fscp; 3313 3314 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 3315 { 3316 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 3317 break; 3318 } 3319 3320 if (fscp == 0) { 3321 R600_ERR("Break not inside loop/endloop pair\n"); 3322 return -EINVAL; 3323 } 3324 3325 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3326 ctx->bc->cf_last->pop_count = 1; 3327 3328 fc_set_mid(ctx, fscp); 3329 3330 pops(ctx, 1); 3331 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 3332 return 0; 3333} 3334 3335static int tgsi_umad(struct r600_shader_ctx *ctx) 3336{ 3337 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3338 struct r600_bytecode_alu alu; 3339 int i, j, r; 3340 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3341 3342 /* src0 * src1 */ 3343 for (i = 0; i < lasti + 1; i++) { 3344 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3345 continue; 3346 3347 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3348 3349 alu.dst.chan = i; 3350 alu.dst.sel = ctx->temp_reg; 3351 alu.dst.write = 1; 3352 3353 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT; 3354 for (j = 0; j < 2; j++) { 3355 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 3356 } 3357 3358 if (i == lasti) { 3359 alu.last = 1; 3360 } 3361 r = r600_bytecode_add_alu(ctx->bc, &alu); 3362 if (r) 3363 return r; 3364 } 3365 3366 3367 for (i = 0; i < lasti + 1; i++) { 3368 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3369 continue; 3370 3371 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3372 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3373 3374 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT; 3375 3376 alu.src[0].sel = ctx->temp_reg; 3377 alu.src[0].chan = i; 3378 3379 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 3380 if (i == lasti) { 3381 alu.last = 1; 3382 } 3383 r = r600_bytecode_add_alu(ctx->bc, &alu); 3384 if (r) 3385 return r; 3386 } 3387 return 0; 3388} 3389 3390static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 3391 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3392 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3393 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3394 3395 /* FIXME: 3396 * For state trackers other than OpenGL, we'll want to use 3397 * _RECIP_IEEE instead. 3398 */ 3399 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 3400 3401 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 3402 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3403 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3404 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3405 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3406 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3407 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3408 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3409 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3410 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3411 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3412 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3413 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3414 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3415 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3416 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3417 /* gap */ 3418 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3419 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3420 /* gap */ 3421 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3422 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3423 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3424 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3425 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3426 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 3427 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3428 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3429 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3430 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3431 /* gap */ 3432 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3433 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3434 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3435 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3436 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3437 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3438 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3439 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3440 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3441 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3442 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3443 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3444 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3445 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3446 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3447 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3448 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3449 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3450 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3451 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3452 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3453 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3454 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3455 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3456 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3457 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3458 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3459 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3460 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3461 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3462 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3463 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3464 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3465 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3466 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3467 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3468 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 3469 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3470 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3471 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3472 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3473 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3474 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3475 /* gap */ 3476 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3477 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3478 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3479 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3480 /* gap */ 3481 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3482 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3483 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3484 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3485 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3486 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3487 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 3488 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3489 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3490 /* gap */ 3491 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3492 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3493 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3494 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3495 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 3496 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3497 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 3498 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 3499 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3500 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3501 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3502 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3503 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3504 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3505 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3506 /* gap */ 3507 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3508 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3509 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3510 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3511 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3512 /* gap */ 3513 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3514 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3515 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3516 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3517 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3518 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3519 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3520 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3521 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3522 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3523 /* gap */ 3524 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3525 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3526 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3527 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3528 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3529 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3530 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3531 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3532 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3533 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3534 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3535 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3536 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3537 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3538 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3539 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3540 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3541 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3542 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3543 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3544 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3545 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3546 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3547 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3548 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3549 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3550 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3551 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 3552 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 3553 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 3554 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 3555 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 3556 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 3557 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 3558 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 3559 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 3560 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 3561 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 3562 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 3563 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3564}; 3565 3566static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3567 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3568 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3569 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3570 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3571 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, 3572 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3573 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3574 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3575 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3576 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3577 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3578 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3579 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3580 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3581 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3582 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3583 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3584 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3585 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3586 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3587 /* gap */ 3588 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3589 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3590 /* gap */ 3591 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3592 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3593 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3594 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3595 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3596 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 3597 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3598 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3599 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3600 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3601 /* gap */ 3602 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3603 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3604 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3605 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3606 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3607 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3608 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3609 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3610 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3611 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3612 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3613 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3614 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3615 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3616 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3617 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3618 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3619 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3620 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3621 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3622 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3623 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3624 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3625 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3626 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3627 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3628 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3629 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3630 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3631 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3632 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3633 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3634 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3635 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3636 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3637 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3638 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 3639 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3640 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3641 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3642 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3643 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3644 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3645 /* gap */ 3646 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3647 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3648 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3649 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3650 /* gap */ 3651 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3652 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3653 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3654 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3655 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3656 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_int_to_flt}, 3657 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 3658 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3659 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3660 /* gap */ 3661 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3662 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 3663 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 3664 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3665 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 3666 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3667 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 3668 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 3669 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3670 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3671 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3672 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3673 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3674 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3675 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3676 /* gap */ 3677 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3678 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3679 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3680 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3681 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3682 /* gap */ 3683 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3684 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3685 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3686 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3687 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3688 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3689 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3690 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3691 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3692 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3693 /* gap */ 3694 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3695 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2}, 3696 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3697 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 3698 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 3699 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 3700 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 3701 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3702 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 3703 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3704 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2}, 3705 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 3706 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3707 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 3708 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 3709 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 3710 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3711 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2}, 3712 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 3713 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 3714 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3715 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 3716 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, 3717 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3718 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3719 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3720 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3721 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 3722 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 3723 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 3724 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 3725 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 3726 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 3727 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 3728 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 3729 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 3730 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 3731 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 3732 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 3733 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3734}; 3735 3736static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 3737 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3738 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3739 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3740 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, 3741 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, 3742 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3743 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3744 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3745 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3746 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3747 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3748 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3749 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3750 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3751 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3752 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3753 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3754 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3755 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3756 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3757 /* gap */ 3758 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3759 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3760 /* gap */ 3761 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3762 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3763 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3764 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3765 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3766 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 3767 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, 3768 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, 3769 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, 3770 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3771 /* gap */ 3772 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3773 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3774 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3775 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3776 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, 3777 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3778 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3779 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3780 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3781 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3782 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3783 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3784 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3785 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3786 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3787 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3788 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, 3789 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3790 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3791 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3792 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3793 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3794 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3795 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3796 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3797 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3798 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3799 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3800 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3801 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3802 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3803 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3804 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3805 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3806 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3807 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3808 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 3809 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3810 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3811 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3812 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3813 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3814 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3815 /* gap */ 3816 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3817 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3818 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3819 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3820 /* gap */ 3821 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3822 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3823 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3824 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3825 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3826 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3827 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 3828 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3829 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3830 /* gap */ 3831 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3832 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3833 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3834 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3835 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 3836 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3837 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 3838 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 3839 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3840 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3841 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3842 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3843 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3844 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3845 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3846 /* gap */ 3847 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3848 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3849 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3850 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3851 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3852 /* gap */ 3853 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3854 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3855 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3856 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3857 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3858 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3859 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3860 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3861 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3862 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3863 /* gap */ 3864 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3865 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3866 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3867 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 3868 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 3869 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3870 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3871 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3872 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3873 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3874 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3875 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3876 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3877 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3878 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3879 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3880 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3881 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3882 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3883 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3884 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3885 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3886 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3887 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3888 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3889 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3890 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3891 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 3892 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 3893 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 3894 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 3895 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 3896 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 3897 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 3898 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 3899 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 3900 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 3901 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 3902 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 3903 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3904}; 3905