r600_shader.c revision 54e8dcaad65cbe3603730414fd8d76ac53f89a86
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_info.h" 25#include "tgsi/tgsi_parse.h" 26#include "tgsi/tgsi_scan.h" 27#include "tgsi/tgsi_dump.h" 28#include "util/u_format.h" 29#include "r600_pipe.h" 30#include "r600_asm.h" 31#include "r600_sq.h" 32#include "r600_formats.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37#include <byteswap.h> 38 39/* CAYMAN notes 40Why CAYMAN got loops for lots of instructions is explained here. 41 42-These 8xx t-slot only ops are implemented in all vector slots. 43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 44These 8xx t-slot only opcodes become vector ops, with all four 45slots expecting the arguments on sources a and b. Result is 46broadcast to all channels. 
47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 48These 8xx t-slot only opcodes become vector ops in the z, y, and 49x slots. 50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 52SQRT_IEEE/_64 53SIN/COS 54The w slot may have an independent co-issued operation, or if the 55result is required to be in the w slot, the opcode above may be 56issued in the w slot as well. 57The compiler must issue the source argument to slots z, y, and x 58*/ 59 60static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 61{ 62 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 63 struct r600_shader *rshader = &shader->shader; 64 uint32_t *ptr; 65 int i; 66 67 /* copy new shader */ 68 if (shader->bo == NULL) { 69 shader->bo = (struct r600_resource*) 70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4); 71 if (shader->bo == NULL) { 72 return -ENOMEM; 73 } 74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->ctx.cs, PIPE_TRANSFER_WRITE); 75 if (R600_BIG_ENDIAN) { 76 for (i = 0; i < rshader->bc.ndw; ++i) { 77 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 78 } 79 } else { 80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 81 } 82 rctx->ws->buffer_unmap(shader->bo->buf); 83 } 84 /* build state */ 85 switch (rshader->processor_type) { 86 case TGSI_PROCESSOR_VERTEX: 87 if (rctx->chip_class >= EVERGREEN) { 88 evergreen_pipe_shader_vs(ctx, shader); 89 } else { 90 r600_pipe_shader_vs(ctx, shader); 91 } 92 break; 93 case TGSI_PROCESSOR_FRAGMENT: 94 if (rctx->chip_class >= EVERGREEN) { 95 evergreen_pipe_shader_ps(ctx, shader); 96 } else { 97 r600_pipe_shader_ps(ctx, shader); 98 } 99 break; 100 default: 101 return -EINVAL; 102 } 103 return 0; 104} 105 106static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader); 107 108int r600_pipe_shader_create(struct pipe_context *ctx, 
struct r600_pipe_shader *shader) 109{ 110 static int dump_shaders = -1; 111 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 112 int r; 113 114 /* Would like some magic "get_bool_option_once" routine. 115 */ 116 if (dump_shaders == -1) 117 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 118 119 if (dump_shaders) { 120 fprintf(stderr, "--------------------------------------------------------------\n"); 121 tgsi_dump(shader->tokens, 0); 122 123 if (shader->so.num_outputs) { 124 unsigned i; 125 fprintf(stderr, "STREAMOUT\n"); 126 for (i = 0; i < shader->so.num_outputs; i++) { 127 unsigned mask = ((1 << shader->so.output[i].num_components) - 1) << 128 shader->so.output[i].start_component; 129 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i, 130 shader->so.output[i].output_buffer, shader->so.output[i].register_index, 131 mask & 1 ? "x" : "_", 132 (mask >> 1) & 1 ? "y" : "_", 133 (mask >> 2) & 1 ? "z" : "_", 134 (mask >> 3) & 1 ? "w" : "_"); 135 } 136 } 137 } 138 r = r600_shader_from_tgsi(rctx, shader); 139 if (r) { 140 R600_ERR("translation from TGSI failed !\n"); 141 return r; 142 } 143 r = r600_bytecode_build(&shader->shader.bc); 144 if (r) { 145 R600_ERR("building bytecode failed !\n"); 146 return r; 147 } 148 if (dump_shaders) { 149 r600_bytecode_dump(&shader->shader.bc); 150 fprintf(stderr, "______________________________________________________________\n"); 151 } 152 return r600_pipe_shader(ctx, shader); 153} 154 155void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 156{ 157 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); 158 r600_bytecode_clear(&shader->shader.bc); 159 160 memset(&shader->shader,0,sizeof(struct r600_shader)); 161} 162 163/* 164 * tgsi -> r600 shader 165 */ 166struct r600_shader_tgsi_instruction; 167 168struct r600_shader_src { 169 unsigned sel; 170 unsigned swizzle[4]; 171 unsigned neg; 172 unsigned abs; 173 unsigned rel; 174 uint32_t 
value[4]; 175}; 176 177struct r600_shader_ctx { 178 struct tgsi_shader_info info; 179 struct tgsi_parse_context parse; 180 const struct tgsi_token *tokens; 181 unsigned type; 182 unsigned file_offset[TGSI_FILE_COUNT]; 183 unsigned temp_reg; 184 struct r600_shader_tgsi_instruction *inst_info; 185 struct r600_bytecode *bc; 186 struct r600_shader *shader; 187 struct r600_shader_src src[4]; 188 u32 *literals; 189 u32 nliterals; 190 u32 max_driver_temp_used; 191 /* needed for evergreen interpolation */ 192 boolean input_centroid; 193 boolean input_linear; 194 boolean input_perspective; 195 int num_interp_gpr; 196 int face_gpr; 197 int colors_used; 198 boolean clip_vertex_write; 199 unsigned cv_output; 200}; 201 202struct r600_shader_tgsi_instruction { 203 unsigned tgsi_opcode; 204 unsigned is_op3; 205 unsigned r600_opcode; 206 int (*process)(struct r600_shader_ctx *ctx); 207}; 208 209static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 210static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 211 212static int tgsi_is_supported(struct r600_shader_ctx *ctx) 213{ 214 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 215 int j; 216 217 if (i->Instruction.NumDstRegs > 1) { 218 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 219 return -EINVAL; 220 } 221 if (i->Instruction.Predicate) { 222 R600_ERR("predicate unsupported\n"); 223 return -EINVAL; 224 } 225#if 0 226 if (i->Instruction.Label) { 227 R600_ERR("label unsupported\n"); 228 return -EINVAL; 229 } 230#endif 231 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 232 if (i->Src[j].Register.Dimension) { 233 R600_ERR("unsupported src %d (dimension %d)\n", j, 234 i->Src[j].Register.Dimension); 235 return -EINVAL; 236 } 237 } 238 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 239 if (i->Dst[j].Register.Dimension) { 240 R600_ERR("unsupported dst (dimension)\n"); 241 return -EINVAL; 242 } 243 } 
244 return 0; 245} 246 247static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 248{ 249 int i, r; 250 struct r600_bytecode_alu alu; 251 int gpr = 0, base_chan = 0; 252 int ij_index = 0; 253 254 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 255 ij_index = 0; 256 if (ctx->shader->input[input].centroid) 257 ij_index++; 258 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 259 ij_index = 0; 260 /* if we have perspective add one */ 261 if (ctx->input_perspective) { 262 ij_index++; 263 /* if we have perspective centroid */ 264 if (ctx->input_centroid) 265 ij_index++; 266 } 267 if (ctx->shader->input[input].centroid) 268 ij_index++; 269 } 270 271 /* work out gpr and base_chan from index */ 272 gpr = ij_index / 2; 273 base_chan = (2 * (ij_index % 2)) + 1; 274 275 for (i = 0; i < 8; i++) { 276 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 277 278 if (i < 4) 279 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 280 else 281 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 282 283 if ((i > 1) && (i < 6)) { 284 alu.dst.sel = ctx->shader->input[input].gpr; 285 alu.dst.write = 1; 286 } 287 288 alu.dst.chan = i % 4; 289 290 alu.src[0].sel = gpr; 291 alu.src[0].chan = (base_chan - (i % 2)); 292 293 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 294 295 alu.bank_swizzle_force = SQ_ALU_VEC_210; 296 if ((i % 4) == 3) 297 alu.last = 1; 298 r = r600_bytecode_add_alu(ctx->bc, &alu); 299 if (r) 300 return r; 301 } 302 return 0; 303} 304 305static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 306{ 307 int i, r; 308 struct r600_bytecode_alu alu; 309 310 for (i = 0; i < 4; i++) { 311 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 312 313 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0; 314 315 alu.dst.sel = ctx->shader->input[input].gpr; 316 alu.dst.write = 1; 317 318 alu.dst.chan = i; 319 320 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + 
ctx->shader->input[input].lds_pos; 321 alu.src[0].chan = i; 322 323 if (i == 3) 324 alu.last = 1; 325 r = r600_bytecode_add_alu(ctx->bc, &alu); 326 if (r) 327 return r; 328 } 329 return 0; 330} 331 332/* 333 * Special export handling in shaders 334 * 335 * shader export ARRAY_BASE for EXPORT_POS: 336 * 60 is position 337 * 61 is misc vector 338 * 62, 63 are clip distance vectors 339 * 340 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 341 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 342 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 343 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 344 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 345 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 346 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 347 * exclusive from render target index) 348 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 349 * 350 * 351 * shader export ARRAY_BASE for EXPORT_PIXEL: 352 * 0-7 CB targets 353 * 61 computed Z vector 354 * 355 * The use of the values exported in the computed Z vector are controlled 356 * by DB_SHADER_CONTROL: 357 * Z_EXPORT_ENABLE - Z as a float in RED 358 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 359 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 360 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 361 * DB_SOURCE_FORMAT - export control restrictions 362 * 363 */ 364 365 366/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 367static int r600_spi_sid(struct r600_shader_io * io) 368{ 369 int index, name = io->name; 370 371 /* These params are handled differently, they don't need 372 * semantic indices, so we'll use 0 for them. 
373 */ 374 if (name == TGSI_SEMANTIC_POSITION || 375 name == TGSI_SEMANTIC_PSIZE || 376 name == TGSI_SEMANTIC_FACE) 377 index = 0; 378 else { 379 if (name == TGSI_SEMANTIC_GENERIC) { 380 /* For generic params simply use sid from tgsi */ 381 index = io->sid; 382 } else { 383 /* For non-generic params - pack name and sid into 8 bits */ 384 index = 0x80 | (name<<3) | (io->sid); 385 } 386 387 /* Make sure that all really used indices have nonzero value, so 388 * we can just compare it to 0 later instead of comparing the name 389 * with different values to detect special cases. */ 390 index++; 391 } 392 393 return index; 394}; 395 396/* turn input into interpolate on EG */ 397static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 398{ 399 int r = 0; 400 401 if (ctx->shader->input[index].spi_sid) { 402 ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 403 if (ctx->shader->input[index].interpolate > 0) { 404 r = evergreen_interp_alu(ctx, index); 405 } else { 406 r = evergreen_interp_flat(ctx, index); 407 } 408 } 409 return r; 410} 411 412static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 413{ 414 struct r600_bytecode_alu alu; 415 int i, r; 416 int gpr_front = ctx->shader->input[front].gpr; 417 int gpr_back = ctx->shader->input[back].gpr; 418 419 for (i = 0; i < 4; i++) { 420 memset(&alu, 0, sizeof(alu)); 421 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 422 alu.is_op3 = 1; 423 alu.dst.write = 1; 424 alu.dst.sel = gpr_front; 425 alu.src[0].sel = ctx->face_gpr; 426 alu.src[1].sel = gpr_front; 427 alu.src[2].sel = gpr_back; 428 429 alu.dst.chan = i; 430 alu.src[1].chan = i; 431 alu.src[2].chan = i; 432 alu.last = (i==3); 433 434 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 435 return r; 436 } 437 438 return 0; 439} 440 441static int tgsi_declaration(struct r600_shader_ctx *ctx) 442{ 443 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 444 unsigned i; 445 int r; 446 447 switch 
(d->Declaration.File) { 448 case TGSI_FILE_INPUT: 449 i = ctx->shader->ninput++; 450 ctx->shader->input[i].name = d->Semantic.Name; 451 ctx->shader->input[i].sid = d->Semantic.Index; 452 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 453 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 454 ctx->shader->input[i].centroid = d->Declaration.Centroid; 455 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; 456 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 457 if (ctx->shader->input[i].name == TGSI_SEMANTIC_FACE) 458 ctx->face_gpr = ctx->shader->input[i].gpr; 459 else if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) 460 ctx->colors_used++; 461 if (ctx->bc->chip_class >= EVERGREEN) { 462 r = evergreen_interp_input(ctx, i); 463 if (r) 464 return r; 465 } 466 } 467 break; 468 case TGSI_FILE_OUTPUT: 469 i = ctx->shader->noutput++; 470 ctx->shader->output[i].name = d->Semantic.Name; 471 ctx->shader->output[i].sid = d->Semantic.Index; 472 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 473 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; 474 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 475 ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 476 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 477 switch (d->Semantic.Name) { 478 case TGSI_SEMANTIC_CLIPDIST: 479 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2); 480 break; 481 case TGSI_SEMANTIC_PSIZE: 482 ctx->shader->vs_out_misc_write = 1; 483 break; 484 case TGSI_SEMANTIC_CLIPVERTEX: 485 ctx->clip_vertex_write = TRUE; 486 ctx->cv_output = i; 487 break; 488 } 489 } 490 break; 491 case TGSI_FILE_CONSTANT: 492 case TGSI_FILE_TEMPORARY: 493 case TGSI_FILE_SAMPLER: 494 case TGSI_FILE_ADDRESS: 495 break; 496 497 case TGSI_FILE_SYSTEM_VALUE: 498 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 499 struct r600_bytecode_alu alu; 500 memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 501 502 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 503 alu.src[0].sel = 0; 504 alu.src[0].chan = 3; 505 506 alu.dst.sel = 0; 507 alu.dst.chan = 3; 508 alu.dst.write = 1; 509 alu.last = 1; 510 511 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 512 return r; 513 break; 514 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 515 break; 516 default: 517 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 518 return -EINVAL; 519 } 520 return 0; 521} 522 523static int r600_get_temp(struct r600_shader_ctx *ctx) 524{ 525 return ctx->temp_reg + ctx->max_driver_temp_used++; 526} 527 528/* 529 * for evergreen we need to scan the shader to find the number of GPRs we need to 530 * reserve for interpolation. 531 * 532 * we need to know if we are going to emit 533 * any centroid inputs 534 * if perspective and linear are required 535*/ 536static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 537{ 538 int i; 539 int num_baryc; 540 541 ctx->input_linear = FALSE; 542 ctx->input_perspective = FALSE; 543 ctx->input_centroid = FALSE; 544 ctx->num_interp_gpr = 1; 545 546 /* any centroid inputs */ 547 for (i = 0; i < ctx->info.num_inputs; i++) { 548 /* skip position/face */ 549 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 550 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 551 continue; 552 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 553 ctx->input_linear = TRUE; 554 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 555 ctx->input_perspective = TRUE; 556 if (ctx->info.input_centroid[i]) 557 ctx->input_centroid = TRUE; 558 } 559 560 num_baryc = 0; 561 /* ignoring sample for now */ 562 if (ctx->input_perspective) 563 num_baryc++; 564 if (ctx->input_linear) 565 num_baryc++; 566 if (ctx->input_centroid) 567 num_baryc *= 2; 568 569 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 570 571 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 572 return 
ctx->num_interp_gpr; 573} 574 575static void tgsi_src(struct r600_shader_ctx *ctx, 576 const struct tgsi_full_src_register *tgsi_src, 577 struct r600_shader_src *r600_src) 578{ 579 memset(r600_src, 0, sizeof(*r600_src)); 580 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 581 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 582 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 583 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 584 r600_src->neg = tgsi_src->Register.Negate; 585 r600_src->abs = tgsi_src->Register.Absolute; 586 587 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 588 int index; 589 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 590 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 591 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 592 593 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 594 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 595 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 596 return; 597 } 598 index = tgsi_src->Register.Index; 599 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 600 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 601 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 602 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 603 r600_src->swizzle[0] = 3; 604 r600_src->swizzle[1] = 3; 605 r600_src->swizzle[2] = 3; 606 r600_src->swizzle[3] = 3; 607 r600_src->sel = 0; 608 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 609 r600_src->swizzle[0] = 0; 610 r600_src->swizzle[1] = 0; 611 r600_src->swizzle[2] = 0; 612 r600_src->swizzle[3] = 0; 613 r600_src->sel = 0; 614 } 615 } else { 616 if (tgsi_src->Register.Indirect) 617 r600_src->rel = V_SQ_REL_RELATIVE; 618 r600_src->sel = tgsi_src->Register.Index; 619 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 620 } 621} 
622 623static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 624{ 625 struct r600_bytecode_vtx vtx; 626 unsigned int ar_reg; 627 int r; 628 629 if (offset) { 630 struct r600_bytecode_alu alu; 631 632 memset(&alu, 0, sizeof(alu)); 633 634 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 635 alu.src[0].sel = ctx->bc->ar_reg; 636 637 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 638 alu.src[1].value = offset; 639 640 alu.dst.sel = dst_reg; 641 alu.dst.write = 1; 642 alu.last = 1; 643 644 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 645 return r; 646 647 ar_reg = dst_reg; 648 } else { 649 ar_reg = ctx->bc->ar_reg; 650 } 651 652 memset(&vtx, 0, sizeof(vtx)); 653 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 654 vtx.src_gpr = ar_reg; 655 vtx.mega_fetch_count = 16; 656 vtx.dst_gpr = dst_reg; 657 vtx.dst_sel_x = 0; /* SEL_X */ 658 vtx.dst_sel_y = 1; /* SEL_Y */ 659 vtx.dst_sel_z = 2; /* SEL_Z */ 660 vtx.dst_sel_w = 3; /* SEL_W */ 661 vtx.data_format = FMT_32_32_32_32_FLOAT; 662 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 663 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 664 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 665 vtx.endian = r600_endian_swap(32); 666 667 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 668 return r; 669 670 return 0; 671} 672 673static int tgsi_split_constant(struct r600_shader_ctx *ctx) 674{ 675 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 676 struct r600_bytecode_alu alu; 677 int i, j, k, nconst, r; 678 679 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 680 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 681 nconst++; 682 } 683 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 684 } 685 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 686 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 687 continue; 688 } 689 690 if (ctx->src[i].rel) { 691 int treg = r600_get_temp(ctx); 692 if ((r = 
tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 693 return r; 694 695 ctx->src[i].sel = treg; 696 ctx->src[i].rel = 0; 697 j--; 698 } else if (j > 0) { 699 int treg = r600_get_temp(ctx); 700 for (k = 0; k < 4; k++) { 701 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 702 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 703 alu.src[0].sel = ctx->src[i].sel; 704 alu.src[0].chan = k; 705 alu.src[0].rel = ctx->src[i].rel; 706 alu.dst.sel = treg; 707 alu.dst.chan = k; 708 alu.dst.write = 1; 709 if (k == 3) 710 alu.last = 1; 711 r = r600_bytecode_add_alu(ctx->bc, &alu); 712 if (r) 713 return r; 714 } 715 ctx->src[i].sel = treg; 716 ctx->src[i].rel =0; 717 j--; 718 } 719 } 720 return 0; 721} 722 723/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 724static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 725{ 726 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 727 struct r600_bytecode_alu alu; 728 int i, j, k, nliteral, r; 729 730 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 731 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 732 nliteral++; 733 } 734 } 735 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 736 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 737 int treg = r600_get_temp(ctx); 738 for (k = 0; k < 4; k++) { 739 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 740 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 741 alu.src[0].sel = ctx->src[i].sel; 742 alu.src[0].chan = k; 743 alu.src[0].value = ctx->src[i].value[k]; 744 alu.dst.sel = treg; 745 alu.dst.chan = k; 746 alu.dst.write = 1; 747 if (k == 3) 748 alu.last = 1; 749 r = r600_bytecode_add_alu(ctx->bc, &alu); 750 if (r) 751 return r; 752 } 753 ctx->src[i].sel = treg; 754 j--; 755 } 756 } 757 return 0; 758} 759 760static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 761{ 762 int i, r, count = ctx->shader->ninput; 763 764 /* 
additional inputs will be allocated right after the existing inputs, 765 * we won't need them after the color selection, so we don't need to 766 * reserve these gprs for the rest of the shader code and to adjust 767 * output offsets etc. */ 768 int gpr = ctx->file_offset[TGSI_FILE_INPUT] + 769 ctx->info.file_max[TGSI_FILE_INPUT] + 1; 770 771 if (ctx->face_gpr == -1) { 772 i = ctx->shader->ninput++; 773 ctx->shader->input[i].name = TGSI_SEMANTIC_FACE; 774 ctx->shader->input[i].spi_sid = 0; 775 ctx->shader->input[i].gpr = gpr++; 776 ctx->face_gpr = ctx->shader->input[i].gpr; 777 } 778 779 for (i = 0; i < count; i++) { 780 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 781 int ni = ctx->shader->ninput++; 782 memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io)); 783 ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 784 ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]); 785 ctx->shader->input[ni].gpr = gpr++; 786 787 if (ctx->bc->chip_class >= EVERGREEN) { 788 r = evergreen_interp_input(ctx, ni); 789 if (r) 790 return r; 791 } 792 793 r = select_twoside_color(ctx, i, ni); 794 if (r) 795 return r; 796 } 797 } 798 return 0; 799} 800 801static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader) 802{ 803 struct r600_shader *shader = &pipeshader->shader; 804 struct tgsi_token *tokens = pipeshader->tokens; 805 struct pipe_stream_output_info so = pipeshader->so; 806 struct tgsi_full_immediate *immediate; 807 struct tgsi_full_property *property; 808 struct r600_shader_ctx ctx; 809 struct r600_bytecode_output output[32]; 810 unsigned output_done, noutput; 811 unsigned opcode; 812 int i, j, k, r = 0; 813 int next_pixel_base = 0, next_pos_base = 60, next_param_base = 0; 814 815 ctx.bc = &shader->bc; 816 ctx.shader = shader; 817 r600_bytecode_init(ctx.bc, rctx->chip_class, rctx->family); 818 ctx.tokens = tokens; 819 tgsi_scan_shader(tokens, &ctx.info); 820 
tgsi_parse_init(&ctx.parse, tokens); 821 ctx.type = ctx.parse.FullHeader.Processor.Processor; 822 shader->processor_type = ctx.type; 823 ctx.bc->type = shader->processor_type; 824 825 ctx.face_gpr = -1; 826 ctx.colors_used = 0; 827 ctx.clip_vertex_write = 0; 828 829 shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side; 830 831 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) || 832 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color)); 833 834 shader->nr_cbufs = rctx->nr_cbufs; 835 836 /* register allocations */ 837 /* Values [0,127] correspond to GPR[0..127]. 838 * Values [128,159] correspond to constant buffer bank 0 839 * Values [160,191] correspond to constant buffer bank 1 840 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 841 * Values [256,287] correspond to constant buffer bank 2 (EG) 842 * Values [288,319] correspond to constant buffer bank 3 (EG) 843 * Other special values are shown in the list below. 844 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 845 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 846 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 847 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 848 * 248 SQ_ALU_SRC_0: special constant 0.0. 849 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 850 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 851 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 852 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 853 * 253 SQ_ALU_SRC_LITERAL: literal constant. 854 * 254 SQ_ALU_SRC_PV: previous vector result. 855 * 255 SQ_ALU_SRC_PS: previous scalar result. 
856 */ 857 for (i = 0; i < TGSI_FILE_COUNT; i++) { 858 ctx.file_offset[i] = 0; 859 } 860 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 861 ctx.file_offset[TGSI_FILE_INPUT] = 1; 862 if (ctx.bc->chip_class >= EVERGREEN) { 863 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 864 } else { 865 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 866 } 867 } 868 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { 869 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 870 } 871 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 872 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 873 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 874 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 875 876 /* Outside the GPR range. This will be translated to one of the 877 * kcache banks later. */ 878 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 879 880 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 881 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 882 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 883 ctx.temp_reg = ctx.bc->ar_reg + 1; 884 885 ctx.nliterals = 0; 886 ctx.literals = NULL; 887 shader->fs_write_all = FALSE; 888 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 889 tgsi_parse_token(&ctx.parse); 890 switch (ctx.parse.FullToken.Token.Type) { 891 case TGSI_TOKEN_TYPE_IMMEDIATE: 892 immediate = &ctx.parse.FullToken.FullImmediate; 893 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 894 if(ctx.literals == NULL) { 895 r = -ENOMEM; 896 goto out_err; 897 } 898 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 899 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 900 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 901 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 902 ctx.nliterals++; 903 break; 904 case TGSI_TOKEN_TYPE_DECLARATION: 905 r = tgsi_declaration(&ctx); 906 if (r) 907 goto out_err; 908 break; 909 
case TGSI_TOKEN_TYPE_INSTRUCTION: 910 break; 911 case TGSI_TOKEN_TYPE_PROPERTY: 912 property = &ctx.parse.FullToken.FullProperty; 913 switch (property->Property.PropertyName) { 914 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: 915 if (property->u[0].Data == 1) 916 shader->fs_write_all = TRUE; 917 break; 918 case TGSI_PROPERTY_VS_PROHIBIT_UCPS: 919 if (property->u[0].Data == 1) 920 shader->vs_prohibit_ucps = TRUE; 921 break; 922 } 923 break; 924 default: 925 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 926 r = -EINVAL; 927 goto out_err; 928 } 929 } 930 931 if (shader->two_side && ctx.colors_used) { 932 if ((r = process_twoside_color_inputs(&ctx))) 933 return r; 934 } 935 936 tgsi_parse_init(&ctx.parse, tokens); 937 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 938 tgsi_parse_token(&ctx.parse); 939 switch (ctx.parse.FullToken.Token.Type) { 940 case TGSI_TOKEN_TYPE_INSTRUCTION: 941 r = tgsi_is_supported(&ctx); 942 if (r) 943 goto out_err; 944 ctx.max_driver_temp_used = 0; 945 /* reserve first tmp for everyone */ 946 r600_get_temp(&ctx); 947 948 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 949 if ((r = tgsi_split_constant(&ctx))) 950 goto out_err; 951 if ((r = tgsi_split_literal_constant(&ctx))) 952 goto out_err; 953 if (ctx.bc->chip_class == CAYMAN) 954 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 955 else if (ctx.bc->chip_class >= EVERGREEN) 956 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 957 else 958 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 959 r = ctx.inst_info->process(&ctx); 960 if (r) 961 goto out_err; 962 break; 963 default: 964 break; 965 } 966 } 967 968 noutput = shader->noutput; 969 970 if (ctx.clip_vertex_write) { 971 /* need to convert a clipvertex write into clipdistance writes and not export 972 the clip vertex anymore */ 973 974 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 975 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 976 
shader->output[noutput].gpr = ctx.temp_reg; 977 noutput++; 978 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 979 shader->output[noutput].gpr = ctx.temp_reg+1; 980 noutput++; 981 982 shader->clip_dist_write = 0xFF; 983 984 for (i = 0; i < 8; i++) { 985 int oreg = i >> 2; 986 int ochan = i & 3; 987 988 for (j = 0; j < 4; j++) { 989 struct r600_bytecode_alu alu; 990 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 991 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4); 992 alu.src[0].sel = shader->output[ctx.cv_output].gpr; 993 alu.src[0].chan = j; 994 995 alu.src[1].sel = 512 + i; 996 alu.src[1].kc_bank = 1; 997 alu.src[1].chan = j; 998 999 alu.dst.sel = ctx.temp_reg + oreg; 1000 alu.dst.chan = j; 1001 alu.dst.write = (j == ochan); 1002 if (j == 3) 1003 alu.last = 1; 1004 r = r600_bytecode_add_alu(ctx.bc, &alu); 1005 if (r) 1006 return r; 1007 } 1008 } 1009 } 1010 1011 /* clamp color outputs */ 1012 if (shader->clamp_color) { 1013 for (i = 0; i < noutput; i++) { 1014 if (shader->output[i].name == TGSI_SEMANTIC_COLOR || 1015 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) { 1016 1017 int j; 1018 for (j = 0; j < 4; j++) { 1019 struct r600_bytecode_alu alu; 1020 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1021 1022 /* MOV_SAT R, R */ 1023 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1024 alu.dst.sel = shader->output[i].gpr; 1025 alu.dst.chan = j; 1026 alu.dst.write = 1; 1027 alu.dst.clamp = 1; 1028 alu.src[0].sel = alu.dst.sel; 1029 alu.src[0].chan = j; 1030 1031 if (j == 3) { 1032 alu.last = 1; 1033 } 1034 r = r600_bytecode_add_alu(ctx.bc, &alu); 1035 if (r) 1036 return r; 1037 } 1038 } 1039 } 1040 } 1041 1042 /* Add stream outputs. 
*/ 1043 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) { 1044 for (i = 0; i < so.num_outputs; i++) { 1045 struct r600_bytecode_output output; 1046 1047 if (so.output[i].output_buffer >= 4) { 1048 R600_ERR("exceeded the max number of stream output buffers, got: %d\n", 1049 so.output[i].output_buffer); 1050 r = -EINVAL; 1051 goto out_err; 1052 } 1053 if (so.output[i].start_component) { 1054 R600_ERR("stream_output - start_component cannot be non-zero\n"); 1055 r = -EINVAL; 1056 goto out_err; 1057 } 1058 1059 memset(&output, 0, sizeof(struct r600_bytecode_output)); 1060 output.gpr = shader->output[so.output[i].register_index].gpr; 1061 output.elem_size = 0; 1062 output.array_base = so.output[i].dst_offset; 1063 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 1064 output.burst_count = 1; 1065 output.barrier = 1; 1066 /* array_size is an upper limit for the burst_count 1067 * with MEM_STREAM instructions */ 1068 output.array_size = 0xFFF; 1069 output.comp_mask = (1 << so.output[i].num_components) - 1; 1070 if (ctx.bc->chip_class >= EVERGREEN) { 1071 switch (so.output[i].output_buffer) { 1072 case 0: 1073 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0; 1074 break; 1075 case 1: 1076 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1; 1077 break; 1078 case 2: 1079 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2; 1080 break; 1081 case 3: 1082 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3; 1083 break; 1084 } 1085 } else { 1086 switch (so.output[i].output_buffer) { 1087 case 0: 1088 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0; 1089 break; 1090 case 1: 1091 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1; 1092 break; 1093 case 2: 1094 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2; 1095 break; 1096 case 3: 1097 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3; 1098 break; 1099 } 1100 } 1101 r = 
r600_bytecode_add_output(ctx.bc, &output); 1102 if (r) 1103 goto out_err; 1104 } 1105 } 1106 1107 /* export output */ 1108 for (i = 0, j = 0; i < noutput; i++, j++) { 1109 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1110 output[j].gpr = shader->output[i].gpr; 1111 output[j].elem_size = 3; 1112 output[j].swizzle_x = 0; 1113 output[j].swizzle_y = 1; 1114 output[j].swizzle_z = 2; 1115 output[j].swizzle_w = 3; 1116 output[j].burst_count = 1; 1117 output[j].barrier = 1; 1118 output[j].type = -1; 1119 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1120 switch (ctx.type) { 1121 case TGSI_PROCESSOR_VERTEX: 1122 switch (shader->output[i].name) { 1123 case TGSI_SEMANTIC_POSITION: 1124 output[j].array_base = next_pos_base++; 1125 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1126 break; 1127 1128 case TGSI_SEMANTIC_PSIZE: 1129 output[j].array_base = next_pos_base++; 1130 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1131 break; 1132 case TGSI_SEMANTIC_CLIPVERTEX: 1133 j--; 1134 break; 1135 case TGSI_SEMANTIC_CLIPDIST: 1136 output[j].array_base = next_pos_base++; 1137 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1138 /* spi_sid is 0 for clipdistance outputs that were generated 1139 * for clipvertex - we don't need to pass them to PS */ 1140 if (shader->output[i].spi_sid) { 1141 j++; 1142 /* duplicate it as PARAM to pass to the pixel shader */ 1143 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 1144 output[j].array_base = next_param_base++; 1145 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1146 } 1147 break; 1148 } 1149 break; 1150 case TGSI_PROCESSOR_FRAGMENT: 1151 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 1152 output[j].array_base = next_pixel_base++; 1153 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1154 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { 1155 for (k = 1; k < shader->nr_cbufs; k++) { 1156 j++; 
1157 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1158 output[j].gpr = shader->output[i].gpr; 1159 output[j].elem_size = 3; 1160 output[j].swizzle_x = 0; 1161 output[j].swizzle_y = 1; 1162 output[j].swizzle_z = 2; 1163 output[j].swizzle_w = 3; 1164 output[j].burst_count = 1; 1165 output[j].barrier = 1; 1166 output[j].array_base = next_pixel_base++; 1167 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1168 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1169 } 1170 } 1171 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 1172 output[j].array_base = 61; 1173 output[j].swizzle_x = 2; 1174 output[j].swizzle_y = 7; 1175 output[j].swizzle_z = output[j].swizzle_w = 7; 1176 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1177 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 1178 output[j].array_base = 61; 1179 output[j].swizzle_x = 7; 1180 output[j].swizzle_y = 1; 1181 output[j].swizzle_z = output[j].swizzle_w = 7; 1182 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1183 } else { 1184 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 1185 r = -EINVAL; 1186 goto out_err; 1187 } 1188 break; 1189 default: 1190 R600_ERR("unsupported processor type %d\n", ctx.type); 1191 r = -EINVAL; 1192 goto out_err; 1193 } 1194 1195 if (output[j].type==-1) { 1196 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1197 output[j].array_base = next_param_base++; 1198 } 1199 } 1200 1201 /* add fake param output for vertex shader if no param is exported */ 1202 if (ctx.type == TGSI_PROCESSOR_VERTEX && next_param_base == 0) { 1203 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1204 output[j].gpr = 0; 1205 output[j].elem_size = 3; 1206 output[j].swizzle_x = 7; 1207 output[j].swizzle_y = 7; 1208 output[j].swizzle_z = 7; 1209 output[j].swizzle_w = 7; 1210 output[j].burst_count = 1; 1211 output[j].barrier = 1; 1212 output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1213 output[j].array_base = 0; 1214 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1215 j++; 1216 } 1217 1218 /* add fake pixel export */ 1219 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && j == 0) { 1220 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 1221 output[j].gpr = 0; 1222 output[j].elem_size = 3; 1223 output[j].swizzle_x = 7; 1224 output[j].swizzle_y = 7; 1225 output[j].swizzle_z = 7; 1226 output[j].swizzle_w = 7; 1227 output[j].burst_count = 1; 1228 output[j].barrier = 1; 1229 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1230 output[j].array_base = 0; 1231 output[j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1232 j++; 1233 } 1234 1235 noutput = j; 1236 1237 /* set export done on last export of each type */ 1238 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 1239 if (ctx.bc->chip_class < CAYMAN) { 1240 if (i == (noutput - 1)) { 1241 output[i].end_of_program = 1; 1242 } 1243 } 1244 if (!(output_done & (1 << output[i].type))) { 1245 output_done |= (1 << output[i].type); 1246 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 1247 } 1248 } 1249 /* add output to bytecode */ 1250 for (i = 0; i < noutput; i++) { 1251 r = r600_bytecode_add_output(ctx.bc, &output[i]); 1252 if (r) 1253 goto out_err; 1254 } 1255 /* add program end */ 1256 if (ctx.bc->chip_class == CAYMAN) 1257 cm_bytecode_add_cf_end(ctx.bc); 1258 1259 free(ctx.literals); 1260 tgsi_parse_free(&ctx.parse); 1261 return 0; 1262out_err: 1263 free(ctx.literals); 1264 tgsi_parse_free(&ctx.parse); 1265 return r; 1266} 1267 1268static int tgsi_unsupported(struct r600_shader_ctx *ctx) 1269{ 1270 R600_ERR("%s tgsi opcode unsupported\n", 1271 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 1272 return -EINVAL; 1273} 1274 1275static int tgsi_end(struct r600_shader_ctx *ctx) 1276{ 1277 return 0; 1278} 1279 1280static void 
r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 1281 const struct r600_shader_src *shader_src, 1282 unsigned chan) 1283{ 1284 bc_src->sel = shader_src->sel; 1285 bc_src->chan = shader_src->swizzle[chan]; 1286 bc_src->neg = shader_src->neg; 1287 bc_src->abs = shader_src->abs; 1288 bc_src->rel = shader_src->rel; 1289 bc_src->value = shader_src->value[bc_src->chan]; 1290} 1291 1292static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 1293{ 1294 bc_src->abs = 1; 1295 bc_src->neg = 0; 1296} 1297 1298static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 1299{ 1300 bc_src->neg = !bc_src->neg; 1301} 1302 1303static void tgsi_dst(struct r600_shader_ctx *ctx, 1304 const struct tgsi_full_dst_register *tgsi_dst, 1305 unsigned swizzle, 1306 struct r600_bytecode_alu_dst *r600_dst) 1307{ 1308 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1309 1310 r600_dst->sel = tgsi_dst->Register.Index; 1311 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 1312 r600_dst->chan = swizzle; 1313 r600_dst->write = 1; 1314 if (tgsi_dst->Register.Indirect) 1315 r600_dst->rel = V_SQ_REL_RELATIVE; 1316 if (inst->Instruction.Saturate) { 1317 r600_dst->clamp = 1; 1318 } 1319} 1320 1321static int tgsi_last_instruction(unsigned writemask) 1322{ 1323 int i, lasti = 0; 1324 1325 for (i = 0; i < 4; i++) { 1326 if (writemask & (1 << i)) { 1327 lasti = i; 1328 } 1329 } 1330 return lasti; 1331} 1332 1333static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 1334{ 1335 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1336 struct r600_bytecode_alu alu; 1337 int i, j, r; 1338 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1339 1340 for (i = 0; i < lasti + 1; i++) { 1341 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1342 continue; 1343 1344 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1345 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1346 1347 
alu.inst = ctx->inst_info->r600_opcode; 1348 if (!swap) { 1349 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1350 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1351 } 1352 } else { 1353 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 1354 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1355 } 1356 /* handle some special cases */ 1357 switch (ctx->inst_info->tgsi_opcode) { 1358 case TGSI_OPCODE_SUB: 1359 r600_bytecode_src_toggle_neg(&alu.src[1]); 1360 break; 1361 case TGSI_OPCODE_ABS: 1362 r600_bytecode_src_set_abs(&alu.src[0]); 1363 break; 1364 default: 1365 break; 1366 } 1367 if (i == lasti || trans_only) { 1368 alu.last = 1; 1369 } 1370 r = r600_bytecode_add_alu(ctx->bc, &alu); 1371 if (r) 1372 return r; 1373 } 1374 return 0; 1375} 1376 1377static int tgsi_op2(struct r600_shader_ctx *ctx) 1378{ 1379 return tgsi_op2_s(ctx, 0, 0); 1380} 1381 1382static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1383{ 1384 return tgsi_op2_s(ctx, 1, 0); 1385} 1386 1387static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 1388{ 1389 return tgsi_op2_s(ctx, 0, 1); 1390} 1391 1392static int tgsi_ineg(struct r600_shader_ctx *ctx) 1393{ 1394 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1395 struct r600_bytecode_alu alu; 1396 int i, r; 1397 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1398 1399 for (i = 0; i < lasti + 1; i++) { 1400 1401 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1402 continue; 1403 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1404 alu.inst = ctx->inst_info->r600_opcode; 1405 1406 alu.src[0].sel = V_SQ_ALU_SRC_0; 1407 1408 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1409 1410 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1411 1412 if (i == lasti) { 1413 alu.last = 1; 1414 } 1415 r = r600_bytecode_add_alu(ctx->bc, &alu); 1416 if (r) 1417 return r; 1418 } 1419 return 0; 1420 1421} 1422 1423static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 1424{ 1425 struct 
tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1426 int i, j, r; 1427 struct r600_bytecode_alu alu; 1428 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1429 1430 for (i = 0 ; i < last_slot; i++) { 1431 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1432 alu.inst = ctx->inst_info->r600_opcode; 1433 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1434 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 1435 } 1436 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1437 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1438 1439 if (i == last_slot - 1) 1440 alu.last = 1; 1441 r = r600_bytecode_add_alu(ctx->bc, &alu); 1442 if (r) 1443 return r; 1444 } 1445 return 0; 1446} 1447 1448/* 1449 * r600 - trunc to -PI..PI range 1450 * r700 - normalize by dividing by 2PI 1451 * see fdo bug 27901 1452 */ 1453static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 1454{ 1455 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1456 static float double_pi = 3.1415926535 * 2; 1457 static float neg_pi = -3.1415926535; 1458 1459 int r; 1460 struct r600_bytecode_alu alu; 1461 1462 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1463 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1464 alu.is_op3 = 1; 1465 1466 alu.dst.chan = 0; 1467 alu.dst.sel = ctx->temp_reg; 1468 alu.dst.write = 1; 1469 1470 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1471 1472 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1473 alu.src[1].chan = 0; 1474 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1475 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1476 alu.src[2].chan = 0; 1477 alu.last = 1; 1478 r = r600_bytecode_add_alu(ctx->bc, &alu); 1479 if (r) 1480 return r; 1481 1482 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1483 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1484 1485 alu.dst.chan = 0; 1486 alu.dst.sel = ctx->temp_reg; 1487 alu.dst.write = 1; 1488 1489 alu.src[0].sel = ctx->temp_reg; 1490 alu.src[0].chan = 0; 1491 alu.last = 1; 1492 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 1493 if (r) 1494 return r; 1495 1496 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1497 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1498 alu.is_op3 = 1; 1499 1500 alu.dst.chan = 0; 1501 alu.dst.sel = ctx->temp_reg; 1502 alu.dst.write = 1; 1503 1504 alu.src[0].sel = ctx->temp_reg; 1505 alu.src[0].chan = 0; 1506 1507 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1508 alu.src[1].chan = 0; 1509 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1510 alu.src[2].chan = 0; 1511 1512 if (ctx->bc->chip_class == R600) { 1513 alu.src[1].value = *(uint32_t *)&double_pi; 1514 alu.src[2].value = *(uint32_t *)&neg_pi; 1515 } else { 1516 alu.src[1].sel = V_SQ_ALU_SRC_1; 1517 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1518 alu.src[2].neg = 1; 1519 } 1520 1521 alu.last = 1; 1522 r = r600_bytecode_add_alu(ctx->bc, &alu); 1523 if (r) 1524 return r; 1525 return 0; 1526} 1527 1528static int cayman_trig(struct r600_shader_ctx *ctx) 1529{ 1530 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1531 struct r600_bytecode_alu alu; 1532 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 1533 int i, r; 1534 1535 r = tgsi_setup_trig(ctx); 1536 if (r) 1537 return r; 1538 1539 1540 for (i = 0; i < last_slot; i++) { 1541 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1542 alu.inst = ctx->inst_info->r600_opcode; 1543 alu.dst.chan = i; 1544 1545 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1546 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1547 1548 alu.src[0].sel = ctx->temp_reg; 1549 alu.src[0].chan = 0; 1550 if (i == last_slot - 1) 1551 alu.last = 1; 1552 r = r600_bytecode_add_alu(ctx->bc, &alu); 1553 if (r) 1554 return r; 1555 } 1556 return 0; 1557} 1558 1559static int tgsi_trig(struct r600_shader_ctx *ctx) 1560{ 1561 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1562 struct r600_bytecode_alu alu; 1563 int i, r; 1564 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1565 1566 r = tgsi_setup_trig(ctx); 1567 if (r) 1568 return r; 1569 1570 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1571 alu.inst = ctx->inst_info->r600_opcode; 1572 alu.dst.chan = 0; 1573 alu.dst.sel = ctx->temp_reg; 1574 alu.dst.write = 1; 1575 1576 alu.src[0].sel = ctx->temp_reg; 1577 alu.src[0].chan = 0; 1578 alu.last = 1; 1579 r = r600_bytecode_add_alu(ctx->bc, &alu); 1580 if (r) 1581 return r; 1582 1583 /* replicate result */ 1584 for (i = 0; i < lasti + 1; i++) { 1585 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1586 continue; 1587 1588 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1589 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1590 1591 alu.src[0].sel = ctx->temp_reg; 1592 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1593 if (i == lasti) 1594 alu.last = 1; 1595 r = r600_bytecode_add_alu(ctx->bc, &alu); 1596 if (r) 1597 return r; 1598 } 1599 return 0; 1600} 1601 1602static int tgsi_scs(struct r600_shader_ctx *ctx) 1603{ 1604 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1605 struct r600_bytecode_alu alu; 1606 int i, r; 1607 1608 /* We'll only need the 
trig stuff if we are going to write to the 1609 * X or Y components of the destination vector. 1610 */ 1611 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1612 r = tgsi_setup_trig(ctx); 1613 if (r) 1614 return r; 1615 } 1616 1617 /* dst.x = COS */ 1618 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1619 if (ctx->bc->chip_class == CAYMAN) { 1620 for (i = 0 ; i < 3; i++) { 1621 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1622 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1623 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1624 1625 if (i == 0) 1626 alu.dst.write = 1; 1627 else 1628 alu.dst.write = 0; 1629 alu.src[0].sel = ctx->temp_reg; 1630 alu.src[0].chan = 0; 1631 if (i == 2) 1632 alu.last = 1; 1633 r = r600_bytecode_add_alu(ctx->bc, &alu); 1634 if (r) 1635 return r; 1636 } 1637 } else { 1638 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1639 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1640 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1641 1642 alu.src[0].sel = ctx->temp_reg; 1643 alu.src[0].chan = 0; 1644 alu.last = 1; 1645 r = r600_bytecode_add_alu(ctx->bc, &alu); 1646 if (r) 1647 return r; 1648 } 1649 } 1650 1651 /* dst.y = SIN */ 1652 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1653 if (ctx->bc->chip_class == CAYMAN) { 1654 for (i = 0 ; i < 3; i++) { 1655 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1656 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1657 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1658 if (i == 1) 1659 alu.dst.write = 1; 1660 else 1661 alu.dst.write = 0; 1662 alu.src[0].sel = ctx->temp_reg; 1663 alu.src[0].chan = 0; 1664 if (i == 2) 1665 alu.last = 1; 1666 r = r600_bytecode_add_alu(ctx->bc, &alu); 1667 if (r) 1668 return r; 1669 } 1670 } else { 1671 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1672 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1673 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1674 1675 alu.src[0].sel = ctx->temp_reg; 1676 
alu.src[0].chan = 0; 1677 alu.last = 1; 1678 r = r600_bytecode_add_alu(ctx->bc, &alu); 1679 if (r) 1680 return r; 1681 } 1682 } 1683 1684 /* dst.z = 0.0; */ 1685 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1686 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1687 1688 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1689 1690 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1691 1692 alu.src[0].sel = V_SQ_ALU_SRC_0; 1693 alu.src[0].chan = 0; 1694 1695 alu.last = 1; 1696 1697 r = r600_bytecode_add_alu(ctx->bc, &alu); 1698 if (r) 1699 return r; 1700 } 1701 1702 /* dst.w = 1.0; */ 1703 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1704 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1705 1706 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1707 1708 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1709 1710 alu.src[0].sel = V_SQ_ALU_SRC_1; 1711 alu.src[0].chan = 0; 1712 1713 alu.last = 1; 1714 1715 r = r600_bytecode_add_alu(ctx->bc, &alu); 1716 if (r) 1717 return r; 1718 } 1719 1720 return 0; 1721} 1722 1723static int tgsi_kill(struct r600_shader_ctx *ctx) 1724{ 1725 struct r600_bytecode_alu alu; 1726 int i, r; 1727 1728 for (i = 0; i < 4; i++) { 1729 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1730 alu.inst = ctx->inst_info->r600_opcode; 1731 1732 alu.dst.chan = i; 1733 1734 alu.src[0].sel = V_SQ_ALU_SRC_0; 1735 1736 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1737 alu.src[1].sel = V_SQ_ALU_SRC_1; 1738 alu.src[1].neg = 1; 1739 } else { 1740 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1741 } 1742 if (i == 3) { 1743 alu.last = 1; 1744 } 1745 r = r600_bytecode_add_alu(ctx->bc, &alu); 1746 if (r) 1747 return r; 1748 } 1749 1750 /* kill must be last in ALU */ 1751 ctx->bc->force_add_cf = 1; 1752 ctx->shader->uses_kill = TRUE; 1753 return 0; 1754} 1755 1756static int tgsi_lit(struct r600_shader_ctx *ctx) 1757{ 1758 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1759 struct 
r600_bytecode_alu alu; 1760 int r; 1761 1762 /* tmp.x = max(src.y, 0.0) */ 1763 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1764 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1765 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 1766 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1767 alu.src[1].chan = 1; 1768 1769 alu.dst.sel = ctx->temp_reg; 1770 alu.dst.chan = 0; 1771 alu.dst.write = 1; 1772 1773 alu.last = 1; 1774 r = r600_bytecode_add_alu(ctx->bc, &alu); 1775 if (r) 1776 return r; 1777 1778 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1779 { 1780 int chan; 1781 int sel; 1782 int i; 1783 1784 if (ctx->bc->chip_class == CAYMAN) { 1785 for (i = 0; i < 3; i++) { 1786 /* tmp.z = log(tmp.x) */ 1787 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1788 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1789 alu.src[0].sel = ctx->temp_reg; 1790 alu.src[0].chan = 0; 1791 alu.dst.sel = ctx->temp_reg; 1792 alu.dst.chan = i; 1793 if (i == 2) { 1794 alu.dst.write = 1; 1795 alu.last = 1; 1796 } else 1797 alu.dst.write = 0; 1798 1799 r = r600_bytecode_add_alu(ctx->bc, &alu); 1800 if (r) 1801 return r; 1802 } 1803 } else { 1804 /* tmp.z = log(tmp.x) */ 1805 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1806 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1807 alu.src[0].sel = ctx->temp_reg; 1808 alu.src[0].chan = 0; 1809 alu.dst.sel = ctx->temp_reg; 1810 alu.dst.chan = 2; 1811 alu.dst.write = 1; 1812 alu.last = 1; 1813 r = r600_bytecode_add_alu(ctx->bc, &alu); 1814 if (r) 1815 return r; 1816 } 1817 1818 chan = alu.dst.chan; 1819 sel = alu.dst.sel; 1820 1821 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 1822 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1823 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1824 alu.src[0].sel = sel; 1825 alu.src[0].chan = chan; 1826 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 1827 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 1828 alu.dst.sel = ctx->temp_reg; 1829 
alu.dst.chan = 0; 1830 alu.dst.write = 1; 1831 alu.is_op3 = 1; 1832 alu.last = 1; 1833 r = r600_bytecode_add_alu(ctx->bc, &alu); 1834 if (r) 1835 return r; 1836 1837 if (ctx->bc->chip_class == CAYMAN) { 1838 for (i = 0; i < 3; i++) { 1839 /* dst.z = exp(tmp.x) */ 1840 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1841 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1842 alu.src[0].sel = ctx->temp_reg; 1843 alu.src[0].chan = 0; 1844 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1845 if (i == 2) { 1846 alu.dst.write = 1; 1847 alu.last = 1; 1848 } else 1849 alu.dst.write = 0; 1850 r = r600_bytecode_add_alu(ctx->bc, &alu); 1851 if (r) 1852 return r; 1853 } 1854 } else { 1855 /* dst.z = exp(tmp.x) */ 1856 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1857 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1858 alu.src[0].sel = ctx->temp_reg; 1859 alu.src[0].chan = 0; 1860 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1861 alu.last = 1; 1862 r = r600_bytecode_add_alu(ctx->bc, &alu); 1863 if (r) 1864 return r; 1865 } 1866 } 1867 1868 /* dst.x, <- 1.0 */ 1869 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1870 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1871 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1872 alu.src[0].chan = 0; 1873 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1874 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1875 r = r600_bytecode_add_alu(ctx->bc, &alu); 1876 if (r) 1877 return r; 1878 1879 /* dst.y = max(src.x, 0.0) */ 1880 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1881 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1882 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1883 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1884 alu.src[1].chan = 0; 1885 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1886 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1887 r = r600_bytecode_add_alu(ctx->bc, &alu); 1888 if (r) 1889 return r; 1890 1891 /* dst.w, <- 1.0 */ 1892 memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 1893 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1894 alu.src[0].sel = V_SQ_ALU_SRC_1; 1895 alu.src[0].chan = 0; 1896 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1897 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1898 alu.last = 1; 1899 r = r600_bytecode_add_alu(ctx->bc, &alu); 1900 if (r) 1901 return r; 1902 1903 return 0; 1904} 1905 1906static int tgsi_rsq(struct r600_shader_ctx *ctx) 1907{ 1908 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1909 struct r600_bytecode_alu alu; 1910 int i, r; 1911 1912 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1913 1914 /* FIXME: 1915 * For state trackers other than OpenGL, we'll want to use 1916 * _RECIPSQRT_IEEE instead. 1917 */ 1918 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1919 1920 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1921 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1922 r600_bytecode_src_set_abs(&alu.src[i]); 1923 } 1924 alu.dst.sel = ctx->temp_reg; 1925 alu.dst.write = 1; 1926 alu.last = 1; 1927 r = r600_bytecode_add_alu(ctx->bc, &alu); 1928 if (r) 1929 return r; 1930 /* replicate result */ 1931 return tgsi_helper_tempx_replicate(ctx); 1932} 1933 1934static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1935{ 1936 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1937 struct r600_bytecode_alu alu; 1938 int i, r; 1939 1940 for (i = 0; i < 4; i++) { 1941 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1942 alu.src[0].sel = ctx->temp_reg; 1943 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1944 alu.dst.chan = i; 1945 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1946 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1947 if (i == 3) 1948 alu.last = 1; 1949 r = r600_bytecode_add_alu(ctx->bc, &alu); 1950 if (r) 1951 return r; 1952 } 1953 return 0; 1954} 1955 1956static int tgsi_trans_srcx_replicate(struct r600_shader_ctx 
*ctx) 1957{ 1958 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1959 struct r600_bytecode_alu alu; 1960 int i, r; 1961 1962 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1963 alu.inst = ctx->inst_info->r600_opcode; 1964 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1965 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1966 } 1967 alu.dst.sel = ctx->temp_reg; 1968 alu.dst.write = 1; 1969 alu.last = 1; 1970 r = r600_bytecode_add_alu(ctx->bc, &alu); 1971 if (r) 1972 return r; 1973 /* replicate result */ 1974 return tgsi_helper_tempx_replicate(ctx); 1975} 1976 1977static int cayman_pow(struct r600_shader_ctx *ctx) 1978{ 1979 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1980 int i, r; 1981 struct r600_bytecode_alu alu; 1982 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1983 1984 for (i = 0; i < 3; i++) { 1985 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1986 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1987 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1988 alu.dst.sel = ctx->temp_reg; 1989 alu.dst.chan = i; 1990 alu.dst.write = 1; 1991 if (i == 2) 1992 alu.last = 1; 1993 r = r600_bytecode_add_alu(ctx->bc, &alu); 1994 if (r) 1995 return r; 1996 } 1997 1998 /* b * LOG2(a) */ 1999 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2001 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 2002 alu.src[1].sel = ctx->temp_reg; 2003 alu.dst.sel = ctx->temp_reg; 2004 alu.dst.write = 1; 2005 alu.last = 1; 2006 r = r600_bytecode_add_alu(ctx->bc, &alu); 2007 if (r) 2008 return r; 2009 2010 for (i = 0; i < last_slot; i++) { 2011 /* POW(a,b) = EXP2(b * LOG2(a))*/ 2012 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2013 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2014 alu.src[0].sel = ctx->temp_reg; 2015 2016 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2017 alu.dst.write = 
(inst->Dst[0].Register.WriteMask >> i) & 1; 2018 if (i == last_slot - 1) 2019 alu.last = 1; 2020 r = r600_bytecode_add_alu(ctx->bc, &alu); 2021 if (r) 2022 return r; 2023 } 2024 return 0; 2025} 2026 2027static int tgsi_pow(struct r600_shader_ctx *ctx) 2028{ 2029 struct r600_bytecode_alu alu; 2030 int r; 2031 2032 /* LOG2(a) */ 2033 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2034 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2035 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2036 alu.dst.sel = ctx->temp_reg; 2037 alu.dst.write = 1; 2038 alu.last = 1; 2039 r = r600_bytecode_add_alu(ctx->bc, &alu); 2040 if (r) 2041 return r; 2042 /* b * LOG2(a) */ 2043 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2044 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2045 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 2046 alu.src[1].sel = ctx->temp_reg; 2047 alu.dst.sel = ctx->temp_reg; 2048 alu.dst.write = 1; 2049 alu.last = 1; 2050 r = r600_bytecode_add_alu(ctx->bc, &alu); 2051 if (r) 2052 return r; 2053 /* POW(a,b) = EXP2(b * LOG2(a))*/ 2054 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2055 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2056 alu.src[0].sel = ctx->temp_reg; 2057 alu.dst.sel = ctx->temp_reg; 2058 alu.dst.write = 1; 2059 alu.last = 1; 2060 r = r600_bytecode_add_alu(ctx->bc, &alu); 2061 if (r) 2062 return r; 2063 return tgsi_helper_tempx_replicate(ctx); 2064} 2065 2066static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 2067{ 2068 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2069 struct r600_bytecode_alu alu; 2070 int i, r; 2071 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2072 int tmp0 = ctx->temp_reg; 2073 int tmp1 = r600_get_temp(ctx); 2074 int tmp2 = r600_get_temp(ctx); 2075 2076 /* Unsigned path: 2077 * 2078 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 2079 * 2080 * 1. 
tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 2081 * 2. tmp0.z = lo (tmp0.x * src2) 2082 * 3. tmp0.w = -tmp0.z 2083 * 4. tmp0.y = hi (tmp0.x * src2) 2084 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 2085 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 2086 * 7. tmp1.x = tmp0.x - tmp0.w 2087 * 8. tmp1.y = tmp0.x + tmp0.w 2088 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 2089 * 10. tmp0.z = hi(tmp0.x * src1) = q 2090 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 2091 * 2092 * 12. tmp0.w = src1 - tmp0.y = r 2093 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 2094 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 2095 * 2096 * if DIV 2097 * 2098 * 15. tmp1.z = tmp0.z + 1 = q + 1 2099 * 16. tmp1.w = tmp0.z - 1 = q - 1 2100 * 2101 * else MOD 2102 * 2103 * 15. tmp1.z = tmp0.w - src2 = r - src2 2104 * 16. tmp1.w = tmp0.w + src2 = r + src2 2105 * 2106 * endif 2107 * 2108 * 17. tmp1.x = tmp1.x & tmp1.y 2109 * 2110 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 2111 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 2112 * 2113 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 2114 * 20. dst = src2==0 ? MAX_UINT : tmp0.z 2115 * 2116 * Signed path: 2117 * 2118 * Same as unsigned, using abs values of the operands, 2119 * and fixing the sign of the result in the end. 
2120 */ 2121 2122 for (i = 0; i < 4; i++) { 2123 if (!(write_mask & (1<<i))) 2124 continue; 2125 2126 if (signed_op) { 2127 2128 /* tmp2.x = -src0 */ 2129 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2130 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2131 2132 alu.dst.sel = tmp2; 2133 alu.dst.chan = 0; 2134 alu.dst.write = 1; 2135 2136 alu.src[0].sel = V_SQ_ALU_SRC_0; 2137 2138 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2139 2140 alu.last = 1; 2141 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2142 return r; 2143 2144 /* tmp2.y = -src1 */ 2145 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2146 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2147 2148 alu.dst.sel = tmp2; 2149 alu.dst.chan = 1; 2150 alu.dst.write = 1; 2151 2152 alu.src[0].sel = V_SQ_ALU_SRC_0; 2153 2154 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2155 2156 alu.last = 1; 2157 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2158 return r; 2159 2160 /* tmp2.z sign bit is set if src0 and src2 signs are different */ 2161 /* it will be a sign of the quotient */ 2162 if (!mod) { 2163 2164 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2165 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT); 2166 2167 alu.dst.sel = tmp2; 2168 alu.dst.chan = 2; 2169 alu.dst.write = 1; 2170 2171 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2172 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2173 2174 alu.last = 1; 2175 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2176 return r; 2177 } 2178 2179 /* tmp2.x = |src0| */ 2180 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2181 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2182 alu.is_op3 = 1; 2183 2184 alu.dst.sel = tmp2; 2185 alu.dst.chan = 0; 2186 alu.dst.write = 1; 2187 2188 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2189 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2190 alu.src[2].sel = tmp2; 2191 alu.src[2].chan = 0; 2192 2193 alu.last = 1; 2194 if ((r = 
r600_bytecode_add_alu(ctx->bc, &alu))) 2195 return r; 2196 2197 /* tmp2.y = |src1| */ 2198 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2199 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2200 alu.is_op3 = 1; 2201 2202 alu.dst.sel = tmp2; 2203 alu.dst.chan = 1; 2204 alu.dst.write = 1; 2205 2206 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2207 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2208 alu.src[2].sel = tmp2; 2209 alu.src[2].chan = 1; 2210 2211 alu.last = 1; 2212 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2213 return r; 2214 2215 } 2216 2217 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 2218 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2219 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT); 2220 2221 alu.dst.sel = tmp0; 2222 alu.dst.chan = 0; 2223 alu.dst.write = 1; 2224 2225 if (signed_op) { 2226 alu.src[0].sel = tmp2; 2227 alu.src[0].chan = 1; 2228 } else { 2229 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2230 } 2231 2232 alu.last = 1; 2233 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2234 return r; 2235 2236 /* 2. tmp0.z = lo (tmp0.x * src2) */ 2237 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2238 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 2239 2240 alu.dst.sel = tmp0; 2241 alu.dst.chan = 2; 2242 alu.dst.write = 1; 2243 2244 alu.src[0].sel = tmp0; 2245 alu.src[0].chan = 0; 2246 if (signed_op) { 2247 alu.src[1].sel = tmp2; 2248 alu.src[1].chan = 1; 2249 } else { 2250 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2251 } 2252 2253 alu.last = 1; 2254 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2255 return r; 2256 2257 /* 3. 
tmp0.w = -tmp0.z */ 2258 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2259 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2260 2261 alu.dst.sel = tmp0; 2262 alu.dst.chan = 3; 2263 alu.dst.write = 1; 2264 2265 alu.src[0].sel = V_SQ_ALU_SRC_0; 2266 alu.src[1].sel = tmp0; 2267 alu.src[1].chan = 2; 2268 2269 alu.last = 1; 2270 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2271 return r; 2272 2273 /* 4. tmp0.y = hi (tmp0.x * src2) */ 2274 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2275 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2276 2277 alu.dst.sel = tmp0; 2278 alu.dst.chan = 1; 2279 alu.dst.write = 1; 2280 2281 alu.src[0].sel = tmp0; 2282 alu.src[0].chan = 0; 2283 2284 if (signed_op) { 2285 alu.src[1].sel = tmp2; 2286 alu.src[1].chan = 1; 2287 } else { 2288 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2289 } 2290 2291 alu.last = 1; 2292 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2293 return r; 2294 2295 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 2296 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2297 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 2298 alu.is_op3 = 1; 2299 2300 alu.dst.sel = tmp0; 2301 alu.dst.chan = 2; 2302 alu.dst.write = 1; 2303 2304 alu.src[0].sel = tmp0; 2305 alu.src[0].chan = 1; 2306 alu.src[1].sel = tmp0; 2307 alu.src[1].chan = 3; 2308 alu.src[2].sel = tmp0; 2309 alu.src[2].chan = 2; 2310 2311 alu.last = 1; 2312 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2313 return r; 2314 2315 /* 6. 
tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 2316 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2317 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2318 2319 alu.dst.sel = tmp0; 2320 alu.dst.chan = 3; 2321 alu.dst.write = 1; 2322 2323 alu.src[0].sel = tmp0; 2324 alu.src[0].chan = 2; 2325 2326 alu.src[1].sel = tmp0; 2327 alu.src[1].chan = 0; 2328 2329 alu.last = 1; 2330 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2331 return r; 2332 2333 /* 7. tmp1.x = tmp0.x - tmp0.w */ 2334 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2335 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2336 2337 alu.dst.sel = tmp1; 2338 alu.dst.chan = 0; 2339 alu.dst.write = 1; 2340 2341 alu.src[0].sel = tmp0; 2342 alu.src[0].chan = 0; 2343 alu.src[1].sel = tmp0; 2344 alu.src[1].chan = 3; 2345 2346 alu.last = 1; 2347 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2348 return r; 2349 2350 /* 8. tmp1.y = tmp0.x + tmp0.w */ 2351 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2352 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 2353 2354 alu.dst.sel = tmp1; 2355 alu.dst.chan = 1; 2356 alu.dst.write = 1; 2357 2358 alu.src[0].sel = tmp0; 2359 alu.src[0].chan = 0; 2360 alu.src[1].sel = tmp0; 2361 alu.src[1].chan = 3; 2362 2363 alu.last = 1; 2364 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2365 return r; 2366 2367 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */ 2368 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2369 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 2370 alu.is_op3 = 1; 2371 2372 alu.dst.sel = tmp0; 2373 alu.dst.chan = 0; 2374 alu.dst.write = 1; 2375 2376 alu.src[0].sel = tmp0; 2377 alu.src[0].chan = 1; 2378 alu.src[1].sel = tmp1; 2379 alu.src[1].chan = 1; 2380 alu.src[2].sel = tmp1; 2381 alu.src[2].chan = 0; 2382 2383 alu.last = 1; 2384 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2385 return r; 2386 2387 /* 10. 
tmp0.z = hi(tmp0.x * src1) = q */ 2388 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2389 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2390 2391 alu.dst.sel = tmp0; 2392 alu.dst.chan = 2; 2393 alu.dst.write = 1; 2394 2395 alu.src[0].sel = tmp0; 2396 alu.src[0].chan = 0; 2397 2398 if (signed_op) { 2399 alu.src[1].sel = tmp2; 2400 alu.src[1].chan = 0; 2401 } else { 2402 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2403 } 2404 2405 alu.last = 1; 2406 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2407 return r; 2408 2409 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 2410 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2411 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 2412 2413 alu.dst.sel = tmp0; 2414 alu.dst.chan = 1; 2415 alu.dst.write = 1; 2416 2417 if (signed_op) { 2418 alu.src[0].sel = tmp2; 2419 alu.src[0].chan = 1; 2420 } else { 2421 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2422 } 2423 2424 alu.src[1].sel = tmp0; 2425 alu.src[1].chan = 2; 2426 2427 alu.last = 1; 2428 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2429 return r; 2430 2431 /* 12. tmp0.w = src1 - tmp0.y = r */ 2432 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2433 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2434 2435 alu.dst.sel = tmp0; 2436 alu.dst.chan = 3; 2437 alu.dst.write = 1; 2438 2439 if (signed_op) { 2440 alu.src[0].sel = tmp2; 2441 alu.src[0].chan = 0; 2442 } else { 2443 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2444 } 2445 2446 alu.src[1].sel = tmp0; 2447 alu.src[1].chan = 1; 2448 2449 alu.last = 1; 2450 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2451 return r; 2452 2453 /* 13. 
tmp1.x = tmp0.w >= src2 = r >= src2 */ 2454 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2455 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT); 2456 2457 alu.dst.sel = tmp1; 2458 alu.dst.chan = 0; 2459 alu.dst.write = 1; 2460 2461 alu.src[0].sel = tmp0; 2462 alu.src[0].chan = 3; 2463 if (signed_op) { 2464 alu.src[1].sel = tmp2; 2465 alu.src[1].chan = 1; 2466 } else { 2467 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2468 } 2469 2470 alu.last = 1; 2471 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2472 return r; 2473 2474 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 2475 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2476 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT); 2477 2478 alu.dst.sel = tmp1; 2479 alu.dst.chan = 1; 2480 alu.dst.write = 1; 2481 2482 if (signed_op) { 2483 alu.src[0].sel = tmp2; 2484 alu.src[0].chan = 0; 2485 } else { 2486 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2487 } 2488 2489 alu.src[1].sel = tmp0; 2490 alu.src[1].chan = 1; 2491 2492 alu.last = 1; 2493 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2494 return r; 2495 2496 if (mod) { /* UMOD */ 2497 2498 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 2499 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2500 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2501 2502 alu.dst.sel = tmp1; 2503 alu.dst.chan = 2; 2504 alu.dst.write = 1; 2505 2506 alu.src[0].sel = tmp0; 2507 alu.src[0].chan = 3; 2508 2509 if (signed_op) { 2510 alu.src[1].sel = tmp2; 2511 alu.src[1].chan = 1; 2512 } else { 2513 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2514 } 2515 2516 alu.last = 1; 2517 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2518 return r; 2519 2520 /* 16. 
tmp1.w = tmp0.w + src2 = r + src2 */ 2521 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2522 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 2523 2524 alu.dst.sel = tmp1; 2525 alu.dst.chan = 3; 2526 alu.dst.write = 1; 2527 2528 alu.src[0].sel = tmp0; 2529 alu.src[0].chan = 3; 2530 if (signed_op) { 2531 alu.src[1].sel = tmp2; 2532 alu.src[1].chan = 1; 2533 } else { 2534 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2535 } 2536 2537 alu.last = 1; 2538 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2539 return r; 2540 2541 } else { /* UDIV */ 2542 2543 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 2544 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2545 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 2546 2547 alu.dst.sel = tmp1; 2548 alu.dst.chan = 2; 2549 alu.dst.write = 1; 2550 2551 alu.src[0].sel = tmp0; 2552 alu.src[0].chan = 2; 2553 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 2554 2555 alu.last = 1; 2556 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2557 return r; 2558 2559 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 2560 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2561 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 2562 2563 alu.dst.sel = tmp1; 2564 alu.dst.chan = 3; 2565 alu.dst.write = 1; 2566 2567 alu.src[0].sel = tmp0; 2568 alu.src[0].chan = 2; 2569 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 2570 2571 alu.last = 1; 2572 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2573 return r; 2574 2575 } 2576 2577 /* 17. tmp1.x = tmp1.x & tmp1.y */ 2578 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2579 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT); 2580 2581 alu.dst.sel = tmp1; 2582 alu.dst.chan = 0; 2583 alu.dst.write = 1; 2584 2585 alu.src[0].sel = tmp1; 2586 alu.src[0].chan = 0; 2587 alu.src[1].sel = tmp1; 2588 alu.src[1].chan = 1; 2589 2590 alu.last = 1; 2591 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2592 return r; 2593 2594 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 2595 /* 18. 
tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */ 2596 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2597 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 2598 alu.is_op3 = 1; 2599 2600 alu.dst.sel = tmp0; 2601 alu.dst.chan = 2; 2602 alu.dst.write = 1; 2603 2604 alu.src[0].sel = tmp1; 2605 alu.src[0].chan = 0; 2606 alu.src[1].sel = tmp0; 2607 alu.src[1].chan = mod ? 3 : 2; 2608 alu.src[2].sel = tmp1; 2609 alu.src[2].chan = 2; 2610 2611 alu.last = 1; 2612 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2613 return r; 2614 2615 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 2616 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2617 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 2618 alu.is_op3 = 1; 2619 2620 if (signed_op) { 2621 alu.dst.sel = tmp0; 2622 alu.dst.chan = 2; 2623 alu.dst.write = 1; 2624 } else { 2625 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2626 } 2627 2628 alu.src[0].sel = tmp1; 2629 alu.src[0].chan = 1; 2630 alu.src[1].sel = tmp1; 2631 alu.src[1].chan = 3; 2632 alu.src[2].sel = tmp0; 2633 alu.src[2].chan = 2; 2634 2635 alu.last = 1; 2636 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2637 return r; 2638 2639 if (signed_op) { 2640 2641 /* fix the sign of the result */ 2642 2643 if (mod) { 2644 2645 /* tmp0.x = -tmp0.z */ 2646 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2647 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2648 2649 alu.dst.sel = tmp0; 2650 alu.dst.chan = 0; 2651 alu.dst.write = 1; 2652 2653 alu.src[0].sel = V_SQ_ALU_SRC_0; 2654 alu.src[1].sel = tmp0; 2655 alu.src[1].chan = 2; 2656 2657 alu.last = 1; 2658 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2659 return r; 2660 2661 /* sign of the remainder is the same as the sign of src0 */ 2662 /* tmp0.x = src0>=0 ? 
tmp0.z : tmp0.x */ 2663 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2664 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2665 alu.is_op3 = 1; 2666 2667 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2668 2669 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2670 alu.src[1].sel = tmp0; 2671 alu.src[1].chan = 2; 2672 alu.src[2].sel = tmp0; 2673 alu.src[2].chan = 0; 2674 2675 alu.last = 1; 2676 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2677 return r; 2678 2679 } else { 2680 2681 /* tmp0.x = -tmp0.z */ 2682 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2683 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2684 2685 alu.dst.sel = tmp0; 2686 alu.dst.chan = 0; 2687 alu.dst.write = 1; 2688 2689 alu.src[0].sel = V_SQ_ALU_SRC_0; 2690 alu.src[1].sel = tmp0; 2691 alu.src[1].chan = 2; 2692 2693 alu.last = 1; 2694 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2695 return r; 2696 2697 /* fix the quotient sign (same as the sign of src0*src1) */ 2698 /* tmp0.x = tmp2.z>=0 ? 
tmp0.z : tmp0.x */ 2699 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2700 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2701 alu.is_op3 = 1; 2702 2703 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2704 2705 alu.src[0].sel = tmp2; 2706 alu.src[0].chan = 2; 2707 alu.src[1].sel = tmp0; 2708 alu.src[1].chan = 2; 2709 alu.src[2].sel = tmp0; 2710 alu.src[2].chan = 0; 2711 2712 alu.last = 1; 2713 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2714 return r; 2715 } 2716 } 2717 } 2718 return 0; 2719} 2720 2721static int tgsi_udiv(struct r600_shader_ctx *ctx) 2722{ 2723 return tgsi_divmod(ctx, 0, 0); 2724} 2725 2726static int tgsi_umod(struct r600_shader_ctx *ctx) 2727{ 2728 return tgsi_divmod(ctx, 1, 0); 2729} 2730 2731static int tgsi_idiv(struct r600_shader_ctx *ctx) 2732{ 2733 return tgsi_divmod(ctx, 0, 1); 2734} 2735 2736static int tgsi_imod(struct r600_shader_ctx *ctx) 2737{ 2738 return tgsi_divmod(ctx, 1, 1); 2739} 2740 2741 2742static int tgsi_f2i(struct r600_shader_ctx *ctx) 2743{ 2744 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2745 struct r600_bytecode_alu alu; 2746 int i, r; 2747 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2748 int last_inst = tgsi_last_instruction(write_mask); 2749 2750 for (i = 0; i < 4; i++) { 2751 if (!(write_mask & (1<<i))) 2752 continue; 2753 2754 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2755 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC); 2756 2757 alu.dst.sel = ctx->temp_reg; 2758 alu.dst.chan = i; 2759 alu.dst.write = 1; 2760 2761 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2762 if (i == last_inst) 2763 alu.last = 1; 2764 r = r600_bytecode_add_alu(ctx->bc, &alu); 2765 if (r) 2766 return r; 2767 } 2768 2769 for (i = 0; i < 4; i++) { 2770 if (!(write_mask & (1<<i))) 2771 continue; 2772 2773 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2774 alu.inst = ctx->inst_info->r600_opcode; 2775 2776 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2777 2778 
alu.src[0].sel = ctx->temp_reg; 2779 alu.src[0].chan = i; 2780 2781 if (i == last_inst) 2782 alu.last = 1; 2783 r = r600_bytecode_add_alu(ctx->bc, &alu); 2784 if (r) 2785 return r; 2786 } 2787 2788 return 0; 2789} 2790 2791static int tgsi_iabs(struct r600_shader_ctx *ctx) 2792{ 2793 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2794 struct r600_bytecode_alu alu; 2795 int i, r; 2796 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2797 int last_inst = tgsi_last_instruction(write_mask); 2798 2799 /* tmp = -src */ 2800 for (i = 0; i < 4; i++) { 2801 if (!(write_mask & (1<<i))) 2802 continue; 2803 2804 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2805 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2806 2807 alu.dst.sel = ctx->temp_reg; 2808 alu.dst.chan = i; 2809 alu.dst.write = 1; 2810 2811 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2812 alu.src[0].sel = V_SQ_ALU_SRC_0; 2813 2814 if (i == last_inst) 2815 alu.last = 1; 2816 r = r600_bytecode_add_alu(ctx->bc, &alu); 2817 if (r) 2818 return r; 2819 } 2820 2821 /* dst = (src >= 0 ? 
src : tmp) */ 2822 for (i = 0; i < 4; i++) { 2823 if (!(write_mask & (1<<i))) 2824 continue; 2825 2826 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2827 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2828 alu.is_op3 = 1; 2829 alu.dst.write = 1; 2830 2831 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2832 2833 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2834 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2835 alu.src[2].sel = ctx->temp_reg; 2836 alu.src[2].chan = i; 2837 2838 if (i == last_inst) 2839 alu.last = 1; 2840 r = r600_bytecode_add_alu(ctx->bc, &alu); 2841 if (r) 2842 return r; 2843 } 2844 return 0; 2845} 2846 2847static int tgsi_issg(struct r600_shader_ctx *ctx) 2848{ 2849 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2850 struct r600_bytecode_alu alu; 2851 int i, r; 2852 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2853 int last_inst = tgsi_last_instruction(write_mask); 2854 2855 /* tmp = (src >= 0 ? src : -1) */ 2856 for (i = 0; i < 4; i++) { 2857 if (!(write_mask & (1<<i))) 2858 continue; 2859 2860 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2861 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2862 alu.is_op3 = 1; 2863 2864 alu.dst.sel = ctx->temp_reg; 2865 alu.dst.chan = i; 2866 alu.dst.write = 1; 2867 2868 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2869 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2870 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT; 2871 2872 if (i == last_inst) 2873 alu.last = 1; 2874 r = r600_bytecode_add_alu(ctx->bc, &alu); 2875 if (r) 2876 return r; 2877 } 2878 2879 /* dst = (tmp > 0 ? 
1 : tmp) */ 2880 for (i = 0; i < 4; i++) { 2881 if (!(write_mask & (1<<i))) 2882 continue; 2883 2884 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2885 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT); 2886 alu.is_op3 = 1; 2887 alu.dst.write = 1; 2888 2889 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2890 2891 alu.src[0].sel = ctx->temp_reg; 2892 alu.src[0].chan = i; 2893 2894 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 2895 2896 alu.src[2].sel = ctx->temp_reg; 2897 alu.src[2].chan = i; 2898 2899 if (i == last_inst) 2900 alu.last = 1; 2901 r = r600_bytecode_add_alu(ctx->bc, &alu); 2902 if (r) 2903 return r; 2904 } 2905 return 0; 2906} 2907 2908 2909 2910static int tgsi_ssg(struct r600_shader_ctx *ctx) 2911{ 2912 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2913 struct r600_bytecode_alu alu; 2914 int i, r; 2915 2916 /* tmp = (src > 0 ? 1 : src) */ 2917 for (i = 0; i < 4; i++) { 2918 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2919 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 2920 alu.is_op3 = 1; 2921 2922 alu.dst.sel = ctx->temp_reg; 2923 alu.dst.chan = i; 2924 2925 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2926 alu.src[1].sel = V_SQ_ALU_SRC_1; 2927 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 2928 2929 if (i == 3) 2930 alu.last = 1; 2931 r = r600_bytecode_add_alu(ctx->bc, &alu); 2932 if (r) 2933 return r; 2934 } 2935 2936 /* dst = (-tmp > 0 ? 
-1 : tmp) */ 2937 for (i = 0; i < 4; i++) { 2938 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2939 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 2940 alu.is_op3 = 1; 2941 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2942 2943 alu.src[0].sel = ctx->temp_reg; 2944 alu.src[0].chan = i; 2945 alu.src[0].neg = 1; 2946 2947 alu.src[1].sel = V_SQ_ALU_SRC_1; 2948 alu.src[1].neg = 1; 2949 2950 alu.src[2].sel = ctx->temp_reg; 2951 alu.src[2].chan = i; 2952 2953 if (i == 3) 2954 alu.last = 1; 2955 r = r600_bytecode_add_alu(ctx->bc, &alu); 2956 if (r) 2957 return r; 2958 } 2959 return 0; 2960} 2961 2962static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 2963{ 2964 struct r600_bytecode_alu alu; 2965 int i, r; 2966 2967 for (i = 0; i < 4; i++) { 2968 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2969 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 2970 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 2971 alu.dst.chan = i; 2972 } else { 2973 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2974 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2975 alu.src[0].sel = ctx->temp_reg; 2976 alu.src[0].chan = i; 2977 } 2978 if (i == 3) { 2979 alu.last = 1; 2980 } 2981 r = r600_bytecode_add_alu(ctx->bc, &alu); 2982 if (r) 2983 return r; 2984 } 2985 return 0; 2986} 2987 2988static int tgsi_op3(struct r600_shader_ctx *ctx) 2989{ 2990 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2991 struct r600_bytecode_alu alu; 2992 int i, j, r; 2993 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2994 2995 for (i = 0; i < lasti + 1; i++) { 2996 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2997 continue; 2998 2999 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3000 alu.inst = ctx->inst_info->r600_opcode; 3001 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3002 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 3003 } 3004 3005 tgsi_dst(ctx, &inst->Dst[0], i, 
&alu.dst); 3006 alu.dst.chan = i; 3007 alu.dst.write = 1; 3008 alu.is_op3 = 1; 3009 if (i == lasti) { 3010 alu.last = 1; 3011 } 3012 r = r600_bytecode_add_alu(ctx->bc, &alu); 3013 if (r) 3014 return r; 3015 } 3016 return 0; 3017} 3018 3019static int tgsi_dp(struct r600_shader_ctx *ctx) 3020{ 3021 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3022 struct r600_bytecode_alu alu; 3023 int i, j, r; 3024 3025 for (i = 0; i < 4; i++) { 3026 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3027 alu.inst = ctx->inst_info->r600_opcode; 3028 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3029 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 3030 } 3031 3032 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3033 alu.dst.chan = i; 3034 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 3035 /* handle some special cases */ 3036 switch (ctx->inst_info->tgsi_opcode) { 3037 case TGSI_OPCODE_DP2: 3038 if (i > 1) { 3039 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 3040 alu.src[0].chan = alu.src[1].chan = 0; 3041 } 3042 break; 3043 case TGSI_OPCODE_DP3: 3044 if (i > 2) { 3045 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 3046 alu.src[0].chan = alu.src[1].chan = 0; 3047 } 3048 break; 3049 case TGSI_OPCODE_DPH: 3050 if (i == 3) { 3051 alu.src[0].sel = V_SQ_ALU_SRC_1; 3052 alu.src[0].chan = 0; 3053 alu.src[0].neg = 0; 3054 } 3055 break; 3056 default: 3057 break; 3058 } 3059 if (i == 3) { 3060 alu.last = 1; 3061 } 3062 r = r600_bytecode_add_alu(ctx->bc, &alu); 3063 if (r) 3064 return r; 3065 } 3066 return 0; 3067} 3068 3069static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 3070 unsigned index) 3071{ 3072 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3073 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 3074 inst->Src[index].Register.File != TGSI_FILE_INPUT) || 3075 ctx->src[index].neg || ctx->src[index].abs; 3076} 3077 3078static inline unsigned 
tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 3079 unsigned index) 3080{ 3081 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3082 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 3083} 3084 3085static int tgsi_tex(struct r600_shader_ctx *ctx) 3086{ 3087 static float one_point_five = 1.5f; 3088 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3089 struct r600_bytecode_tex tex; 3090 struct r600_bytecode_alu alu; 3091 unsigned src_gpr; 3092 int r, i, j; 3093 int opcode; 3094 /* Texture fetch instructions can only use gprs as source. 3095 * Also they cannot negate the source or take the absolute value */ 3096 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); 3097 boolean src_loaded = FALSE; 3098 unsigned sampler_src_reg = 1; 3099 u8 offset_x = 0, offset_y = 0, offset_z = 0; 3100 3101 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 3102 3103 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 3104 /* get offset values */ 3105 if (inst->Texture.NumOffsets) { 3106 assert(inst->Texture.NumOffsets == 1); 3107 3108 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 3109 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 3110 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 3111 } 3112 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 3113 /* TGSI moves the sampler to src reg 3 for TXD */ 3114 sampler_src_reg = 3; 3115 3116 for (i = 1; i < 3; i++) { 3117 /* set gradients h/v */ 3118 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 3119 tex.inst = (i == 1) ? 
SQ_TEX_INST_SET_GRADIENTS_H : 3120 SQ_TEX_INST_SET_GRADIENTS_V; 3121 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 3122 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 3123 3124 if (tgsi_tex_src_requires_loading(ctx, i)) { 3125 tex.src_gpr = r600_get_temp(ctx); 3126 tex.src_sel_x = 0; 3127 tex.src_sel_y = 1; 3128 tex.src_sel_z = 2; 3129 tex.src_sel_w = 3; 3130 3131 for (j = 0; j < 4; j++) { 3132 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3133 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3134 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 3135 alu.dst.sel = tex.src_gpr; 3136 alu.dst.chan = j; 3137 if (j == 3) 3138 alu.last = 1; 3139 alu.dst.write = 1; 3140 r = r600_bytecode_add_alu(ctx->bc, &alu); 3141 if (r) 3142 return r; 3143 } 3144 3145 } else { 3146 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 3147 tex.src_sel_x = ctx->src[i].swizzle[0]; 3148 tex.src_sel_y = ctx->src[i].swizzle[1]; 3149 tex.src_sel_z = ctx->src[i].swizzle[2]; 3150 tex.src_sel_w = ctx->src[i].swizzle[3]; 3151 tex.src_rel = ctx->src[i].rel; 3152 } 3153 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 3154 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 3155 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 3156 tex.coord_type_x = 1; 3157 tex.coord_type_y = 1; 3158 tex.coord_type_z = 1; 3159 tex.coord_type_w = 1; 3160 } 3161 r = r600_bytecode_add_tex(ctx->bc, &tex); 3162 if (r) 3163 return r; 3164 } 3165 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 3166 int out_chan; 3167 /* Add perspective divide */ 3168 if (ctx->bc->chip_class == CAYMAN) { 3169 out_chan = 2; 3170 for (i = 0; i < 3; i++) { 3171 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3172 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3173 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 3174 3175 alu.dst.sel = ctx->temp_reg; 3176 alu.dst.chan = i; 3177 if (i == 2) 3178 alu.last = 1; 3179 if (out_chan == i) 3180 
alu.dst.write = 1; 3181 r = r600_bytecode_add_alu(ctx->bc, &alu); 3182 if (r) 3183 return r; 3184 } 3185 3186 } else { 3187 out_chan = 3; 3188 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3189 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3190 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 3191 3192 alu.dst.sel = ctx->temp_reg; 3193 alu.dst.chan = out_chan; 3194 alu.last = 1; 3195 alu.dst.write = 1; 3196 r = r600_bytecode_add_alu(ctx->bc, &alu); 3197 if (r) 3198 return r; 3199 } 3200 3201 for (i = 0; i < 3; i++) { 3202 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3203 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3204 alu.src[0].sel = ctx->temp_reg; 3205 alu.src[0].chan = out_chan; 3206 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3207 alu.dst.sel = ctx->temp_reg; 3208 alu.dst.chan = i; 3209 alu.dst.write = 1; 3210 r = r600_bytecode_add_alu(ctx->bc, &alu); 3211 if (r) 3212 return r; 3213 } 3214 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3215 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3216 alu.src[0].sel = V_SQ_ALU_SRC_1; 3217 alu.src[0].chan = 0; 3218 alu.dst.sel = ctx->temp_reg; 3219 alu.dst.chan = 3; 3220 alu.last = 1; 3221 alu.dst.write = 1; 3222 r = r600_bytecode_add_alu(ctx->bc, &alu); 3223 if (r) 3224 return r; 3225 src_loaded = TRUE; 3226 src_gpr = ctx->temp_reg; 3227 } 3228 3229 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE && 3230 inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { 3231 3232 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 3233 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 3234 3235 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 3236 for (i = 0; i < 4; i++) { 3237 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3238 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 3239 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 3240 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 3241 alu.dst.sel = ctx->temp_reg; 3242 alu.dst.chan = i; 3243 
if (i == 3) 3244 alu.last = 1; 3245 alu.dst.write = 1; 3246 r = r600_bytecode_add_alu(ctx->bc, &alu); 3247 if (r) 3248 return r; 3249 } 3250 3251 /* tmp1.z = RCP_e(|tmp1.z|) */ 3252 if (ctx->bc->chip_class == CAYMAN) { 3253 for (i = 0; i < 3; i++) { 3254 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3255 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3256 alu.src[0].sel = ctx->temp_reg; 3257 alu.src[0].chan = 2; 3258 alu.src[0].abs = 1; 3259 alu.dst.sel = ctx->temp_reg; 3260 alu.dst.chan = i; 3261 if (i == 2) 3262 alu.dst.write = 1; 3263 if (i == 2) 3264 alu.last = 1; 3265 r = r600_bytecode_add_alu(ctx->bc, &alu); 3266 if (r) 3267 return r; 3268 } 3269 } else { 3270 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3271 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3272 alu.src[0].sel = ctx->temp_reg; 3273 alu.src[0].chan = 2; 3274 alu.src[0].abs = 1; 3275 alu.dst.sel = ctx->temp_reg; 3276 alu.dst.chan = 2; 3277 alu.dst.write = 1; 3278 alu.last = 1; 3279 r = r600_bytecode_add_alu(ctx->bc, &alu); 3280 if (r) 3281 return r; 3282 } 3283 3284 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 3285 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 3286 * muladd has no writemask, have to use another temp 3287 */ 3288 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3289 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 3290 alu.is_op3 = 1; 3291 3292 alu.src[0].sel = ctx->temp_reg; 3293 alu.src[0].chan = 0; 3294 alu.src[1].sel = ctx->temp_reg; 3295 alu.src[1].chan = 2; 3296 3297 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 3298 alu.src[2].chan = 0; 3299 alu.src[2].value = *(uint32_t *)&one_point_five; 3300 3301 alu.dst.sel = ctx->temp_reg; 3302 alu.dst.chan = 0; 3303 alu.dst.write = 1; 3304 3305 r = r600_bytecode_add_alu(ctx->bc, &alu); 3306 if (r) 3307 return r; 3308 3309 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3310 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 3311 alu.is_op3 = 1; 3312 3313 alu.src[0].sel 
= ctx->temp_reg; 3314 alu.src[0].chan = 1; 3315 alu.src[1].sel = ctx->temp_reg; 3316 alu.src[1].chan = 2; 3317 3318 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 3319 alu.src[2].chan = 0; 3320 alu.src[2].value = *(uint32_t *)&one_point_five; 3321 3322 alu.dst.sel = ctx->temp_reg; 3323 alu.dst.chan = 1; 3324 alu.dst.write = 1; 3325 3326 alu.last = 1; 3327 r = r600_bytecode_add_alu(ctx->bc, &alu); 3328 if (r) 3329 return r; 3330 3331 src_loaded = TRUE; 3332 src_gpr = ctx->temp_reg; 3333 } 3334 3335 if (src_requires_loading && !src_loaded) { 3336 for (i = 0; i < 4; i++) { 3337 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3338 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3339 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3340 alu.dst.sel = ctx->temp_reg; 3341 alu.dst.chan = i; 3342 if (i == 3) 3343 alu.last = 1; 3344 alu.dst.write = 1; 3345 r = r600_bytecode_add_alu(ctx->bc, &alu); 3346 if (r) 3347 return r; 3348 } 3349 src_loaded = TRUE; 3350 src_gpr = ctx->temp_reg; 3351 } 3352 3353 opcode = ctx->inst_info->r600_opcode; 3354 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 3355 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 3356 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 3357 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 3358 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) { 3359 switch (opcode) { 3360 case SQ_TEX_INST_SAMPLE: 3361 opcode = SQ_TEX_INST_SAMPLE_C; 3362 break; 3363 case SQ_TEX_INST_SAMPLE_L: 3364 opcode = SQ_TEX_INST_SAMPLE_C_L; 3365 break; 3366 case SQ_TEX_INST_SAMPLE_LB: 3367 opcode = SQ_TEX_INST_SAMPLE_C_LB; 3368 break; 3369 case SQ_TEX_INST_SAMPLE_G: 3370 opcode = SQ_TEX_INST_SAMPLE_C_G; 3371 break; 3372 } 3373 } 3374 3375 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 3376 tex.inst = opcode; 3377 3378 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 3379 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 3380 tex.src_gpr = src_gpr; 3381 tex.dst_gpr = 
ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 3382 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 3383 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 3384 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 3385 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 3386 if (src_loaded) { 3387 tex.src_sel_x = 0; 3388 tex.src_sel_y = 1; 3389 tex.src_sel_z = 2; 3390 tex.src_sel_w = 3; 3391 } else { 3392 tex.src_sel_x = ctx->src[0].swizzle[0]; 3393 tex.src_sel_y = ctx->src[0].swizzle[1]; 3394 tex.src_sel_z = ctx->src[0].swizzle[2]; 3395 tex.src_sel_w = ctx->src[0].swizzle[3]; 3396 tex.src_rel = ctx->src[0].rel; 3397 } 3398 3399 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 3400 tex.src_sel_x = 1; 3401 tex.src_sel_y = 0; 3402 tex.src_sel_z = 3; 3403 tex.src_sel_w = 1; 3404 } 3405 3406 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 3407 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 3408 tex.coord_type_x = 1; 3409 tex.coord_type_y = 1; 3410 } 3411 tex.coord_type_z = 1; 3412 tex.coord_type_w = 1; 3413 3414 tex.offset_x = offset_x; 3415 tex.offset_y = offset_y; 3416 tex.offset_z = offset_z; 3417 3418 /* Put the depth for comparison in W. 3419 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 3420 * Some instructions expect the depth in Z. 
*/ 3421 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 3422 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 3423 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 3424 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 3425 opcode != SQ_TEX_INST_SAMPLE_C_L && 3426 opcode != SQ_TEX_INST_SAMPLE_C_LB) { 3427 tex.src_sel_w = tex.src_sel_z; 3428 } 3429 3430 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 3431 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 3432 if (opcode == SQ_TEX_INST_SAMPLE_C_L || 3433 opcode == SQ_TEX_INST_SAMPLE_C_LB) { 3434 /* the array index is read from Y */ 3435 tex.coord_type_y = 0; 3436 } else { 3437 /* the array index is read from Z */ 3438 tex.coord_type_z = 0; 3439 tex.src_sel_z = tex.src_sel_y; 3440 } 3441 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 3442 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) 3443 /* the array index is read from Z */ 3444 tex.coord_type_z = 0; 3445 3446 r = r600_bytecode_add_tex(ctx->bc, &tex); 3447 if (r) 3448 return r; 3449 3450 /* add shadow ambient support - gallium doesn't do it yet */ 3451 return 0; 3452} 3453 3454static int tgsi_lrp(struct r600_shader_ctx *ctx) 3455{ 3456 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3457 struct r600_bytecode_alu alu; 3458 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3459 unsigned i; 3460 int r; 3461 3462 /* optimize if it's just an equal balance */ 3463 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 3464 for (i = 0; i < lasti + 1; i++) { 3465 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3466 continue; 3467 3468 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3469 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 3470 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3471 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 3472 alu.omod = 3; 3473 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3474 alu.dst.chan = i; 3475 if (i == lasti) { 3476 alu.last = 1; 3477 } 
3478 r = r600_bytecode_add_alu(ctx->bc, &alu); 3479 if (r) 3480 return r; 3481 } 3482 return 0; 3483 } 3484 3485 /* 1 - src0 */ 3486 for (i = 0; i < lasti + 1; i++) { 3487 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3488 continue; 3489 3490 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3491 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 3492 alu.src[0].sel = V_SQ_ALU_SRC_1; 3493 alu.src[0].chan = 0; 3494 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3495 r600_bytecode_src_toggle_neg(&alu.src[1]); 3496 alu.dst.sel = ctx->temp_reg; 3497 alu.dst.chan = i; 3498 if (i == lasti) { 3499 alu.last = 1; 3500 } 3501 alu.dst.write = 1; 3502 r = r600_bytecode_add_alu(ctx->bc, &alu); 3503 if (r) 3504 return r; 3505 } 3506 3507 /* (1 - src0) * src2 */ 3508 for (i = 0; i < lasti + 1; i++) { 3509 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3510 continue; 3511 3512 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3513 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3514 alu.src[0].sel = ctx->temp_reg; 3515 alu.src[0].chan = i; 3516 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 3517 alu.dst.sel = ctx->temp_reg; 3518 alu.dst.chan = i; 3519 if (i == lasti) { 3520 alu.last = 1; 3521 } 3522 alu.dst.write = 1; 3523 r = r600_bytecode_add_alu(ctx->bc, &alu); 3524 if (r) 3525 return r; 3526 } 3527 3528 /* src0 * src1 + (1 - src0) * src2 */ 3529 for (i = 0; i < lasti + 1; i++) { 3530 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3531 continue; 3532 3533 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3534 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 3535 alu.is_op3 = 1; 3536 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3537 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3538 alu.src[2].sel = ctx->temp_reg; 3539 alu.src[2].chan = i; 3540 3541 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3542 alu.dst.chan = i; 3543 if (i == lasti) { 3544 alu.last = 1; 3545 } 3546 r = r600_bytecode_add_alu(ctx->bc, &alu); 3547 if (r) 3548 
return r; 3549 } 3550 return 0; 3551} 3552 3553static int tgsi_cmp(struct r600_shader_ctx *ctx) 3554{ 3555 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3556 struct r600_bytecode_alu alu; 3557 int i, r; 3558 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3559 3560 for (i = 0; i < lasti + 1; i++) { 3561 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3562 continue; 3563 3564 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3565 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 3566 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3567 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 3568 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 3569 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3570 alu.dst.chan = i; 3571 alu.dst.write = 1; 3572 alu.is_op3 = 1; 3573 if (i == lasti) 3574 alu.last = 1; 3575 r = r600_bytecode_add_alu(ctx->bc, &alu); 3576 if (r) 3577 return r; 3578 } 3579 return 0; 3580} 3581 3582static int tgsi_xpd(struct r600_shader_ctx *ctx) 3583{ 3584 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3585 static const unsigned int src0_swizzle[] = {2, 0, 1}; 3586 static const unsigned int src1_swizzle[] = {1, 2, 0}; 3587 struct r600_bytecode_alu alu; 3588 uint32_t use_temp = 0; 3589 int i, r; 3590 3591 if (inst->Dst[0].Register.WriteMask != 0xf) 3592 use_temp = 1; 3593 3594 for (i = 0; i < 4; i++) { 3595 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3596 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3597 if (i < 3) { 3598 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 3599 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 3600 } else { 3601 alu.src[0].sel = V_SQ_ALU_SRC_0; 3602 alu.src[0].chan = i; 3603 alu.src[1].sel = V_SQ_ALU_SRC_0; 3604 alu.src[1].chan = i; 3605 } 3606 3607 alu.dst.sel = ctx->temp_reg; 3608 alu.dst.chan = i; 3609 alu.dst.write = 1; 3610 3611 if (i == 3) 3612 alu.last = 1; 3613 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 3614 if (r) 3615 return r; 3616 } 3617 3618 for (i = 0; i < 4; i++) { 3619 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3620 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 3621 3622 if (i < 3) { 3623 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 3624 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 3625 } else { 3626 alu.src[0].sel = V_SQ_ALU_SRC_0; 3627 alu.src[0].chan = i; 3628 alu.src[1].sel = V_SQ_ALU_SRC_0; 3629 alu.src[1].chan = i; 3630 } 3631 3632 alu.src[2].sel = ctx->temp_reg; 3633 alu.src[2].neg = 1; 3634 alu.src[2].chan = i; 3635 3636 if (use_temp) 3637 alu.dst.sel = ctx->temp_reg; 3638 else 3639 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3640 alu.dst.chan = i; 3641 alu.dst.write = 1; 3642 alu.is_op3 = 1; 3643 if (i == 3) 3644 alu.last = 1; 3645 r = r600_bytecode_add_alu(ctx->bc, &alu); 3646 if (r) 3647 return r; 3648 } 3649 if (use_temp) 3650 return tgsi_helper_copy(ctx, inst); 3651 return 0; 3652} 3653 3654static int tgsi_exp(struct r600_shader_ctx *ctx) 3655{ 3656 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3657 struct r600_bytecode_alu alu; 3658 int r; 3659 int i; 3660 3661 /* result.x = 2^floor(src); */ 3662 if (inst->Dst[0].Register.WriteMask & 1) { 3663 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3664 3665 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 3666 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3667 3668 alu.dst.sel = ctx->temp_reg; 3669 alu.dst.chan = 0; 3670 alu.dst.write = 1; 3671 alu.last = 1; 3672 r = r600_bytecode_add_alu(ctx->bc, &alu); 3673 if (r) 3674 return r; 3675 3676 if (ctx->bc->chip_class == CAYMAN) { 3677 for (i = 0; i < 3; i++) { 3678 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3679 alu.src[0].sel = ctx->temp_reg; 3680 alu.src[0].chan = 0; 3681 3682 alu.dst.sel = ctx->temp_reg; 3683 alu.dst.chan = i; 3684 if (i == 0) 3685 alu.dst.write = 1; 3686 if (i == 2) 3687 
alu.last = 1; 3688 r = r600_bytecode_add_alu(ctx->bc, &alu); 3689 if (r) 3690 return r; 3691 } 3692 } else { 3693 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3694 alu.src[0].sel = ctx->temp_reg; 3695 alu.src[0].chan = 0; 3696 3697 alu.dst.sel = ctx->temp_reg; 3698 alu.dst.chan = 0; 3699 alu.dst.write = 1; 3700 alu.last = 1; 3701 r = r600_bytecode_add_alu(ctx->bc, &alu); 3702 if (r) 3703 return r; 3704 } 3705 } 3706 3707 /* result.y = tmp - floor(tmp); */ 3708 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 3709 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3710 3711 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 3712 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3713 3714 alu.dst.sel = ctx->temp_reg; 3715#if 0 3716 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3717 if (r) 3718 return r; 3719#endif 3720 alu.dst.write = 1; 3721 alu.dst.chan = 1; 3722 3723 alu.last = 1; 3724 3725 r = r600_bytecode_add_alu(ctx->bc, &alu); 3726 if (r) 3727 return r; 3728 } 3729 3730 /* result.z = RoughApprox2ToX(tmp);*/ 3731 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 3732 if (ctx->bc->chip_class == CAYMAN) { 3733 for (i = 0; i < 3; i++) { 3734 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3735 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3736 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3737 3738 alu.dst.sel = ctx->temp_reg; 3739 alu.dst.chan = i; 3740 if (i == 2) { 3741 alu.dst.write = 1; 3742 alu.last = 1; 3743 } 3744 3745 r = r600_bytecode_add_alu(ctx->bc, &alu); 3746 if (r) 3747 return r; 3748 } 3749 } else { 3750 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3751 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3752 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3753 3754 alu.dst.sel = ctx->temp_reg; 3755 alu.dst.write = 1; 3756 alu.dst.chan = 2; 3757 3758 alu.last = 1; 3759 3760 r = r600_bytecode_add_alu(ctx->bc, &alu); 3761 if (r) 3762 return r; 3763 } 3764 } 3765 3766 /* result.w = 
1.0;*/ 3767 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 3768 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3769 3770 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3771 alu.src[0].sel = V_SQ_ALU_SRC_1; 3772 alu.src[0].chan = 0; 3773 3774 alu.dst.sel = ctx->temp_reg; 3775 alu.dst.chan = 3; 3776 alu.dst.write = 1; 3777 alu.last = 1; 3778 r = r600_bytecode_add_alu(ctx->bc, &alu); 3779 if (r) 3780 return r; 3781 } 3782 return tgsi_helper_copy(ctx, inst); 3783} 3784 3785static int tgsi_log(struct r600_shader_ctx *ctx) 3786{ 3787 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3788 struct r600_bytecode_alu alu; 3789 int r; 3790 int i; 3791 3792 /* result.x = floor(log2(|src|)); */ 3793 if (inst->Dst[0].Register.WriteMask & 1) { 3794 if (ctx->bc->chip_class == CAYMAN) { 3795 for (i = 0; i < 3; i++) { 3796 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3797 3798 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3799 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3800 r600_bytecode_src_set_abs(&alu.src[0]); 3801 3802 alu.dst.sel = ctx->temp_reg; 3803 alu.dst.chan = i; 3804 if (i == 0) 3805 alu.dst.write = 1; 3806 if (i == 2) 3807 alu.last = 1; 3808 r = r600_bytecode_add_alu(ctx->bc, &alu); 3809 if (r) 3810 return r; 3811 } 3812 3813 } else { 3814 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3815 3816 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3817 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3818 r600_bytecode_src_set_abs(&alu.src[0]); 3819 3820 alu.dst.sel = ctx->temp_reg; 3821 alu.dst.chan = 0; 3822 alu.dst.write = 1; 3823 alu.last = 1; 3824 r = r600_bytecode_add_alu(ctx->bc, &alu); 3825 if (r) 3826 return r; 3827 } 3828 3829 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 3830 alu.src[0].sel = ctx->temp_reg; 3831 alu.src[0].chan = 0; 3832 3833 alu.dst.sel = ctx->temp_reg; 3834 alu.dst.chan = 0; 3835 alu.dst.write = 1; 3836 alu.last = 1; 3837 3838 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 3839 if (r) 3840 return r; 3841 } 3842 3843 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 3844 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 3845 3846 if (ctx->bc->chip_class == CAYMAN) { 3847 for (i = 0; i < 3; i++) { 3848 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3849 3850 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3851 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3852 r600_bytecode_src_set_abs(&alu.src[0]); 3853 3854 alu.dst.sel = ctx->temp_reg; 3855 alu.dst.chan = i; 3856 if (i == 1) 3857 alu.dst.write = 1; 3858 if (i == 2) 3859 alu.last = 1; 3860 3861 r = r600_bytecode_add_alu(ctx->bc, &alu); 3862 if (r) 3863 return r; 3864 } 3865 } else { 3866 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3867 3868 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3869 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3870 r600_bytecode_src_set_abs(&alu.src[0]); 3871 3872 alu.dst.sel = ctx->temp_reg; 3873 alu.dst.chan = 1; 3874 alu.dst.write = 1; 3875 alu.last = 1; 3876 3877 r = r600_bytecode_add_alu(ctx->bc, &alu); 3878 if (r) 3879 return r; 3880 } 3881 3882 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3883 3884 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 3885 alu.src[0].sel = ctx->temp_reg; 3886 alu.src[0].chan = 1; 3887 3888 alu.dst.sel = ctx->temp_reg; 3889 alu.dst.chan = 1; 3890 alu.dst.write = 1; 3891 alu.last = 1; 3892 3893 r = r600_bytecode_add_alu(ctx->bc, &alu); 3894 if (r) 3895 return r; 3896 3897 if (ctx->bc->chip_class == CAYMAN) { 3898 for (i = 0; i < 3; i++) { 3899 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3900 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3901 alu.src[0].sel = ctx->temp_reg; 3902 alu.src[0].chan = 1; 3903 3904 alu.dst.sel = ctx->temp_reg; 3905 alu.dst.chan = i; 3906 if (i == 1) 3907 alu.dst.write = 1; 3908 if (i == 2) 3909 alu.last = 1; 3910 3911 r = r600_bytecode_add_alu(ctx->bc, &alu); 3912 if (r) 
3913 return r; 3914 } 3915 } else { 3916 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3917 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3918 alu.src[0].sel = ctx->temp_reg; 3919 alu.src[0].chan = 1; 3920 3921 alu.dst.sel = ctx->temp_reg; 3922 alu.dst.chan = 1; 3923 alu.dst.write = 1; 3924 alu.last = 1; 3925 3926 r = r600_bytecode_add_alu(ctx->bc, &alu); 3927 if (r) 3928 return r; 3929 } 3930 3931 if (ctx->bc->chip_class == CAYMAN) { 3932 for (i = 0; i < 3; i++) { 3933 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3934 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3935 alu.src[0].sel = ctx->temp_reg; 3936 alu.src[0].chan = 1; 3937 3938 alu.dst.sel = ctx->temp_reg; 3939 alu.dst.chan = i; 3940 if (i == 1) 3941 alu.dst.write = 1; 3942 if (i == 2) 3943 alu.last = 1; 3944 3945 r = r600_bytecode_add_alu(ctx->bc, &alu); 3946 if (r) 3947 return r; 3948 } 3949 } else { 3950 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3951 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3952 alu.src[0].sel = ctx->temp_reg; 3953 alu.src[0].chan = 1; 3954 3955 alu.dst.sel = ctx->temp_reg; 3956 alu.dst.chan = 1; 3957 alu.dst.write = 1; 3958 alu.last = 1; 3959 3960 r = r600_bytecode_add_alu(ctx->bc, &alu); 3961 if (r) 3962 return r; 3963 } 3964 3965 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3966 3967 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3968 3969 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3970 r600_bytecode_src_set_abs(&alu.src[0]); 3971 3972 alu.src[1].sel = ctx->temp_reg; 3973 alu.src[1].chan = 1; 3974 3975 alu.dst.sel = ctx->temp_reg; 3976 alu.dst.chan = 1; 3977 alu.dst.write = 1; 3978 alu.last = 1; 3979 3980 r = r600_bytecode_add_alu(ctx->bc, &alu); 3981 if (r) 3982 return r; 3983 } 3984 3985 /* result.z = log2(|src|);*/ 3986 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 3987 if (ctx->bc->chip_class == CAYMAN) { 3988 for (i = 0; i < 3; i++) { 3989 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 3990 3991 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3992 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3993 r600_bytecode_src_set_abs(&alu.src[0]); 3994 3995 alu.dst.sel = ctx->temp_reg; 3996 if (i == 2) 3997 alu.dst.write = 1; 3998 alu.dst.chan = i; 3999 if (i == 2) 4000 alu.last = 1; 4001 4002 r = r600_bytecode_add_alu(ctx->bc, &alu); 4003 if (r) 4004 return r; 4005 } 4006 } else { 4007 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4008 4009 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 4010 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4011 r600_bytecode_src_set_abs(&alu.src[0]); 4012 4013 alu.dst.sel = ctx->temp_reg; 4014 alu.dst.write = 1; 4015 alu.dst.chan = 2; 4016 alu.last = 1; 4017 4018 r = r600_bytecode_add_alu(ctx->bc, &alu); 4019 if (r) 4020 return r; 4021 } 4022 } 4023 4024 /* result.w = 1.0; */ 4025 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 4026 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4027 4028 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 4029 alu.src[0].sel = V_SQ_ALU_SRC_1; 4030 alu.src[0].chan = 0; 4031 4032 alu.dst.sel = ctx->temp_reg; 4033 alu.dst.chan = 3; 4034 alu.dst.write = 1; 4035 alu.last = 1; 4036 4037 r = r600_bytecode_add_alu(ctx->bc, &alu); 4038 if (r) 4039 return r; 4040 } 4041 4042 return tgsi_helper_copy(ctx, inst); 4043} 4044 4045static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 4046{ 4047 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4048 struct r600_bytecode_alu alu; 4049 int r; 4050 4051 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4052 4053 switch (inst->Instruction.Opcode) { 4054 case TGSI_OPCODE_ARL: 4055 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 4056 break; 4057 case TGSI_OPCODE_ARR: 4058 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 4059 break; 4060 case TGSI_OPCODE_UARL: 4061 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 4062 break; 4063 default: 4064 
assert(0); 4065 return -1; 4066 } 4067 4068 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4069 alu.last = 1; 4070 alu.dst.sel = ctx->bc->ar_reg; 4071 alu.dst.write = 1; 4072 r = r600_bytecode_add_alu(ctx->bc, &alu); 4073 if (r) 4074 return r; 4075 4076 ctx->bc->ar_loaded = 0; 4077 return 0; 4078} 4079static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 4080{ 4081 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4082 struct r600_bytecode_alu alu; 4083 int r; 4084 4085 switch (inst->Instruction.Opcode) { 4086 case TGSI_OPCODE_ARL: 4087 memset(&alu, 0, sizeof(alu)); 4088 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 4089 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4090 alu.dst.sel = ctx->bc->ar_reg; 4091 alu.dst.write = 1; 4092 alu.last = 1; 4093 4094 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4095 return r; 4096 4097 memset(&alu, 0, sizeof(alu)); 4098 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 4099 alu.src[0].sel = ctx->bc->ar_reg; 4100 alu.dst.sel = ctx->bc->ar_reg; 4101 alu.dst.write = 1; 4102 alu.last = 1; 4103 4104 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4105 return r; 4106 break; 4107 case TGSI_OPCODE_ARR: 4108 memset(&alu, 0, sizeof(alu)); 4109 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 4110 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4111 alu.dst.sel = ctx->bc->ar_reg; 4112 alu.dst.write = 1; 4113 alu.last = 1; 4114 4115 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4116 return r; 4117 break; 4118 case TGSI_OPCODE_UARL: 4119 memset(&alu, 0, sizeof(alu)); 4120 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 4121 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4122 alu.dst.sel = ctx->bc->ar_reg; 4123 alu.dst.write = 1; 4124 alu.last = 1; 4125 4126 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4127 return r; 4128 break; 4129 default: 4130 assert(0); 4131 return -1; 4132 } 4133 4134 ctx->bc->ar_loaded = 0; 4135 return 0; 4136} 4137 4138static int tgsi_opdst(struct r600_shader_ctx 
*ctx) 4139{ 4140 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4141 struct r600_bytecode_alu alu; 4142 int i, r = 0; 4143 4144 for (i = 0; i < 4; i++) { 4145 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4146 4147 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 4148 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4149 4150 if (i == 0 || i == 3) { 4151 alu.src[0].sel = V_SQ_ALU_SRC_1; 4152 } else { 4153 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4154 } 4155 4156 if (i == 0 || i == 2) { 4157 alu.src[1].sel = V_SQ_ALU_SRC_1; 4158 } else { 4159 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4160 } 4161 if (i == 3) 4162 alu.last = 1; 4163 r = r600_bytecode_add_alu(ctx->bc, &alu); 4164 if (r) 4165 return r; 4166 } 4167 return 0; 4168} 4169 4170static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 4171{ 4172 struct r600_bytecode_alu alu; 4173 int r; 4174 4175 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4176 alu.inst = opcode; 4177 alu.predicate = 1; 4178 4179 alu.dst.sel = ctx->temp_reg; 4180 alu.dst.write = 1; 4181 alu.dst.chan = 0; 4182 4183 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4184 alu.src[1].sel = V_SQ_ALU_SRC_0; 4185 alu.src[1].chan = 0; 4186 4187 alu.last = 1; 4188 4189 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 4190 if (r) 4191 return r; 4192 return 0; 4193} 4194 4195static int pops(struct r600_shader_ctx *ctx, int pops) 4196{ 4197 unsigned force_pop = ctx->bc->force_add_cf; 4198 4199 if (!force_pop) { 4200 int alu_pop = 3; 4201 if (ctx->bc->cf_last) { 4202 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)) 4203 alu_pop = 0; 4204 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER)) 4205 alu_pop = 1; 4206 } 4207 alu_pop += pops; 4208 if (alu_pop == 1) { 4209 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER); 4210 ctx->bc->force_add_cf = 1; 
4211 } else if (alu_pop == 2) { 4212 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER); 4213 ctx->bc->force_add_cf = 1; 4214 } else { 4215 force_pop = 1; 4216 } 4217 } 4218 4219 if (force_pop) { 4220 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 4221 ctx->bc->cf_last->pop_count = pops; 4222 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 4223 } 4224 4225 return 0; 4226} 4227 4228static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 4229{ 4230 switch(reason) { 4231 case FC_PUSH_VPM: 4232 ctx->bc->callstack[ctx->bc->call_sp].current--; 4233 break; 4234 case FC_PUSH_WQM: 4235 case FC_LOOP: 4236 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 4237 break; 4238 case FC_REP: 4239 /* TOODO : for 16 vp asic should -= 2; */ 4240 ctx->bc->callstack[ctx->bc->call_sp].current --; 4241 break; 4242 } 4243} 4244 4245static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 4246{ 4247 if (check_max_only) { 4248 int diff; 4249 switch (reason) { 4250 case FC_PUSH_VPM: 4251 diff = 1; 4252 break; 4253 case FC_PUSH_WQM: 4254 diff = 4; 4255 break; 4256 default: 4257 assert(0); 4258 diff = 0; 4259 } 4260 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 4261 ctx->bc->callstack[ctx->bc->call_sp].max) { 4262 ctx->bc->callstack[ctx->bc->call_sp].max = 4263 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 4264 } 4265 return; 4266 } 4267 switch (reason) { 4268 case FC_PUSH_VPM: 4269 ctx->bc->callstack[ctx->bc->call_sp].current++; 4270 break; 4271 case FC_PUSH_WQM: 4272 case FC_LOOP: 4273 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 4274 break; 4275 case FC_REP: 4276 ctx->bc->callstack[ctx->bc->call_sp].current++; 4277 break; 4278 } 4279 4280 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 4281 ctx->bc->callstack[ctx->bc->call_sp].max) { 4282 ctx->bc->callstack[ctx->bc->call_sp].max = 4283 
ctx->bc->callstack[ctx->bc->call_sp].current; 4284 } 4285} 4286 4287static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 4288{ 4289 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 4290 4291 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, 4292 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 4293 sp->mid[sp->num_mid] = ctx->bc->cf_last; 4294 sp->num_mid++; 4295} 4296 4297static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 4298{ 4299 ctx->bc->fc_sp++; 4300 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 4301 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 4302} 4303 4304static void fc_poplevel(struct r600_shader_ctx *ctx) 4305{ 4306 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 4307 if (sp->mid) { 4308 free(sp->mid); 4309 sp->mid = NULL; 4310 } 4311 sp->num_mid = 0; 4312 sp->start = NULL; 4313 sp->type = 0; 4314 ctx->bc->fc_sp--; 4315} 4316 4317#if 0 4318static int emit_return(struct r600_shader_ctx *ctx) 4319{ 4320 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); 4321 return 0; 4322} 4323 4324static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 4325{ 4326 4327 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 4328 ctx->bc->cf_last->pop_count = pops; 4329 /* TODO work out offset */ 4330 return 0; 4331} 4332 4333static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 4334{ 4335 return 0; 4336} 4337 4338static void emit_testflag(struct r600_shader_ctx *ctx) 4339{ 4340 4341} 4342 4343static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 4344{ 4345 emit_testflag(ctx); 4346 emit_jump_to_offset(ctx, 1, 4); 4347 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 4348 pops(ctx, ifidx + 1); 4349 emit_return(ctx); 4350} 4351 4352static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 4353{ 4354 emit_testflag(ctx); 4355 4356 
r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 4357 ctx->bc->cf_last->pop_count = 1; 4358 4359 fc_set_mid(ctx, fc_sp); 4360 4361 pops(ctx, 1); 4362} 4363#endif 4364 4365static int tgsi_if(struct r600_shader_ctx *ctx) 4366{ 4367 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)); 4368 4369 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 4370 4371 fc_pushlevel(ctx, FC_IF); 4372 4373 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 4374 return 0; 4375} 4376 4377static int tgsi_else(struct r600_shader_ctx *ctx) 4378{ 4379 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 4380 ctx->bc->cf_last->pop_count = 1; 4381 4382 fc_set_mid(ctx, ctx->bc->fc_sp); 4383 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 4384 return 0; 4385} 4386 4387static int tgsi_endif(struct r600_shader_ctx *ctx) 4388{ 4389 pops(ctx, 1); 4390 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 4391 R600_ERR("if/endif unbalanced in shader\n"); 4392 return -1; 4393 } 4394 4395 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 4396 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 4397 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 4398 } else { 4399 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 4400 } 4401 fc_poplevel(ctx); 4402 4403 callstack_decrease_current(ctx, FC_PUSH_VPM); 4404 return 0; 4405} 4406 4407static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 4408{ 4409 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 4410 4411 fc_pushlevel(ctx, FC_LOOP); 4412 4413 /* check stack depth */ 4414 callstack_check_depth(ctx, FC_LOOP, 0); 4415 return 0; 4416} 4417 4418static int tgsi_endloop(struct r600_shader_ctx *ctx) 4419{ 4420 int i; 4421 4422 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 4423 4424 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != 
FC_LOOP) { 4425 R600_ERR("loop/endloop in shader code are not paired.\n"); 4426 return -EINVAL; 4427 } 4428 4429 /* fixup loop pointers - from r600isa 4430 LOOP END points to CF after LOOP START, 4431 LOOP START point to CF after LOOP END 4432 BRK/CONT point to LOOP END CF 4433 */ 4434 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 4435 4436 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 4437 4438 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 4439 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 4440 } 4441 /* TODO add LOOPRET support */ 4442 fc_poplevel(ctx); 4443 callstack_decrease_current(ctx, FC_LOOP); 4444 return 0; 4445} 4446 4447static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 4448{ 4449 unsigned int fscp; 4450 4451 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 4452 { 4453 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 4454 break; 4455 } 4456 4457 if (fscp == 0) { 4458 R600_ERR("Break not inside loop/endloop pair\n"); 4459 return -EINVAL; 4460 } 4461 4462 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 4463 ctx->bc->cf_last->pop_count = 1; 4464 4465 fc_set_mid(ctx, fscp); 4466 4467 pops(ctx, 1); 4468 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 4469 return 0; 4470} 4471 4472static int tgsi_umad(struct r600_shader_ctx *ctx) 4473{ 4474 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4475 struct r600_bytecode_alu alu; 4476 int i, j, r; 4477 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4478 4479 /* src0 * src1 */ 4480 for (i = 0; i < lasti + 1; i++) { 4481 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4482 continue; 4483 4484 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4485 4486 alu.dst.chan = i; 4487 alu.dst.sel = ctx->temp_reg; 4488 alu.dst.write = 1; 4489 4490 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 4491 for (j = 0; j < 2; j++) { 4492 
r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 4493 } 4494 4495 alu.last = 1; 4496 r = r600_bytecode_add_alu(ctx->bc, &alu); 4497 if (r) 4498 return r; 4499 } 4500 4501 4502 for (i = 0; i < lasti + 1; i++) { 4503 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4504 continue; 4505 4506 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4507 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4508 4509 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 4510 4511 alu.src[0].sel = ctx->temp_reg; 4512 alu.src[0].chan = i; 4513 4514 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 4515 if (i == lasti) { 4516 alu.last = 1; 4517 } 4518 r = r600_bytecode_add_alu(ctx->bc, &alu); 4519 if (r) 4520 return r; 4521 } 4522 return 0; 4523} 4524 4525static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 4526 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 4527 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4528 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 4529 4530 /* FIXME: 4531 * For state trackers other than OpenGL, we'll want to use 4532 * _RECIP_IEEE instead. 
4533 */ 4534 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 4535 4536 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 4537 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 4538 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 4539 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 4540 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4541 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4542 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4543 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 4544 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 4545 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 4546 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 4547 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 4548 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 4549 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4550 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 4551 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4552 /* gap */ 4553 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4554 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4555 /* gap */ 4556 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4557 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4558 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 4559 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4560 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 4561 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 4562 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, 
tgsi_trans_srcx_replicate}, 4563 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 4564 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 4565 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 4566 /* gap */ 4567 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4568 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4569 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4570 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4571 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 4572 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 4573 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 4574 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 4575 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4576 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4577 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4578 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4579 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4580 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 4581 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4582 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 4583 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 4584 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 4585 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 4586 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4587 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4588 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 4589 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, 
tgsi_tex}, 4590 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4591 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4592 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4593 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4594 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4595 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4596 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 4597 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4598 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4599 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4600 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 4601 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 4602 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 4603 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 4604 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4605 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4606 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4607 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 4608 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 4609 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 4610 /* gap */ 4611 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4612 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4613 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 4614 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 4615 /* gap */ 4616 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4617 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 4618 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4619 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4620 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4621 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 4622 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 4623 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 4624 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans}, 4625 /* gap */ 4626 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4627 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 4628 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 4629 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, 4630 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 4631 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4632 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 4633 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 4634 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 4635 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4636 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4637 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 4638 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4639 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 4640 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4641 /* gap */ 4642 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4643 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4644 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4645 {106, 
0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill},  /* conditional kill */
	{TGSI_OPCODE_END,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end},  /* aka HALT */
	/* gap */
	{118,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
	{TGSI_OPCODE_IDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
	{TGSI_OPCODE_IMAX,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
	{TGSI_OPCODE_IMIN,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
	{TGSI_OPCODE_INEG,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
	{TGSI_OPCODE_ISGE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
	{TGSI_OPCODE_ISHR,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
	{TGSI_OPCODE_ISLT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
	/*
	 * F2U is routed through tgsi_op2_trans like the other float/int
	 * conversions of this table (F2I, I2F, U2F) instead of plain tgsi_op2.
	 * NOTE(review): assumes FLT_TO_UINT has the same issue restriction as
	 * FLT_TO_INT on this hardware generation - confirm against the ISA doc.
	 */
	{TGSI_OPCODE_F2U,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2_trans},
	{TGSI_OPCODE_U2F,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
	{TGSI_OPCODE_UADD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
	{TGSI_OPCODE_UDIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
4672 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 4673 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 4674 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 4675 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, 4676 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, 4677 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 4678 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 4679 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans}, 4680 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, 4681 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap}, 4682 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4683 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4684 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4685 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4686 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 4687 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 4688 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 4689 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 4690 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 4691 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 4692 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 4693 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 4694 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 4695 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 4696 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 4697 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 4698 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl}, 4699 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 4700 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 4701 {TGSI_OPCODE_ISSG, 0, 0, 
tgsi_issg}, 4702 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4703}; 4704 4705static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 4706 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4707 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4708 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 4709 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 4710 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, 4711 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 4712 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 4713 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 4714 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4715 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4716 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4717 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 4718 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 4719 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 4720 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 4721 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 4722 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 4723 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4724 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 4725 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4726 /* gap */ 4727 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4728 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4729 /* gap */ 4730 {22, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4731 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4732 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 4733 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4734 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 4735 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 4736 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 4737 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 4738 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 4739 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 4740 /* gap */ 4741 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4742 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4743 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4744 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4745 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 4746 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 4747 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 4748 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 4749 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4750 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4751 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4752 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4753 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4754 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 4755 {TGSI_OPCODE_SFL, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4756 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 4757 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 4758 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 4759 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 4760 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4761 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4762 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 4763 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4764 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4765 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4766 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4767 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4768 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4769 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4770 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4771 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4772 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4773 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4774 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 4775 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 4776 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 4777 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 4778 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4779 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4780 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, 
tgsi_dp}, 4781 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 4782 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 4783 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 4784 /* gap */ 4785 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4786 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4787 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 4788 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 4789 /* gap */ 4790 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4791 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4792 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4793 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4794 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4795 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 4796 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 4797 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 4798 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2}, 4799 /* gap */ 4800 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4801 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 4802 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 4803 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, 4804 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 4805 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4806 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 4807 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 4808 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 4809 
{TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4810 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4811 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 4812 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4813 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 4814 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4815 /* gap */ 4816 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4817 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4818 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4819 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4820 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4821 /* gap */ 4822 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4823 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4824 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4825 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4826 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4827 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4828 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4829 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4830 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 4831 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 4832 /* gap */ 4833 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4834 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i}, 4835 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 4836 
{TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 4837 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 4838 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 4839 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 4840 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2}, 4841 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 4842 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i}, 4843 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans}, 4844 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 4845 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, 4846 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 4847 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 4848 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 4849 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, 4850 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, 4851 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 4852 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 4853 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2}, 4854 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, 4855 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, 4856 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4857 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4858 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4859 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 4860 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 4861 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 4862 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 4863 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 4864 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 4865 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 4866 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 4867 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 4868 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 4869 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 4870 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 4871 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 4872 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, 4873 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 4874 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 4875 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 4876 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4877}; 4878 4879static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 4880 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4881 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4882 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 4883 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, 4884 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, 4885 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 4886 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 4887 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 4888 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4889 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4890 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4891 {TGSI_OPCODE_DST, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 4892 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 4893 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 4894 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 4895 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 4896 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 4897 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4898 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 4899 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4900 /* gap */ 4901 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4902 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4903 /* gap */ 4904 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4905 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4906 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 4907 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4908 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 4909 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 4910 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, 4911 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, 4912 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, 4913 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 4914 /* gap */ 4915 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4916 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4917 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4918 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 
4919 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, 4920 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 4921 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 4922 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 4923 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4924 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4925 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4926 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4927 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4928 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 4929 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4930 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 4931 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, 4932 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 4933 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 4934 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4935 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4936 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 4937 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4938 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4939 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4940 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4941 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4942 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4943 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4944 
{TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4945 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4946 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4947 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4948 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 4949 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 4950 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 4951 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 4952 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4953 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4954 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4955 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 4956 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 4957 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 4958 /* gap */ 4959 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4960 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4961 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 4962 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 4963 /* gap */ 4964 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4965 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4966 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4967 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4968 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4969 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4970 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 4971 {TGSI_OPCODE_TRUNC, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 4972 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4973 /* gap */ 4974 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4975 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4976 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4977 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4978 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 4979 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4980 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 4981 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 4982 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 4983 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4984 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4985 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 4986 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4987 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 4988 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4989 /* gap */ 4990 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4991 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4992 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4993 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4994 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4995 /* gap */ 4996 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4997 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4998 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4999 {111, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5000 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5001 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5002 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5003 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5004 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 5005 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 5006 /* gap */ 5007 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5008 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5009 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5010 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 5011 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 5012 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5013 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5014 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5015 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5016 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5017 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5018 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5019 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5020 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5021 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5022 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5023 {TGSI_OPCODE_UMOD, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5024 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5025 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5026 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5027 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5028 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5029 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5030 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5031 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5032 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5033 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5034 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 5035 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 5036 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 5037 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 5038 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 5039 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 5040 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 5041 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 5042 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 5043 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 5044 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 5045 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 5046 {TGSI_OPCODE_UARL, 0, 0, tgsi_unsupported}, 5047 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 5048 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5049}; 5050