r600_shader.c revision b69728cba53765f6799a66ebcab2058be6d53602
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_info.h" 25#include "tgsi/tgsi_parse.h" 26#include "tgsi/tgsi_scan.h" 27#include "tgsi/tgsi_dump.h" 28#include "util/u_format.h" 29#include "r600_pipe.h" 30#include "r600_asm.h" 31#include "r600_sq.h" 32#include "r600_formats.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37#include <byteswap.h> 38 39/* CAYMAN notes 40Why CAYMAN got loops for lots of instructions is explained here. 41 42-These 8xx t-slot only ops are implemented in all vector slots. 43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 44These 8xx t-slot only opcodes become vector ops, with all four 45slots expecting the arguments on sources a and b. Result is 46broadcast to all channels. 
47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 48These 8xx t-slot only opcodes become vector ops in the z, y, and 49x slots. 50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 52SQRT_IEEE/_64 53SIN/COS 54The w slot may have an independent co-issued operation, or if the 55result is required to be in the w slot, the opcode above may be 56issued in the w slot as well. 57The compiler must issue the source argument to slots z, y, and x 58*/ 59 60static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 61{ 62 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 63 struct r600_shader *rshader = &shader->shader; 64 uint32_t *ptr; 65 int i; 66 67 /* copy new shader */ 68 if (shader->bo == NULL) { 69 shader->bo = (struct r600_resource*) 70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4); 71 if (shader->bo == NULL) { 72 return -ENOMEM; 73 } 74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->ctx.cs, PIPE_TRANSFER_WRITE); 75 if (R600_BIG_ENDIAN) { 76 for (i = 0; i < rshader->bc.ndw; ++i) { 77 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 78 } 79 } else { 80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 81 } 82 rctx->ws->buffer_unmap(shader->bo->buf); 83 } 84 /* build state */ 85 switch (rshader->processor_type) { 86 case TGSI_PROCESSOR_VERTEX: 87 if (rctx->chip_class >= EVERGREEN) { 88 evergreen_pipe_shader_vs(ctx, shader); 89 } else { 90 r600_pipe_shader_vs(ctx, shader); 91 } 92 break; 93 case TGSI_PROCESSOR_FRAGMENT: 94 if (rctx->chip_class >= EVERGREEN) { 95 evergreen_pipe_shader_ps(ctx, shader); 96 } else { 97 r600_pipe_shader_ps(ctx, shader); 98 } 99 break; 100 default: 101 return -EINVAL; 102 } 103 return 0; 104} 105 106static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader); 107 108int r600_pipe_shader_create(struct pipe_context *ctx, 
struct r600_pipe_shader *shader) 109{ 110 static int dump_shaders = -1; 111 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 112 int r; 113 114 /* Would like some magic "get_bool_option_once" routine. 115 */ 116 if (dump_shaders == -1) 117 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 118 119 if (dump_shaders) { 120 fprintf(stderr, "--------------------------------------------------------------\n"); 121 tgsi_dump(shader->tokens, 0); 122 123 if (shader->so.num_outputs) { 124 unsigned i; 125 fprintf(stderr, "STREAMOUT\n"); 126 for (i = 0; i < shader->so.num_outputs; i++) { 127 unsigned mask = ((1 << shader->so.output[i].num_components) - 1) << 128 shader->so.output[i].start_component; 129 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i, 130 shader->so.output[i].output_buffer, shader->so.output[i].register_index, 131 mask & 1 ? "x" : "_", 132 (mask >> 1) & 1 ? "y" : "_", 133 (mask >> 2) & 1 ? "z" : "_", 134 (mask >> 3) & 1 ? "w" : "_"); 135 } 136 } 137 } 138 r = r600_shader_from_tgsi(rctx, shader); 139 if (r) { 140 R600_ERR("translation from TGSI failed !\n"); 141 return r; 142 } 143 r = r600_bytecode_build(&shader->shader.bc); 144 if (r) { 145 R600_ERR("building bytecode failed !\n"); 146 return r; 147 } 148 if (dump_shaders) { 149 r600_bytecode_dump(&shader->shader.bc); 150 fprintf(stderr, "______________________________________________________________\n"); 151 } 152 return r600_pipe_shader(ctx, shader); 153} 154 155void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 156{ 157 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); 158 r600_bytecode_clear(&shader->shader.bc); 159 160 memset(&shader->shader,0,sizeof(struct r600_shader)); 161} 162 163/* 164 * tgsi -> r600 shader 165 */ 166struct r600_shader_tgsi_instruction; 167 168struct r600_shader_src { 169 unsigned sel; 170 unsigned swizzle[4]; 171 unsigned neg; 172 unsigned abs; 173 unsigned rel; 174 uint32_t 
value[4]; 175}; 176 177struct r600_shader_ctx { 178 struct tgsi_shader_info info; 179 struct tgsi_parse_context parse; 180 const struct tgsi_token *tokens; 181 unsigned type; 182 unsigned file_offset[TGSI_FILE_COUNT]; 183 unsigned temp_reg; 184 struct r600_shader_tgsi_instruction *inst_info; 185 struct r600_bytecode *bc; 186 struct r600_shader *shader; 187 struct r600_shader_src src[4]; 188 u32 *literals; 189 u32 nliterals; 190 u32 max_driver_temp_used; 191 /* needed for evergreen interpolation */ 192 boolean input_centroid; 193 boolean input_linear; 194 boolean input_perspective; 195 int num_interp_gpr; 196 int face_gpr; 197 int colors_used; 198}; 199 200struct r600_shader_tgsi_instruction { 201 unsigned tgsi_opcode; 202 unsigned is_op3; 203 unsigned r600_opcode; 204 int (*process)(struct r600_shader_ctx *ctx); 205}; 206 207static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 208static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 209 210static int tgsi_is_supported(struct r600_shader_ctx *ctx) 211{ 212 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 213 int j; 214 215 if (i->Instruction.NumDstRegs > 1) { 216 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 217 return -EINVAL; 218 } 219 if (i->Instruction.Predicate) { 220 R600_ERR("predicate unsupported\n"); 221 return -EINVAL; 222 } 223#if 0 224 if (i->Instruction.Label) { 225 R600_ERR("label unsupported\n"); 226 return -EINVAL; 227 } 228#endif 229 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 230 if (i->Src[j].Register.Dimension) { 231 R600_ERR("unsupported src %d (dimension %d)\n", j, 232 i->Src[j].Register.Dimension); 233 return -EINVAL; 234 } 235 } 236 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 237 if (i->Dst[j].Register.Dimension) { 238 R600_ERR("unsupported dst (dimension)\n"); 239 return -EINVAL; 240 } 241 } 242 return 0; 243} 244 245static int 
evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 246{ 247 int i, r; 248 struct r600_bytecode_alu alu; 249 int gpr = 0, base_chan = 0; 250 int ij_index = 0; 251 252 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 253 ij_index = 0; 254 if (ctx->shader->input[input].centroid) 255 ij_index++; 256 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 257 ij_index = 0; 258 /* if we have perspective add one */ 259 if (ctx->input_perspective) { 260 ij_index++; 261 /* if we have perspective centroid */ 262 if (ctx->input_centroid) 263 ij_index++; 264 } 265 if (ctx->shader->input[input].centroid) 266 ij_index++; 267 } 268 269 /* work out gpr and base_chan from index */ 270 gpr = ij_index / 2; 271 base_chan = (2 * (ij_index % 2)) + 1; 272 273 for (i = 0; i < 8; i++) { 274 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 275 276 if (i < 4) 277 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 278 else 279 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 280 281 if ((i > 1) && (i < 6)) { 282 alu.dst.sel = ctx->shader->input[input].gpr; 283 alu.dst.write = 1; 284 } 285 286 alu.dst.chan = i % 4; 287 288 alu.src[0].sel = gpr; 289 alu.src[0].chan = (base_chan - (i % 2)); 290 291 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 292 293 alu.bank_swizzle_force = SQ_ALU_VEC_210; 294 if ((i % 4) == 3) 295 alu.last = 1; 296 r = r600_bytecode_add_alu(ctx->bc, &alu); 297 if (r) 298 return r; 299 } 300 return 0; 301} 302 303static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 304{ 305 int i, r; 306 struct r600_bytecode_alu alu; 307 308 for (i = 0; i < 4; i++) { 309 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 310 311 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0; 312 313 alu.dst.sel = ctx->shader->input[input].gpr; 314 alu.dst.write = 1; 315 316 alu.dst.chan = i; 317 318 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 319 
alu.src[0].chan = i; 320 321 if (i == 3) 322 alu.last = 1; 323 r = r600_bytecode_add_alu(ctx->bc, &alu); 324 if (r) 325 return r; 326 } 327 return 0; 328} 329 330/* 331 * Special export handling in shaders 332 * 333 * shader export ARRAY_BASE for EXPORT_POS: 334 * 60 is position 335 * 61 is misc vector 336 * 62, 63 are clip distance vectors 337 * 338 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 339 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 340 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 341 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 342 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 343 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 344 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 345 * exclusive from render target index) 346 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 347 * 348 * 349 * shader export ARRAY_BASE for EXPORT_PIXEL: 350 * 0-7 CB targets 351 * 61 computed Z vector 352 * 353 * The use of the values exported in the computed Z vector are controlled 354 * by DB_SHADER_CONTROL: 355 * Z_EXPORT_ENABLE - Z as a float in RED 356 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 357 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 358 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 359 * DB_SOURCE_FORMAT - export control restrictions 360 * 361 */ 362 363 364/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 365static int r600_spi_sid(struct r600_shader_io * io) 366{ 367 int index, name = io->name; 368 369 /* These params are handled differently, they don't need 370 * semantic indices, so we'll use 0 for them. 
371 */ 372 if (name == TGSI_SEMANTIC_POSITION || 373 name == TGSI_SEMANTIC_PSIZE || 374 name == TGSI_SEMANTIC_FACE) 375 index = 0; 376 else { 377 if (name == TGSI_SEMANTIC_GENERIC) { 378 /* For generic params simply use sid from tgsi */ 379 index = io->sid; 380 } else { 381 /* For non-generic params - pack name and sid into 8 bits */ 382 index = 0x80 | (name<<3) | (io->sid); 383 } 384 385 /* Make sure that all really used indices have nonzero value, so 386 * we can just compare it to 0 later instead of comparing the name 387 * with different values to detect special cases. */ 388 index++; 389 } 390 391 return index; 392}; 393 394/* turn input into interpolate on EG */ 395static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) 396{ 397 int r = 0; 398 399 if (ctx->shader->input[index].spi_sid) { 400 ctx->shader->input[index].lds_pos = ctx->shader->nlds++; 401 if (ctx->shader->input[index].interpolate > 0) { 402 r = evergreen_interp_alu(ctx, index); 403 } else { 404 r = evergreen_interp_flat(ctx, index); 405 } 406 } 407 return r; 408} 409 410static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back) 411{ 412 struct r600_bytecode_alu alu; 413 int i, r; 414 int gpr_front = ctx->shader->input[front].gpr; 415 int gpr_back = ctx->shader->input[back].gpr; 416 417 for (i = 0; i < 4; i++) { 418 memset(&alu, 0, sizeof(alu)); 419 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 420 alu.is_op3 = 1; 421 alu.dst.write = 1; 422 alu.dst.sel = gpr_front; 423 alu.src[0].sel = ctx->face_gpr; 424 alu.src[1].sel = gpr_front; 425 alu.src[2].sel = gpr_back; 426 427 alu.dst.chan = i; 428 alu.src[1].chan = i; 429 alu.src[2].chan = i; 430 alu.last = (i==3); 431 432 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 433 return r; 434 } 435 436 return 0; 437} 438 439static int tgsi_declaration(struct r600_shader_ctx *ctx) 440{ 441 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 442 unsigned i; 443 int r; 444 445 switch 
(d->Declaration.File) { 446 case TGSI_FILE_INPUT: 447 i = ctx->shader->ninput++; 448 ctx->shader->input[i].name = d->Semantic.Name; 449 ctx->shader->input[i].sid = d->Semantic.Index; 450 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 451 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 452 ctx->shader->input[i].centroid = d->Declaration.Centroid; 453 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; 454 if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { 455 if (ctx->shader->input[i].name == TGSI_SEMANTIC_FACE) 456 ctx->face_gpr = ctx->shader->input[i].gpr; 457 else if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) 458 ctx->colors_used++; 459 if (ctx->bc->chip_class >= EVERGREEN) { 460 r = evergreen_interp_input(ctx, i); 461 if (r) 462 return r; 463 } 464 } 465 break; 466 case TGSI_FILE_OUTPUT: 467 i = ctx->shader->noutput++; 468 ctx->shader->output[i].name = d->Semantic.Name; 469 ctx->shader->output[i].sid = d->Semantic.Index; 470 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 471 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; 472 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 473 ctx->shader->output[i].write_mask = d->Declaration.UsageMask; 474 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 475 switch (d->Semantic.Name) { 476 case TGSI_SEMANTIC_CLIPDIST: 477 ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2); 478 break; 479 case TGSI_SEMANTIC_PSIZE: 480 ctx->shader->vs_out_misc_write = 1; 481 break; 482 } 483 } 484 break; 485 case TGSI_FILE_CONSTANT: 486 case TGSI_FILE_TEMPORARY: 487 case TGSI_FILE_SAMPLER: 488 case TGSI_FILE_ADDRESS: 489 break; 490 491 case TGSI_FILE_SYSTEM_VALUE: 492 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 493 struct r600_bytecode_alu alu; 494 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 495 496 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 
497 alu.src[0].sel = 0; 498 alu.src[0].chan = 3; 499 500 alu.dst.sel = 0; 501 alu.dst.chan = 3; 502 alu.dst.write = 1; 503 alu.last = 1; 504 505 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 506 return r; 507 break; 508 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID) 509 break; 510 default: 511 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 512 return -EINVAL; 513 } 514 return 0; 515} 516 517static int r600_get_temp(struct r600_shader_ctx *ctx) 518{ 519 return ctx->temp_reg + ctx->max_driver_temp_used++; 520} 521 522/* 523 * for evergreen we need to scan the shader to find the number of GPRs we need to 524 * reserve for interpolation. 525 * 526 * we need to know if we are going to emit 527 * any centroid inputs 528 * if perspective and linear are required 529*/ 530static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 531{ 532 int i; 533 int num_baryc; 534 535 ctx->input_linear = FALSE; 536 ctx->input_perspective = FALSE; 537 ctx->input_centroid = FALSE; 538 ctx->num_interp_gpr = 1; 539 540 /* any centroid inputs */ 541 for (i = 0; i < ctx->info.num_inputs; i++) { 542 /* skip position/face */ 543 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 544 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 545 continue; 546 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 547 ctx->input_linear = TRUE; 548 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 549 ctx->input_perspective = TRUE; 550 if (ctx->info.input_centroid[i]) 551 ctx->input_centroid = TRUE; 552 } 553 554 num_baryc = 0; 555 /* ignoring sample for now */ 556 if (ctx->input_perspective) 557 num_baryc++; 558 if (ctx->input_linear) 559 num_baryc++; 560 if (ctx->input_centroid) 561 num_baryc *= 2; 562 563 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 564 565 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 566 return ctx->num_interp_gpr; 567} 568 569static void tgsi_src(struct r600_shader_ctx *ctx, 570 const struct 
tgsi_full_src_register *tgsi_src, 571 struct r600_shader_src *r600_src) 572{ 573 memset(r600_src, 0, sizeof(*r600_src)); 574 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 575 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 576 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 577 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 578 r600_src->neg = tgsi_src->Register.Negate; 579 r600_src->abs = tgsi_src->Register.Absolute; 580 581 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 582 int index; 583 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 584 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 585 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 586 587 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 588 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 589 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 590 return; 591 } 592 index = tgsi_src->Register.Index; 593 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 594 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 595 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 596 if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) { 597 r600_src->swizzle[0] = 3; 598 r600_src->swizzle[1] = 3; 599 r600_src->swizzle[2] = 3; 600 r600_src->swizzle[3] = 3; 601 r600_src->sel = 0; 602 } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) { 603 r600_src->swizzle[0] = 0; 604 r600_src->swizzle[1] = 0; 605 r600_src->swizzle[2] = 0; 606 r600_src->swizzle[3] = 0; 607 r600_src->sel = 0; 608 } 609 } else { 610 if (tgsi_src->Register.Indirect) 611 r600_src->rel = V_SQ_REL_RELATIVE; 612 r600_src->sel = tgsi_src->Register.Index; 613 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 614 } 615} 616 617static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int 
dst_reg) 618{ 619 struct r600_bytecode_vtx vtx; 620 unsigned int ar_reg; 621 int r; 622 623 if (offset) { 624 struct r600_bytecode_alu alu; 625 626 memset(&alu, 0, sizeof(alu)); 627 628 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 629 alu.src[0].sel = ctx->bc->ar_reg; 630 631 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 632 alu.src[1].value = offset; 633 634 alu.dst.sel = dst_reg; 635 alu.dst.write = 1; 636 alu.last = 1; 637 638 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 639 return r; 640 641 ar_reg = dst_reg; 642 } else { 643 ar_reg = ctx->bc->ar_reg; 644 } 645 646 memset(&vtx, 0, sizeof(vtx)); 647 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 648 vtx.src_gpr = ar_reg; 649 vtx.mega_fetch_count = 16; 650 vtx.dst_gpr = dst_reg; 651 vtx.dst_sel_x = 0; /* SEL_X */ 652 vtx.dst_sel_y = 1; /* SEL_Y */ 653 vtx.dst_sel_z = 2; /* SEL_Z */ 654 vtx.dst_sel_w = 3; /* SEL_W */ 655 vtx.data_format = FMT_32_32_32_32_FLOAT; 656 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 657 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 658 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 659 vtx.endian = r600_endian_swap(32); 660 661 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 662 return r; 663 664 return 0; 665} 666 667static int tgsi_split_constant(struct r600_shader_ctx *ctx) 668{ 669 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 670 struct r600_bytecode_alu alu; 671 int i, j, k, nconst, r; 672 673 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 674 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 675 nconst++; 676 } 677 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 678 } 679 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 680 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 681 continue; 682 } 683 684 if (ctx->src[i].rel) { 685 int treg = r600_get_temp(ctx); 686 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 687 return r; 688 689 ctx->src[i].sel = treg; 690 
ctx->src[i].rel = 0; 691 j--; 692 } else if (j > 0) { 693 int treg = r600_get_temp(ctx); 694 for (k = 0; k < 4; k++) { 695 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 696 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 697 alu.src[0].sel = ctx->src[i].sel; 698 alu.src[0].chan = k; 699 alu.src[0].rel = ctx->src[i].rel; 700 alu.dst.sel = treg; 701 alu.dst.chan = k; 702 alu.dst.write = 1; 703 if (k == 3) 704 alu.last = 1; 705 r = r600_bytecode_add_alu(ctx->bc, &alu); 706 if (r) 707 return r; 708 } 709 ctx->src[i].sel = treg; 710 ctx->src[i].rel =0; 711 j--; 712 } 713 } 714 return 0; 715} 716 717/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 718static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 719{ 720 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 721 struct r600_bytecode_alu alu; 722 int i, j, k, nliteral, r; 723 724 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 725 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 726 nliteral++; 727 } 728 } 729 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 730 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 731 int treg = r600_get_temp(ctx); 732 for (k = 0; k < 4; k++) { 733 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 734 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 735 alu.src[0].sel = ctx->src[i].sel; 736 alu.src[0].chan = k; 737 alu.src[0].value = ctx->src[i].value[k]; 738 alu.dst.sel = treg; 739 alu.dst.chan = k; 740 alu.dst.write = 1; 741 if (k == 3) 742 alu.last = 1; 743 r = r600_bytecode_add_alu(ctx->bc, &alu); 744 if (r) 745 return r; 746 } 747 ctx->src[i].sel = treg; 748 j--; 749 } 750 } 751 return 0; 752} 753 754static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) 755{ 756 int i, r, count = ctx->shader->ninput; 757 758 /* additional inputs will be allocated right after the existing inputs, 759 * we won't need them after the 
color selection, so we don't need to 760 * reserve these gprs for the rest of the shader code and to adjust 761 * output offsets etc. */ 762 int gpr = ctx->file_offset[TGSI_FILE_INPUT] + 763 ctx->info.file_max[TGSI_FILE_INPUT] + 1; 764 765 if (ctx->face_gpr == -1) { 766 i = ctx->shader->ninput++; 767 ctx->shader->input[i].name = TGSI_SEMANTIC_FACE; 768 ctx->shader->input[i].spi_sid = 0; 769 ctx->shader->input[i].gpr = gpr++; 770 ctx->face_gpr = ctx->shader->input[i].gpr; 771 } 772 773 for (i = 0; i < count; i++) { 774 if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { 775 int ni = ctx->shader->ninput++; 776 memcpy(&ctx->shader->input[ni],&ctx->shader->input[i], sizeof(struct r600_shader_io)); 777 ctx->shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 778 ctx->shader->input[ni].spi_sid = r600_spi_sid(&ctx->shader->input[ni]); 779 ctx->shader->input[ni].gpr = gpr++; 780 781 if (ctx->bc->chip_class >= EVERGREEN) { 782 r = evergreen_interp_input(ctx, ni); 783 if (r) 784 return r; 785 } 786 787 r = select_twoside_color(ctx, i, ni); 788 if (r) 789 return r; 790 } 791 } 792 return 0; 793} 794 795static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader) 796{ 797 struct r600_shader *shader = &pipeshader->shader; 798 struct tgsi_token *tokens = pipeshader->tokens; 799 struct pipe_stream_output_info so = pipeshader->so; 800 struct tgsi_full_immediate *immediate; 801 struct tgsi_full_property *property; 802 struct r600_shader_ctx ctx; 803 struct r600_bytecode_output output[32]; 804 unsigned output_done, noutput; 805 unsigned opcode; 806 int i, j, r = 0, pos0; 807 808 ctx.bc = &shader->bc; 809 ctx.shader = shader; 810 r600_bytecode_init(ctx.bc, rctx->chip_class); 811 ctx.tokens = tokens; 812 tgsi_scan_shader(tokens, &ctx.info); 813 tgsi_parse_init(&ctx.parse, tokens); 814 ctx.type = ctx.parse.FullHeader.Processor.Processor; 815 shader->processor_type = ctx.type; 816 ctx.bc->type = shader->processor_type; 817 818 ctx.face_gpr = -1; 
819 ctx.colors_used = 0; 820 821 shader->two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->two_side; 822 823 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) || 824 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color)); 825 826 shader->nr_cbufs = rctx->nr_cbufs; 827 828 /* register allocations */ 829 /* Values [0,127] correspond to GPR[0..127]. 830 * Values [128,159] correspond to constant buffer bank 0 831 * Values [160,191] correspond to constant buffer bank 1 832 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 833 * Values [256,287] correspond to constant buffer bank 2 (EG) 834 * Values [288,319] correspond to constant buffer bank 3 (EG) 835 * Other special values are shown in the list below. 836 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 837 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 838 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 839 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 840 * 248 SQ_ALU_SRC_0: special constant 0.0. 841 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 842 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 843 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 844 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 845 * 253 SQ_ALU_SRC_LITERAL: literal constant. 846 * 254 SQ_ALU_SRC_PV: previous vector result. 847 * 255 SQ_ALU_SRC_PS: previous scalar result. 
848 */ 849 for (i = 0; i < TGSI_FILE_COUNT; i++) { 850 ctx.file_offset[i] = 0; 851 } 852 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 853 ctx.file_offset[TGSI_FILE_INPUT] = 1; 854 if (ctx.bc->chip_class >= EVERGREEN) { 855 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 856 } else { 857 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 858 } 859 } 860 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { 861 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 862 } 863 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 864 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 865 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 866 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 867 868 /* Outside the GPR range. This will be translated to one of the 869 * kcache banks later. */ 870 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 871 872 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 873 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 874 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 875 ctx.temp_reg = ctx.bc->ar_reg + 1; 876 877 ctx.nliterals = 0; 878 ctx.literals = NULL; 879 shader->fs_write_all = FALSE; 880 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 881 tgsi_parse_token(&ctx.parse); 882 switch (ctx.parse.FullToken.Token.Type) { 883 case TGSI_TOKEN_TYPE_IMMEDIATE: 884 immediate = &ctx.parse.FullToken.FullImmediate; 885 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 886 if(ctx.literals == NULL) { 887 r = -ENOMEM; 888 goto out_err; 889 } 890 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 891 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 892 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 893 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 894 ctx.nliterals++; 895 break; 896 case TGSI_TOKEN_TYPE_DECLARATION: 897 r = tgsi_declaration(&ctx); 898 if (r) 899 goto out_err; 900 break; 901 
case TGSI_TOKEN_TYPE_INSTRUCTION: 902 break; 903 case TGSI_TOKEN_TYPE_PROPERTY: 904 property = &ctx.parse.FullToken.FullProperty; 905 switch (property->Property.PropertyName) { 906 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: 907 if (property->u[0].Data == 1) 908 shader->fs_write_all = TRUE; 909 break; 910 case TGSI_PROPERTY_VS_PROHIBIT_UCPS: 911 if (property->u[0].Data == 1) 912 shader->vs_prohibit_ucps = TRUE; 913 break; 914 } 915 break; 916 default: 917 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 918 r = -EINVAL; 919 goto out_err; 920 } 921 } 922 923 if (shader->two_side && ctx.colors_used) { 924 if ((r = process_twoside_color_inputs(&ctx))) 925 return r; 926 } 927 928 tgsi_parse_init(&ctx.parse, tokens); 929 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 930 tgsi_parse_token(&ctx.parse); 931 switch (ctx.parse.FullToken.Token.Type) { 932 case TGSI_TOKEN_TYPE_INSTRUCTION: 933 r = tgsi_is_supported(&ctx); 934 if (r) 935 goto out_err; 936 ctx.max_driver_temp_used = 0; 937 /* reserve first tmp for everyone */ 938 r600_get_temp(&ctx); 939 940 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 941 if ((r = tgsi_split_constant(&ctx))) 942 goto out_err; 943 if ((r = tgsi_split_literal_constant(&ctx))) 944 goto out_err; 945 if (ctx.bc->chip_class == CAYMAN) 946 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 947 else if (ctx.bc->chip_class >= EVERGREEN) 948 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 949 else 950 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 951 r = ctx.inst_info->process(&ctx); 952 if (r) 953 goto out_err; 954 break; 955 default: 956 break; 957 } 958 } 959 960 noutput = shader->noutput; 961 962 /* clamp color outputs */ 963 if (shader->clamp_color) { 964 for (i = 0; i < noutput; i++) { 965 if (shader->output[i].name == TGSI_SEMANTIC_COLOR || 966 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) { 967 968 int j; 969 for (j = 0; j < 4; j++) { 970 struct r600_bytecode_alu alu; 971 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 972 973 /* MOV_SAT R, R */ 974 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 975 alu.dst.sel = shader->output[i].gpr; 976 alu.dst.chan = j; 977 alu.dst.write = 1; 978 alu.dst.clamp = 1; 979 alu.src[0].sel = alu.dst.sel; 980 alu.src[0].chan = j; 981 982 if (j == 3) { 983 alu.last = 1; 984 } 985 r = r600_bytecode_add_alu(ctx.bc, &alu); 986 if (r) 987 return r; 988 } 989 } 990 } 991 } 992 993 /* Add stream outputs. */ 994 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) { 995 for (i = 0; i < so.num_outputs; i++) { 996 struct r600_bytecode_output output; 997 998 if (so.output[i].output_buffer >= 4) { 999 R600_ERR("exceeded the max number of stream output buffers, got: %d\n", 1000 so.output[i].output_buffer); 1001 r = -EINVAL; 1002 goto out_err; 1003 } 1004 if (so.output[i].start_component) { 1005 R600_ERR("stream_output - start_component cannot be non-zero\n"); 1006 r = -EINVAL; 1007 goto out_err; 1008 } 1009 1010 memset(&output, 0, sizeof(struct r600_bytecode_output)); 1011 output.gpr = shader->output[so.output[i].register_index].gpr; 1012 output.elem_size = 0; 1013 output.array_base = so.output[i].dst_offset; 1014 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 1015 output.burst_count = 1; 1016 output.barrier = 1; 1017 output.array_size = 0; 1018 output.comp_mask = (1 << so.output[i].num_components) - 1; 1019 if (ctx.bc->chip_class >= EVERGREEN) { 1020 switch (so.output[i].output_buffer) { 1021 case 0: 1022 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0; 1023 break; 1024 case 1: 1025 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1; 1026 break; 1027 case 2: 1028 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2; 1029 break; 1030 case 3: 1031 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3; 1032 break; 1033 } 1034 } else { 1035 switch (so.output[i].output_buffer) { 1036 case 0: 1037 
output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0; 1038 break; 1039 case 1: 1040 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1; 1041 break; 1042 case 2: 1043 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2; 1044 break; 1045 case 3: 1046 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3; 1047 break; 1048 } 1049 } 1050 r = r600_bytecode_add_output(ctx.bc, &output); 1051 if (r) 1052 goto out_err; 1053 } 1054 } 1055 1056 /* export output */ 1057 j = 0; 1058 1059 for (i = 0, pos0 = 0; i < noutput; i++) { 1060 memset(&output[i+j], 0, sizeof(struct r600_bytecode_output)); 1061 output[i + j].gpr = shader->output[i].gpr; 1062 output[i + j].elem_size = 3; 1063 output[i + j].swizzle_x = 0; 1064 output[i + j].swizzle_y = 1; 1065 output[i + j].swizzle_z = 2; 1066 output[i + j].swizzle_w = 3; 1067 output[i + j].burst_count = 1; 1068 output[i + j].barrier = 1; 1069 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1070 output[i + j].array_base = i+j - pos0; 1071 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1072 switch (ctx.type) { 1073 case TGSI_PROCESSOR_VERTEX: 1074 switch (shader->output[i].name) { 1075 case TGSI_SEMANTIC_POSITION: 1076 output[i + j].array_base = 60; 1077 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1078 /* position doesn't count in array_base */ 1079 pos0++; 1080 break; 1081 1082 case TGSI_SEMANTIC_PSIZE: 1083 output[i + j].array_base = 61; 1084 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1085 /* position doesn't count in array_base */ 1086 pos0++; 1087 break; 1088 1089 case TGSI_SEMANTIC_CLIPDIST: 1090 /* array base for enabled OUT_MISC_VEC & CCDIST[0|1]_VEC 1091 * vectors is allocated sequentially, starting from 61 */ 1092 output[i + j].array_base = 61 + shader->output[i].sid 1093 /* +1 if OUT_MISC_VEC is enabled */ 1094 + shader->vs_out_misc_write 1095 /* -1 if OUT_CCDIST0_VEC is disabled */ 1096 - 
(((shader->clip_dist_write & 0xF) == 0)? 1 : 0); 1097 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 1098 j++; 1099 pos0++; 1100 /* duplicate it as PARAM to pass to the pixel shader */ 1101 memcpy(&output[i+j], &output[i+j-1], sizeof(struct r600_bytecode_output)); 1102 output[i + j].array_base = i+j-pos0; 1103 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1104 break; 1105 } 1106 break; 1107 case TGSI_PROCESSOR_FRAGMENT: 1108 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 1109 output[i + j].array_base = shader->output[i].sid; 1110 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1111 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { 1112 for (j = 1; j < shader->nr_cbufs; j++) { 1113 memset(&output[i + j], 0, sizeof(struct r600_bytecode_output)); 1114 output[i + j].gpr = shader->output[i].gpr; 1115 output[i + j].elem_size = 3; 1116 output[i + j].swizzle_x = 0; 1117 output[i + j].swizzle_y = 1; 1118 output[i + j].swizzle_z = 2; 1119 output[i + j].swizzle_w = 3; 1120 output[i + j].burst_count = 1; 1121 output[i + j].barrier = 1; 1122 output[i + j].array_base = shader->output[i].sid + j; 1123 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1124 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1125 } 1126 j = shader->nr_cbufs-1; 1127 } 1128 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 1129 output[i + j].array_base = 61; 1130 output[i + j].swizzle_x = 2; 1131 output[i + j].swizzle_y = 7; 1132 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 1133 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1134 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 1135 output[i + j].array_base = 61; 1136 output[i + j].swizzle_x = 7; 1137 output[i + j].swizzle_y = 1; 1138 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 1139 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1140 } else { 1141 
R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 1142 r = -EINVAL; 1143 goto out_err; 1144 } 1145 break; 1146 default: 1147 R600_ERR("unsupported processor type %d\n", ctx.type); 1148 r = -EINVAL; 1149 goto out_err; 1150 } 1151 } 1152 noutput += j; 1153 /* add fake param output for vertex shader if no param is exported */ 1154 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 1155 for (i = 0, pos0 = 0; i < noutput; i++) { 1156 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 1157 pos0 = 1; 1158 break; 1159 } 1160 } 1161 if (!pos0) { 1162 memset(&output[i], 0, sizeof(struct r600_bytecode_output)); 1163 output[i].gpr = 0; 1164 output[i].elem_size = 3; 1165 output[i].swizzle_x = 7; 1166 output[i].swizzle_y = 7; 1167 output[i].swizzle_z = 7; 1168 output[i].swizzle_w = 7; 1169 output[i].burst_count = 1; 1170 output[i].barrier = 1; 1171 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1172 output[i].array_base = 0; 1173 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1174 noutput++; 1175 } 1176 } 1177 /* add fake pixel export */ 1178 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 1179 memset(&output[0], 0, sizeof(struct r600_bytecode_output)); 1180 output[0].gpr = 0; 1181 output[0].elem_size = 3; 1182 output[0].swizzle_x = 7; 1183 output[0].swizzle_y = 7; 1184 output[0].swizzle_z = 7; 1185 output[0].swizzle_w = 7; 1186 output[0].burst_count = 1; 1187 output[0].barrier = 1; 1188 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1189 output[0].array_base = 0; 1190 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1191 noutput++; 1192 } 1193 /* set export done on last export of each type */ 1194 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 1195 if (ctx.bc->chip_class < CAYMAN) { 1196 if (i == (noutput - 1)) { 1197 output[i].end_of_program = 1; 1198 } 1199 } 1200 if (!(output_done & (1 << output[i].type))) { 1201 output_done |= (1 << 
output[i].type); 1202 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 1203 } 1204 } 1205 /* add output to bytecode */ 1206 for (i = 0; i < noutput; i++) { 1207 r = r600_bytecode_add_output(ctx.bc, &output[i]); 1208 if (r) 1209 goto out_err; 1210 } 1211 /* add program end */ 1212 if (ctx.bc->chip_class == CAYMAN) 1213 cm_bytecode_add_cf_end(ctx.bc); 1214 1215 free(ctx.literals); 1216 tgsi_parse_free(&ctx.parse); 1217 return 0; 1218out_err: 1219 free(ctx.literals); 1220 tgsi_parse_free(&ctx.parse); 1221 return r; 1222} 1223 1224static int tgsi_unsupported(struct r600_shader_ctx *ctx) 1225{ 1226 R600_ERR("%s tgsi opcode unsupported\n", 1227 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 1228 return -EINVAL; 1229} 1230 1231static int tgsi_end(struct r600_shader_ctx *ctx) 1232{ 1233 return 0; 1234} 1235 1236static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 1237 const struct r600_shader_src *shader_src, 1238 unsigned chan) 1239{ 1240 bc_src->sel = shader_src->sel; 1241 bc_src->chan = shader_src->swizzle[chan]; 1242 bc_src->neg = shader_src->neg; 1243 bc_src->abs = shader_src->abs; 1244 bc_src->rel = shader_src->rel; 1245 bc_src->value = shader_src->value[bc_src->chan]; 1246} 1247 1248static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 1249{ 1250 bc_src->abs = 1; 1251 bc_src->neg = 0; 1252} 1253 1254static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 1255{ 1256 bc_src->neg = !bc_src->neg; 1257} 1258 1259static void tgsi_dst(struct r600_shader_ctx *ctx, 1260 const struct tgsi_full_dst_register *tgsi_dst, 1261 unsigned swizzle, 1262 struct r600_bytecode_alu_dst *r600_dst) 1263{ 1264 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1265 1266 r600_dst->sel = tgsi_dst->Register.Index; 1267 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 1268 r600_dst->chan = swizzle; 1269 r600_dst->write = 1; 1270 if (tgsi_dst->Register.Indirect) 
1271 r600_dst->rel = V_SQ_REL_RELATIVE; 1272 if (inst->Instruction.Saturate) { 1273 r600_dst->clamp = 1; 1274 } 1275} 1276 1277static int tgsi_last_instruction(unsigned writemask) 1278{ 1279 int i, lasti = 0; 1280 1281 for (i = 0; i < 4; i++) { 1282 if (writemask & (1 << i)) { 1283 lasti = i; 1284 } 1285 } 1286 return lasti; 1287} 1288 1289static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 1290{ 1291 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1292 struct r600_bytecode_alu alu; 1293 int i, j, r; 1294 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1295 1296 for (i = 0; i < lasti + 1; i++) { 1297 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1298 continue; 1299 1300 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1301 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1302 1303 alu.inst = ctx->inst_info->r600_opcode; 1304 if (!swap) { 1305 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1306 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1307 } 1308 } else { 1309 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 1310 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1311 } 1312 /* handle some special cases */ 1313 switch (ctx->inst_info->tgsi_opcode) { 1314 case TGSI_OPCODE_SUB: 1315 r600_bytecode_src_toggle_neg(&alu.src[1]); 1316 break; 1317 case TGSI_OPCODE_ABS: 1318 r600_bytecode_src_set_abs(&alu.src[0]); 1319 break; 1320 default: 1321 break; 1322 } 1323 if (i == lasti || trans_only) { 1324 alu.last = 1; 1325 } 1326 r = r600_bytecode_add_alu(ctx->bc, &alu); 1327 if (r) 1328 return r; 1329 } 1330 return 0; 1331} 1332 1333static int tgsi_op2(struct r600_shader_ctx *ctx) 1334{ 1335 return tgsi_op2_s(ctx, 0, 0); 1336} 1337 1338static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1339{ 1340 return tgsi_op2_s(ctx, 1, 0); 1341} 1342 1343static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 1344{ 1345 return tgsi_op2_s(ctx, 0, 1); 1346} 1347 1348static int tgsi_ineg(struct 
r600_shader_ctx *ctx) 1349{ 1350 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1351 struct r600_bytecode_alu alu; 1352 int i, r; 1353 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1354 1355 for (i = 0; i < lasti + 1; i++) { 1356 1357 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1358 continue; 1359 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1360 alu.inst = ctx->inst_info->r600_opcode; 1361 1362 alu.src[0].sel = V_SQ_ALU_SRC_0; 1363 1364 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1365 1366 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1367 1368 if (i == lasti) { 1369 alu.last = 1; 1370 } 1371 r = r600_bytecode_add_alu(ctx->bc, &alu); 1372 if (r) 1373 return r; 1374 } 1375 return 0; 1376 1377} 1378 1379static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 1380{ 1381 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1382 int i, j, r; 1383 struct r600_bytecode_alu alu; 1384 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 1385 1386 for (i = 0 ; i < last_slot; i++) { 1387 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1388 alu.inst = ctx->inst_info->r600_opcode; 1389 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1390 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 1391 } 1392 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1393 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1394 1395 if (i == last_slot - 1) 1396 alu.last = 1; 1397 r = r600_bytecode_add_alu(ctx->bc, &alu); 1398 if (r) 1399 return r; 1400 } 1401 return 0; 1402} 1403 1404/* 1405 * r600 - trunc to -PI..PI range 1406 * r700 - normalize by dividing by 2PI 1407 * see fdo bug 27901 1408 */ 1409static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 1410{ 1411 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1412 static float double_pi = 3.1415926535 * 2; 1413 static float neg_pi = -3.1415926535; 1414 1415 int r; 1416 struct r600_bytecode_alu alu; 1417 1418 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1419 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1420 alu.is_op3 = 1; 1421 1422 alu.dst.chan = 0; 1423 alu.dst.sel = ctx->temp_reg; 1424 alu.dst.write = 1; 1425 1426 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1427 1428 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1429 alu.src[1].chan = 0; 1430 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1431 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1432 alu.src[2].chan = 0; 1433 alu.last = 1; 1434 r = r600_bytecode_add_alu(ctx->bc, &alu); 1435 if (r) 1436 return r; 1437 1438 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1439 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1440 1441 alu.dst.chan = 0; 1442 alu.dst.sel = ctx->temp_reg; 1443 alu.dst.write = 1; 1444 1445 alu.src[0].sel = ctx->temp_reg; 1446 alu.src[0].chan = 0; 1447 alu.last = 1; 1448 r = r600_bytecode_add_alu(ctx->bc, &alu); 1449 if (r) 1450 return r; 1451 1452 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1453 alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1454 alu.is_op3 = 1; 1455 1456 alu.dst.chan = 0; 1457 alu.dst.sel = ctx->temp_reg; 1458 alu.dst.write = 1; 1459 1460 alu.src[0].sel = ctx->temp_reg; 1461 alu.src[0].chan = 0; 1462 1463 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1464 alu.src[1].chan = 0; 1465 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1466 alu.src[2].chan = 0; 1467 1468 if (ctx->bc->chip_class == R600) { 1469 alu.src[1].value = *(uint32_t *)&double_pi; 1470 alu.src[2].value = *(uint32_t *)&neg_pi; 1471 } else { 1472 alu.src[1].sel = V_SQ_ALU_SRC_1; 1473 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1474 alu.src[2].neg = 1; 1475 } 1476 1477 alu.last = 1; 1478 r = r600_bytecode_add_alu(ctx->bc, &alu); 1479 if (r) 1480 return r; 1481 return 0; 1482} 1483 1484static int cayman_trig(struct r600_shader_ctx *ctx) 1485{ 1486 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1487 struct r600_bytecode_alu alu; 1488 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1489 int i, r; 1490 1491 r = tgsi_setup_trig(ctx); 1492 if (r) 1493 return r; 1494 1495 1496 for (i = 0; i < last_slot; i++) { 1497 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1498 alu.inst = ctx->inst_info->r600_opcode; 1499 alu.dst.chan = i; 1500 1501 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1502 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1503 1504 alu.src[0].sel = ctx->temp_reg; 1505 alu.src[0].chan = 0; 1506 if (i == last_slot - 1) 1507 alu.last = 1; 1508 r = r600_bytecode_add_alu(ctx->bc, &alu); 1509 if (r) 1510 return r; 1511 } 1512 return 0; 1513} 1514 1515static int tgsi_trig(struct r600_shader_ctx *ctx) 1516{ 1517 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1518 struct r600_bytecode_alu alu; 1519 int i, r; 1520 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1521 1522 r = tgsi_setup_trig(ctx); 1523 if (r) 1524 return r; 1525 1526 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1527 alu.inst 
= ctx->inst_info->r600_opcode; 1528 alu.dst.chan = 0; 1529 alu.dst.sel = ctx->temp_reg; 1530 alu.dst.write = 1; 1531 1532 alu.src[0].sel = ctx->temp_reg; 1533 alu.src[0].chan = 0; 1534 alu.last = 1; 1535 r = r600_bytecode_add_alu(ctx->bc, &alu); 1536 if (r) 1537 return r; 1538 1539 /* replicate result */ 1540 for (i = 0; i < lasti + 1; i++) { 1541 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1542 continue; 1543 1544 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1545 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1546 1547 alu.src[0].sel = ctx->temp_reg; 1548 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1549 if (i == lasti) 1550 alu.last = 1; 1551 r = r600_bytecode_add_alu(ctx->bc, &alu); 1552 if (r) 1553 return r; 1554 } 1555 return 0; 1556} 1557 1558static int tgsi_scs(struct r600_shader_ctx *ctx) 1559{ 1560 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1561 struct r600_bytecode_alu alu; 1562 int i, r; 1563 1564 /* We'll only need the trig stuff if we are going to write to the 1565 * X or Y components of the destination vector. 
1566 */ 1567 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1568 r = tgsi_setup_trig(ctx); 1569 if (r) 1570 return r; 1571 } 1572 1573 /* dst.x = COS */ 1574 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1575 if (ctx->bc->chip_class == CAYMAN) { 1576 for (i = 0 ; i < 3; i++) { 1577 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1578 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1579 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1580 1581 if (i == 0) 1582 alu.dst.write = 1; 1583 else 1584 alu.dst.write = 0; 1585 alu.src[0].sel = ctx->temp_reg; 1586 alu.src[0].chan = 0; 1587 if (i == 2) 1588 alu.last = 1; 1589 r = r600_bytecode_add_alu(ctx->bc, &alu); 1590 if (r) 1591 return r; 1592 } 1593 } else { 1594 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1595 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1596 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1597 1598 alu.src[0].sel = ctx->temp_reg; 1599 alu.src[0].chan = 0; 1600 alu.last = 1; 1601 r = r600_bytecode_add_alu(ctx->bc, &alu); 1602 if (r) 1603 return r; 1604 } 1605 } 1606 1607 /* dst.y = SIN */ 1608 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1609 if (ctx->bc->chip_class == CAYMAN) { 1610 for (i = 0 ; i < 3; i++) { 1611 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1612 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1613 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1614 if (i == 1) 1615 alu.dst.write = 1; 1616 else 1617 alu.dst.write = 0; 1618 alu.src[0].sel = ctx->temp_reg; 1619 alu.src[0].chan = 0; 1620 if (i == 2) 1621 alu.last = 1; 1622 r = r600_bytecode_add_alu(ctx->bc, &alu); 1623 if (r) 1624 return r; 1625 } 1626 } else { 1627 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1628 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1629 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1630 1631 alu.src[0].sel = ctx->temp_reg; 1632 alu.src[0].chan = 0; 1633 alu.last = 1; 1634 r = r600_bytecode_add_alu(ctx->bc, &alu); 1635 if 
(r) 1636 return r; 1637 } 1638 } 1639 1640 /* dst.z = 0.0; */ 1641 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1642 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1643 1644 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1645 1646 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1647 1648 alu.src[0].sel = V_SQ_ALU_SRC_0; 1649 alu.src[0].chan = 0; 1650 1651 alu.last = 1; 1652 1653 r = r600_bytecode_add_alu(ctx->bc, &alu); 1654 if (r) 1655 return r; 1656 } 1657 1658 /* dst.w = 1.0; */ 1659 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1660 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1661 1662 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1663 1664 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1665 1666 alu.src[0].sel = V_SQ_ALU_SRC_1; 1667 alu.src[0].chan = 0; 1668 1669 alu.last = 1; 1670 1671 r = r600_bytecode_add_alu(ctx->bc, &alu); 1672 if (r) 1673 return r; 1674 } 1675 1676 return 0; 1677} 1678 1679static int tgsi_kill(struct r600_shader_ctx *ctx) 1680{ 1681 struct r600_bytecode_alu alu; 1682 int i, r; 1683 1684 for (i = 0; i < 4; i++) { 1685 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1686 alu.inst = ctx->inst_info->r600_opcode; 1687 1688 alu.dst.chan = i; 1689 1690 alu.src[0].sel = V_SQ_ALU_SRC_0; 1691 1692 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1693 alu.src[1].sel = V_SQ_ALU_SRC_1; 1694 alu.src[1].neg = 1; 1695 } else { 1696 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1697 } 1698 if (i == 3) { 1699 alu.last = 1; 1700 } 1701 r = r600_bytecode_add_alu(ctx->bc, &alu); 1702 if (r) 1703 return r; 1704 } 1705 1706 /* kill must be last in ALU */ 1707 ctx->bc->force_add_cf = 1; 1708 ctx->shader->uses_kill = TRUE; 1709 return 0; 1710} 1711 1712static int tgsi_lit(struct r600_shader_ctx *ctx) 1713{ 1714 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1715 struct r600_bytecode_alu alu; 1716 int r; 1717 1718 /* tmp.x = max(src.y, 0.0) */ 1719 memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 1720 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1721 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 1722 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1723 alu.src[1].chan = 1; 1724 1725 alu.dst.sel = ctx->temp_reg; 1726 alu.dst.chan = 0; 1727 alu.dst.write = 1; 1728 1729 alu.last = 1; 1730 r = r600_bytecode_add_alu(ctx->bc, &alu); 1731 if (r) 1732 return r; 1733 1734 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1735 { 1736 int chan; 1737 int sel; 1738 int i; 1739 1740 if (ctx->bc->chip_class == CAYMAN) { 1741 for (i = 0; i < 3; i++) { 1742 /* tmp.z = log(tmp.x) */ 1743 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1744 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1745 alu.src[0].sel = ctx->temp_reg; 1746 alu.src[0].chan = 0; 1747 alu.dst.sel = ctx->temp_reg; 1748 alu.dst.chan = i; 1749 if (i == 2) { 1750 alu.dst.write = 1; 1751 alu.last = 1; 1752 } else 1753 alu.dst.write = 0; 1754 1755 r = r600_bytecode_add_alu(ctx->bc, &alu); 1756 if (r) 1757 return r; 1758 } 1759 } else { 1760 /* tmp.z = log(tmp.x) */ 1761 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1762 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1763 alu.src[0].sel = ctx->temp_reg; 1764 alu.src[0].chan = 0; 1765 alu.dst.sel = ctx->temp_reg; 1766 alu.dst.chan = 2; 1767 alu.dst.write = 1; 1768 alu.last = 1; 1769 r = r600_bytecode_add_alu(ctx->bc, &alu); 1770 if (r) 1771 return r; 1772 } 1773 1774 chan = alu.dst.chan; 1775 sel = alu.dst.sel; 1776 1777 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 1778 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1779 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1780 alu.src[0].sel = sel; 1781 alu.src[0].chan = chan; 1782 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 1783 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 1784 alu.dst.sel = ctx->temp_reg; 1785 alu.dst.chan = 0; 1786 alu.dst.write = 1; 1787 alu.is_op3 = 1; 1788 alu.last = 1; 1789 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 1790 if (r) 1791 return r; 1792 1793 if (ctx->bc->chip_class == CAYMAN) { 1794 for (i = 0; i < 3; i++) { 1795 /* dst.z = exp(tmp.x) */ 1796 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1797 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1798 alu.src[0].sel = ctx->temp_reg; 1799 alu.src[0].chan = 0; 1800 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1801 if (i == 2) { 1802 alu.dst.write = 1; 1803 alu.last = 1; 1804 } else 1805 alu.dst.write = 0; 1806 r = r600_bytecode_add_alu(ctx->bc, &alu); 1807 if (r) 1808 return r; 1809 } 1810 } else { 1811 /* dst.z = exp(tmp.x) */ 1812 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1813 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1814 alu.src[0].sel = ctx->temp_reg; 1815 alu.src[0].chan = 0; 1816 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1817 alu.last = 1; 1818 r = r600_bytecode_add_alu(ctx->bc, &alu); 1819 if (r) 1820 return r; 1821 } 1822 } 1823 1824 /* dst.x, <- 1.0 */ 1825 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1826 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1827 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1828 alu.src[0].chan = 0; 1829 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1830 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1831 r = r600_bytecode_add_alu(ctx->bc, &alu); 1832 if (r) 1833 return r; 1834 1835 /* dst.y = max(src.x, 0.0) */ 1836 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1837 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1838 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1839 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1840 alu.src[1].chan = 0; 1841 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1842 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1843 r = r600_bytecode_add_alu(ctx->bc, &alu); 1844 if (r) 1845 return r; 1846 1847 /* dst.w, <- 1.0 */ 1848 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1849 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 
1850 alu.src[0].sel = V_SQ_ALU_SRC_1; 1851 alu.src[0].chan = 0; 1852 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1853 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1854 alu.last = 1; 1855 r = r600_bytecode_add_alu(ctx->bc, &alu); 1856 if (r) 1857 return r; 1858 1859 return 0; 1860} 1861 1862static int tgsi_rsq(struct r600_shader_ctx *ctx) 1863{ 1864 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1865 struct r600_bytecode_alu alu; 1866 int i, r; 1867 1868 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1869 1870 /* FIXME: 1871 * For state trackers other than OpenGL, we'll want to use 1872 * _RECIPSQRT_IEEE instead. 1873 */ 1874 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1875 1876 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1877 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1878 r600_bytecode_src_set_abs(&alu.src[i]); 1879 } 1880 alu.dst.sel = ctx->temp_reg; 1881 alu.dst.write = 1; 1882 alu.last = 1; 1883 r = r600_bytecode_add_alu(ctx->bc, &alu); 1884 if (r) 1885 return r; 1886 /* replicate result */ 1887 return tgsi_helper_tempx_replicate(ctx); 1888} 1889 1890static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1891{ 1892 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1893 struct r600_bytecode_alu alu; 1894 int i, r; 1895 1896 for (i = 0; i < 4; i++) { 1897 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1898 alu.src[0].sel = ctx->temp_reg; 1899 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1900 alu.dst.chan = i; 1901 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1902 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1903 if (i == 3) 1904 alu.last = 1; 1905 r = r600_bytecode_add_alu(ctx->bc, &alu); 1906 if (r) 1907 return r; 1908 } 1909 return 0; 1910} 1911 1912static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1913{ 1914 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1915 
struct r600_bytecode_alu alu; 1916 int i, r; 1917 1918 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1919 alu.inst = ctx->inst_info->r600_opcode; 1920 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1921 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1922 } 1923 alu.dst.sel = ctx->temp_reg; 1924 alu.dst.write = 1; 1925 alu.last = 1; 1926 r = r600_bytecode_add_alu(ctx->bc, &alu); 1927 if (r) 1928 return r; 1929 /* replicate result */ 1930 return tgsi_helper_tempx_replicate(ctx); 1931} 1932 1933static int cayman_pow(struct r600_shader_ctx *ctx) 1934{ 1935 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1936 int i, r; 1937 struct r600_bytecode_alu alu; 1938 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1939 1940 for (i = 0; i < 3; i++) { 1941 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1942 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1943 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1944 alu.dst.sel = ctx->temp_reg; 1945 alu.dst.chan = i; 1946 alu.dst.write = 1; 1947 if (i == 2) 1948 alu.last = 1; 1949 r = r600_bytecode_add_alu(ctx->bc, &alu); 1950 if (r) 1951 return r; 1952 } 1953 1954 /* b * LOG2(a) */ 1955 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1956 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1957 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 1958 alu.src[1].sel = ctx->temp_reg; 1959 alu.dst.sel = ctx->temp_reg; 1960 alu.dst.write = 1; 1961 alu.last = 1; 1962 r = r600_bytecode_add_alu(ctx->bc, &alu); 1963 if (r) 1964 return r; 1965 1966 for (i = 0; i < last_slot; i++) { 1967 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1968 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1969 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1970 alu.src[0].sel = ctx->temp_reg; 1971 1972 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1973 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1974 if (i == last_slot - 1) 1975 alu.last = 1; 1976 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 1977 if (r) 1978 return r; 1979 } 1980 return 0; 1981} 1982 1983static int tgsi_pow(struct r600_shader_ctx *ctx) 1984{ 1985 struct r600_bytecode_alu alu; 1986 int r; 1987 1988 /* LOG2(a) */ 1989 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1990 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1991 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1992 alu.dst.sel = ctx->temp_reg; 1993 alu.dst.write = 1; 1994 alu.last = 1; 1995 r = r600_bytecode_add_alu(ctx->bc, &alu); 1996 if (r) 1997 return r; 1998 /* b * LOG2(a) */ 1999 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2001 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 2002 alu.src[1].sel = ctx->temp_reg; 2003 alu.dst.sel = ctx->temp_reg; 2004 alu.dst.write = 1; 2005 alu.last = 1; 2006 r = r600_bytecode_add_alu(ctx->bc, &alu); 2007 if (r) 2008 return r; 2009 /* POW(a,b) = EXP2(b * LOG2(a))*/ 2010 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2011 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2012 alu.src[0].sel = ctx->temp_reg; 2013 alu.dst.sel = ctx->temp_reg; 2014 alu.dst.write = 1; 2015 alu.last = 1; 2016 r = r600_bytecode_add_alu(ctx->bc, &alu); 2017 if (r) 2018 return r; 2019 return tgsi_helper_tempx_replicate(ctx); 2020} 2021 2022static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op) 2023{ 2024 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2025 struct r600_bytecode_alu alu; 2026 int i, r; 2027 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2028 int tmp0 = ctx->temp_reg; 2029 int tmp1 = r600_get_temp(ctx); 2030 int tmp2 = r600_get_temp(ctx); 2031 2032 /* Unsigned path: 2033 * 2034 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder 2035 * 2036 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error 2037 * 2. tmp0.z = lo (tmp0.x * src2) 2038 * 3. tmp0.w = -tmp0.z 2039 * 4. 
tmp0.y = hi (tmp0.x * src2) 2040 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src2)) 2041 * 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error 2042 * 7. tmp1.x = tmp0.x - tmp0.w 2043 * 8. tmp1.y = tmp0.x + tmp0.w 2044 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) 2045 * 10. tmp0.z = hi(tmp0.x * src1) = q 2046 * 11. tmp0.y = lo (tmp0.z * src2) = src2*q = src1 - r 2047 * 2048 * 12. tmp0.w = src1 - tmp0.y = r 2049 * 13. tmp1.x = tmp0.w >= src2 = r >= src2 (uint comparison) 2050 * 14. tmp1.y = src1 >= tmp0.y = r >= 0 (uint comparison) 2051 * 2052 * if DIV 2053 * 2054 * 15. tmp1.z = tmp0.z + 1 = q + 1 2055 * 16. tmp1.w = tmp0.z - 1 = q - 1 2056 * 2057 * else MOD 2058 * 2059 * 15. tmp1.z = tmp0.w - src2 = r - src2 2060 * 16. tmp1.w = tmp0.w + src2 = r + src2 2061 * 2062 * endif 2063 * 2064 * 17. tmp1.x = tmp1.x & tmp1.y 2065 * 2066 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z 2067 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z 2068 * 2069 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z 2070 * 20. dst = src2==0 ? MAX_UINT : tmp0.z 2071 * 2072 * Signed path: 2073 * 2074 * Same as unsigned, using abs values of the operands, 2075 * and fixing the sign of the result in the end. 
2076 */ 2077 2078 for (i = 0; i < 4; i++) { 2079 if (!(write_mask & (1<<i))) 2080 continue; 2081 2082 if (signed_op) { 2083 2084 /* tmp2.x = -src0 */ 2085 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2086 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2087 2088 alu.dst.sel = tmp2; 2089 alu.dst.chan = 0; 2090 alu.dst.write = 1; 2091 2092 alu.src[0].sel = V_SQ_ALU_SRC_0; 2093 2094 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2095 2096 alu.last = 1; 2097 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2098 return r; 2099 2100 /* tmp2.y = -src1 */ 2101 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2102 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2103 2104 alu.dst.sel = tmp2; 2105 alu.dst.chan = 1; 2106 alu.dst.write = 1; 2107 2108 alu.src[0].sel = V_SQ_ALU_SRC_0; 2109 2110 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2111 2112 alu.last = 1; 2113 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2114 return r; 2115 2116 /* tmp2.z sign bit is set if src0 and src2 signs are different */ 2117 /* it will be a sign of the quotient */ 2118 if (!mod) { 2119 2120 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2121 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT); 2122 2123 alu.dst.sel = tmp2; 2124 alu.dst.chan = 2; 2125 alu.dst.write = 1; 2126 2127 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2128 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2129 2130 alu.last = 1; 2131 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2132 return r; 2133 } 2134 2135 /* tmp2.x = |src0| */ 2136 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2137 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2138 alu.is_op3 = 1; 2139 2140 alu.dst.sel = tmp2; 2141 alu.dst.chan = 0; 2142 alu.dst.write = 1; 2143 2144 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2145 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2146 alu.src[2].sel = tmp2; 2147 alu.src[2].chan = 0; 2148 2149 alu.last = 1; 2150 if ((r = 
r600_bytecode_add_alu(ctx->bc, &alu))) 2151 return r; 2152 2153 /* tmp2.y = |src1| */ 2154 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2155 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2156 alu.is_op3 = 1; 2157 2158 alu.dst.sel = tmp2; 2159 alu.dst.chan = 1; 2160 alu.dst.write = 1; 2161 2162 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2163 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2164 alu.src[2].sel = tmp2; 2165 alu.src[2].chan = 1; 2166 2167 alu.last = 1; 2168 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2169 return r; 2170 2171 } 2172 2173 /* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */ 2174 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2175 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT); 2176 2177 alu.dst.sel = tmp0; 2178 alu.dst.chan = 0; 2179 alu.dst.write = 1; 2180 2181 if (signed_op) { 2182 alu.src[0].sel = tmp2; 2183 alu.src[0].chan = 1; 2184 } else { 2185 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2186 } 2187 2188 alu.last = 1; 2189 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2190 return r; 2191 2192 /* 2. tmp0.z = lo (tmp0.x * src2) */ 2193 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2194 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 2195 2196 alu.dst.sel = tmp0; 2197 alu.dst.chan = 2; 2198 alu.dst.write = 1; 2199 2200 alu.src[0].sel = tmp0; 2201 alu.src[0].chan = 0; 2202 if (signed_op) { 2203 alu.src[1].sel = tmp2; 2204 alu.src[1].chan = 1; 2205 } else { 2206 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2207 } 2208 2209 alu.last = 1; 2210 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2211 return r; 2212 2213 /* 3. 
tmp0.w = -tmp0.z */ 2214 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2215 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2216 2217 alu.dst.sel = tmp0; 2218 alu.dst.chan = 3; 2219 alu.dst.write = 1; 2220 2221 alu.src[0].sel = V_SQ_ALU_SRC_0; 2222 alu.src[1].sel = tmp0; 2223 alu.src[1].chan = 2; 2224 2225 alu.last = 1; 2226 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2227 return r; 2228 2229 /* 4. tmp0.y = hi (tmp0.x * src2) */ 2230 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2231 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2232 2233 alu.dst.sel = tmp0; 2234 alu.dst.chan = 1; 2235 alu.dst.write = 1; 2236 2237 alu.src[0].sel = tmp0; 2238 alu.src[0].chan = 0; 2239 2240 if (signed_op) { 2241 alu.src[1].sel = tmp2; 2242 alu.src[1].chan = 1; 2243 } else { 2244 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2245 } 2246 2247 alu.last = 1; 2248 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2249 return r; 2250 2251 /* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */ 2252 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2253 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 2254 alu.is_op3 = 1; 2255 2256 alu.dst.sel = tmp0; 2257 alu.dst.chan = 2; 2258 alu.dst.write = 1; 2259 2260 alu.src[0].sel = tmp0; 2261 alu.src[0].chan = 1; 2262 alu.src[1].sel = tmp0; 2263 alu.src[1].chan = 3; 2264 alu.src[2].sel = tmp0; 2265 alu.src[2].chan = 2; 2266 2267 alu.last = 1; 2268 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2269 return r; 2270 2271 /* 6. 
tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */ 2272 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2273 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2274 2275 alu.dst.sel = tmp0; 2276 alu.dst.chan = 3; 2277 alu.dst.write = 1; 2278 2279 alu.src[0].sel = tmp0; 2280 alu.src[0].chan = 2; 2281 2282 alu.src[1].sel = tmp0; 2283 alu.src[1].chan = 0; 2284 2285 alu.last = 1; 2286 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2287 return r; 2288 2289 /* 7. tmp1.x = tmp0.x - tmp0.w */ 2290 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2291 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2292 2293 alu.dst.sel = tmp1; 2294 alu.dst.chan = 0; 2295 alu.dst.write = 1; 2296 2297 alu.src[0].sel = tmp0; 2298 alu.src[0].chan = 0; 2299 alu.src[1].sel = tmp0; 2300 alu.src[1].chan = 3; 2301 2302 alu.last = 1; 2303 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2304 return r; 2305 2306 /* 8. tmp1.y = tmp0.x + tmp0.w */ 2307 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2308 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 2309 2310 alu.dst.sel = tmp1; 2311 alu.dst.chan = 1; 2312 alu.dst.write = 1; 2313 2314 alu.src[0].sel = tmp0; 2315 alu.src[0].chan = 0; 2316 alu.src[1].sel = tmp0; 2317 alu.src[1].chan = 3; 2318 2319 alu.last = 1; 2320 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2321 return r; 2322 2323 /* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */ 2324 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2325 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 2326 alu.is_op3 = 1; 2327 2328 alu.dst.sel = tmp0; 2329 alu.dst.chan = 0; 2330 alu.dst.write = 1; 2331 2332 alu.src[0].sel = tmp0; 2333 alu.src[0].chan = 1; 2334 alu.src[1].sel = tmp1; 2335 alu.src[1].chan = 1; 2336 alu.src[2].sel = tmp1; 2337 alu.src[2].chan = 0; 2338 2339 alu.last = 1; 2340 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2341 return r; 2342 2343 /* 10. 
tmp0.z = hi(tmp0.x * src1) = q */ 2344 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2345 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT); 2346 2347 alu.dst.sel = tmp0; 2348 alu.dst.chan = 2; 2349 alu.dst.write = 1; 2350 2351 alu.src[0].sel = tmp0; 2352 alu.src[0].chan = 0; 2353 2354 if (signed_op) { 2355 alu.src[1].sel = tmp2; 2356 alu.src[1].chan = 0; 2357 } else { 2358 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2359 } 2360 2361 alu.last = 1; 2362 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2363 return r; 2364 2365 /* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */ 2366 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2367 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 2368 2369 alu.dst.sel = tmp0; 2370 alu.dst.chan = 1; 2371 alu.dst.write = 1; 2372 2373 if (signed_op) { 2374 alu.src[0].sel = tmp2; 2375 alu.src[0].chan = 1; 2376 } else { 2377 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2378 } 2379 2380 alu.src[1].sel = tmp0; 2381 alu.src[1].chan = 2; 2382 2383 alu.last = 1; 2384 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2385 return r; 2386 2387 /* 12. tmp0.w = src1 - tmp0.y = r */ 2388 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2389 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2390 2391 alu.dst.sel = tmp0; 2392 alu.dst.chan = 3; 2393 alu.dst.write = 1; 2394 2395 if (signed_op) { 2396 alu.src[0].sel = tmp2; 2397 alu.src[0].chan = 0; 2398 } else { 2399 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2400 } 2401 2402 alu.src[1].sel = tmp0; 2403 alu.src[1].chan = 1; 2404 2405 alu.last = 1; 2406 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2407 return r; 2408 2409 /* 13. 
tmp1.x = tmp0.w >= src2 = r >= src2 */ 2410 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2411 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT); 2412 2413 alu.dst.sel = tmp1; 2414 alu.dst.chan = 0; 2415 alu.dst.write = 1; 2416 2417 alu.src[0].sel = tmp0; 2418 alu.src[0].chan = 3; 2419 if (signed_op) { 2420 alu.src[1].sel = tmp2; 2421 alu.src[1].chan = 1; 2422 } else { 2423 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2424 } 2425 2426 alu.last = 1; 2427 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2428 return r; 2429 2430 /* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */ 2431 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2432 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT); 2433 2434 alu.dst.sel = tmp1; 2435 alu.dst.chan = 1; 2436 alu.dst.write = 1; 2437 2438 if (signed_op) { 2439 alu.src[0].sel = tmp2; 2440 alu.src[0].chan = 0; 2441 } else { 2442 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2443 } 2444 2445 alu.src[1].sel = tmp0; 2446 alu.src[1].chan = 1; 2447 2448 alu.last = 1; 2449 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2450 return r; 2451 2452 if (mod) { /* UMOD */ 2453 2454 /* 15. tmp1.z = tmp0.w - src2 = r - src2 */ 2455 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2456 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2457 2458 alu.dst.sel = tmp1; 2459 alu.dst.chan = 2; 2460 alu.dst.write = 1; 2461 2462 alu.src[0].sel = tmp0; 2463 alu.src[0].chan = 3; 2464 2465 if (signed_op) { 2466 alu.src[1].sel = tmp2; 2467 alu.src[1].chan = 1; 2468 } else { 2469 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2470 } 2471 2472 alu.last = 1; 2473 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2474 return r; 2475 2476 /* 16. 
tmp1.w = tmp0.w + src2 = r + src2 */ 2477 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2478 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 2479 2480 alu.dst.sel = tmp1; 2481 alu.dst.chan = 3; 2482 alu.dst.write = 1; 2483 2484 alu.src[0].sel = tmp0; 2485 alu.src[0].chan = 3; 2486 if (signed_op) { 2487 alu.src[1].sel = tmp2; 2488 alu.src[1].chan = 1; 2489 } else { 2490 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2491 } 2492 2493 alu.last = 1; 2494 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2495 return r; 2496 2497 } else { /* UDIV */ 2498 2499 /* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */ 2500 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2501 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 2502 2503 alu.dst.sel = tmp1; 2504 alu.dst.chan = 2; 2505 alu.dst.write = 1; 2506 2507 alu.src[0].sel = tmp0; 2508 alu.src[0].chan = 2; 2509 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 2510 2511 alu.last = 1; 2512 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2513 return r; 2514 2515 /* 16. tmp1.w = tmp0.z - 1 = q - 1 */ 2516 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2517 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 2518 2519 alu.dst.sel = tmp1; 2520 alu.dst.chan = 3; 2521 alu.dst.write = 1; 2522 2523 alu.src[0].sel = tmp0; 2524 alu.src[0].chan = 2; 2525 alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT; 2526 2527 alu.last = 1; 2528 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2529 return r; 2530 2531 } 2532 2533 /* 17. tmp1.x = tmp1.x & tmp1.y */ 2534 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2535 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT); 2536 2537 alu.dst.sel = tmp1; 2538 alu.dst.chan = 0; 2539 alu.dst.write = 1; 2540 2541 alu.src[0].sel = tmp1; 2542 alu.src[0].chan = 0; 2543 alu.src[1].sel = tmp1; 2544 alu.src[1].chan = 1; 2545 2546 alu.last = 1; 2547 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2548 return r; 2549 2550 /* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */ 2551 /* 18. 
tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */ 2552 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2553 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 2554 alu.is_op3 = 1; 2555 2556 alu.dst.sel = tmp0; 2557 alu.dst.chan = 2; 2558 alu.dst.write = 1; 2559 2560 alu.src[0].sel = tmp1; 2561 alu.src[0].chan = 0; 2562 alu.src[1].sel = tmp0; 2563 alu.src[1].chan = mod ? 3 : 2; 2564 alu.src[2].sel = tmp1; 2565 alu.src[2].chan = 2; 2566 2567 alu.last = 1; 2568 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2569 return r; 2570 2571 /* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z */ 2572 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2573 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDE_INT); 2574 alu.is_op3 = 1; 2575 2576 if (signed_op) { 2577 alu.dst.sel = tmp0; 2578 alu.dst.chan = 2; 2579 alu.dst.write = 1; 2580 } else { 2581 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2582 } 2583 2584 alu.src[0].sel = tmp1; 2585 alu.src[0].chan = 1; 2586 alu.src[1].sel = tmp1; 2587 alu.src[1].chan = 3; 2588 alu.src[2].sel = tmp0; 2589 alu.src[2].chan = 2; 2590 2591 alu.last = 1; 2592 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2593 return r; 2594 2595 if (signed_op) { 2596 2597 /* fix the sign of the result */ 2598 2599 if (mod) { 2600 2601 /* tmp0.x = -tmp0.z */ 2602 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2603 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2604 2605 alu.dst.sel = tmp0; 2606 alu.dst.chan = 0; 2607 alu.dst.write = 1; 2608 2609 alu.src[0].sel = V_SQ_ALU_SRC_0; 2610 alu.src[1].sel = tmp0; 2611 alu.src[1].chan = 2; 2612 2613 alu.last = 1; 2614 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2615 return r; 2616 2617 /* sign of the remainder is the same as the sign of src0 */ 2618 /* tmp0.x = src0>=0 ? 
tmp0.z : tmp0.x */ 2619 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2620 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2621 alu.is_op3 = 1; 2622 2623 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2624 2625 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2626 alu.src[1].sel = tmp0; 2627 alu.src[1].chan = 2; 2628 alu.src[2].sel = tmp0; 2629 alu.src[2].chan = 0; 2630 2631 alu.last = 1; 2632 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2633 return r; 2634 2635 } else { 2636 2637 /* tmp0.x = -tmp0.z */ 2638 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2639 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2640 2641 alu.dst.sel = tmp0; 2642 alu.dst.chan = 0; 2643 alu.dst.write = 1; 2644 2645 alu.src[0].sel = V_SQ_ALU_SRC_0; 2646 alu.src[1].sel = tmp0; 2647 alu.src[1].chan = 2; 2648 2649 alu.last = 1; 2650 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2651 return r; 2652 2653 /* fix the quotient sign (same as the sign of src0*src1) */ 2654 /* tmp0.x = tmp2.z>=0 ? 
tmp0.z : tmp0.x */ 2655 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2656 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2657 alu.is_op3 = 1; 2658 2659 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2660 2661 alu.src[0].sel = tmp2; 2662 alu.src[0].chan = 2; 2663 alu.src[1].sel = tmp0; 2664 alu.src[1].chan = 2; 2665 alu.src[2].sel = tmp0; 2666 alu.src[2].chan = 0; 2667 2668 alu.last = 1; 2669 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2670 return r; 2671 } 2672 } 2673 } 2674 return 0; 2675} 2676 2677static int tgsi_udiv(struct r600_shader_ctx *ctx) 2678{ 2679 return tgsi_divmod(ctx, 0, 0); 2680} 2681 2682static int tgsi_umod(struct r600_shader_ctx *ctx) 2683{ 2684 return tgsi_divmod(ctx, 1, 0); 2685} 2686 2687static int tgsi_idiv(struct r600_shader_ctx *ctx) 2688{ 2689 return tgsi_divmod(ctx, 0, 1); 2690} 2691 2692static int tgsi_imod(struct r600_shader_ctx *ctx) 2693{ 2694 return tgsi_divmod(ctx, 1, 1); 2695} 2696 2697 2698static int tgsi_f2i(struct r600_shader_ctx *ctx) 2699{ 2700 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2701 struct r600_bytecode_alu alu; 2702 int i, r; 2703 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2704 int last_inst = tgsi_last_instruction(write_mask); 2705 2706 for (i = 0; i < 4; i++) { 2707 if (!(write_mask & (1<<i))) 2708 continue; 2709 2710 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2711 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC); 2712 2713 alu.dst.sel = ctx->temp_reg; 2714 alu.dst.chan = i; 2715 alu.dst.write = 1; 2716 2717 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2718 if (i == last_inst) 2719 alu.last = 1; 2720 r = r600_bytecode_add_alu(ctx->bc, &alu); 2721 if (r) 2722 return r; 2723 } 2724 2725 for (i = 0; i < 4; i++) { 2726 if (!(write_mask & (1<<i))) 2727 continue; 2728 2729 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2730 alu.inst = ctx->inst_info->r600_opcode; 2731 2732 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2733 2734 
alu.src[0].sel = ctx->temp_reg; 2735 alu.src[0].chan = i; 2736 2737 if (i == last_inst) 2738 alu.last = 1; 2739 r = r600_bytecode_add_alu(ctx->bc, &alu); 2740 if (r) 2741 return r; 2742 } 2743 2744 return 0; 2745} 2746 2747static int tgsi_iabs(struct r600_shader_ctx *ctx) 2748{ 2749 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2750 struct r600_bytecode_alu alu; 2751 int i, r; 2752 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2753 int last_inst = tgsi_last_instruction(write_mask); 2754 2755 /* tmp = -src */ 2756 for (i = 0; i < 4; i++) { 2757 if (!(write_mask & (1<<i))) 2758 continue; 2759 2760 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2761 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT); 2762 2763 alu.dst.sel = ctx->temp_reg; 2764 alu.dst.chan = i; 2765 alu.dst.write = 1; 2766 2767 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2768 alu.src[0].sel = V_SQ_ALU_SRC_0; 2769 2770 if (i == last_inst) 2771 alu.last = 1; 2772 r = r600_bytecode_add_alu(ctx->bc, &alu); 2773 if (r) 2774 return r; 2775 } 2776 2777 /* dst = (src >= 0 ? 
src : tmp) */ 2778 for (i = 0; i < 4; i++) { 2779 if (!(write_mask & (1<<i))) 2780 continue; 2781 2782 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2783 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2784 alu.is_op3 = 1; 2785 alu.dst.write = 1; 2786 2787 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2788 2789 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2790 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2791 alu.src[2].sel = ctx->temp_reg; 2792 alu.src[2].chan = i; 2793 2794 if (i == last_inst) 2795 alu.last = 1; 2796 r = r600_bytecode_add_alu(ctx->bc, &alu); 2797 if (r) 2798 return r; 2799 } 2800 return 0; 2801} 2802 2803static int tgsi_issg(struct r600_shader_ctx *ctx) 2804{ 2805 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2806 struct r600_bytecode_alu alu; 2807 int i, r; 2808 unsigned write_mask = inst->Dst[0].Register.WriteMask; 2809 int last_inst = tgsi_last_instruction(write_mask); 2810 2811 /* tmp = (src >= 0 ? src : -1) */ 2812 for (i = 0; i < 4; i++) { 2813 if (!(write_mask & (1<<i))) 2814 continue; 2815 2816 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2817 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT); 2818 alu.is_op3 = 1; 2819 2820 alu.dst.sel = ctx->temp_reg; 2821 alu.dst.chan = i; 2822 alu.dst.write = 1; 2823 2824 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2825 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2826 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT; 2827 2828 if (i == last_inst) 2829 alu.last = 1; 2830 r = r600_bytecode_add_alu(ctx->bc, &alu); 2831 if (r) 2832 return r; 2833 } 2834 2835 /* dst = (tmp > 0 ? 
1 : tmp) */ 2836 for (i = 0; i < 4; i++) { 2837 if (!(write_mask & (1<<i))) 2838 continue; 2839 2840 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2841 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT); 2842 alu.is_op3 = 1; 2843 alu.dst.write = 1; 2844 2845 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2846 2847 alu.src[0].sel = ctx->temp_reg; 2848 alu.src[0].chan = i; 2849 2850 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 2851 2852 alu.src[2].sel = ctx->temp_reg; 2853 alu.src[2].chan = i; 2854 2855 if (i == last_inst) 2856 alu.last = 1; 2857 r = r600_bytecode_add_alu(ctx->bc, &alu); 2858 if (r) 2859 return r; 2860 } 2861 return 0; 2862} 2863 2864 2865 2866static int tgsi_ssg(struct r600_shader_ctx *ctx) 2867{ 2868 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2869 struct r600_bytecode_alu alu; 2870 int i, r; 2871 2872 /* tmp = (src > 0 ? 1 : src) */ 2873 for (i = 0; i < 4; i++) { 2874 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2875 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 2876 alu.is_op3 = 1; 2877 2878 alu.dst.sel = ctx->temp_reg; 2879 alu.dst.chan = i; 2880 2881 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2882 alu.src[1].sel = V_SQ_ALU_SRC_1; 2883 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 2884 2885 if (i == 3) 2886 alu.last = 1; 2887 r = r600_bytecode_add_alu(ctx->bc, &alu); 2888 if (r) 2889 return r; 2890 } 2891 2892 /* dst = (-tmp > 0 ? 
-1 : tmp) */ 2893 for (i = 0; i < 4; i++) { 2894 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2895 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 2896 alu.is_op3 = 1; 2897 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2898 2899 alu.src[0].sel = ctx->temp_reg; 2900 alu.src[0].chan = i; 2901 alu.src[0].neg = 1; 2902 2903 alu.src[1].sel = V_SQ_ALU_SRC_1; 2904 alu.src[1].neg = 1; 2905 2906 alu.src[2].sel = ctx->temp_reg; 2907 alu.src[2].chan = i; 2908 2909 if (i == 3) 2910 alu.last = 1; 2911 r = r600_bytecode_add_alu(ctx->bc, &alu); 2912 if (r) 2913 return r; 2914 } 2915 return 0; 2916} 2917 2918static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 2919{ 2920 struct r600_bytecode_alu alu; 2921 int i, r; 2922 2923 for (i = 0; i < 4; i++) { 2924 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2925 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 2926 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 2927 alu.dst.chan = i; 2928 } else { 2929 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2930 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2931 alu.src[0].sel = ctx->temp_reg; 2932 alu.src[0].chan = i; 2933 } 2934 if (i == 3) { 2935 alu.last = 1; 2936 } 2937 r = r600_bytecode_add_alu(ctx->bc, &alu); 2938 if (r) 2939 return r; 2940 } 2941 return 0; 2942} 2943 2944static int tgsi_op3(struct r600_shader_ctx *ctx) 2945{ 2946 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2947 struct r600_bytecode_alu alu; 2948 int i, j, r; 2949 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2950 2951 for (i = 0; i < lasti + 1; i++) { 2952 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2953 continue; 2954 2955 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2956 alu.inst = ctx->inst_info->r600_opcode; 2957 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2958 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 2959 } 2960 2961 tgsi_dst(ctx, &inst->Dst[0], i, 
&alu.dst); 2962 alu.dst.chan = i; 2963 alu.dst.write = 1; 2964 alu.is_op3 = 1; 2965 if (i == lasti) { 2966 alu.last = 1; 2967 } 2968 r = r600_bytecode_add_alu(ctx->bc, &alu); 2969 if (r) 2970 return r; 2971 } 2972 return 0; 2973} 2974 2975static int tgsi_dp(struct r600_shader_ctx *ctx) 2976{ 2977 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2978 struct r600_bytecode_alu alu; 2979 int i, j, r; 2980 2981 for (i = 0; i < 4; i++) { 2982 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2983 alu.inst = ctx->inst_info->r600_opcode; 2984 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 2985 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 2986 } 2987 2988 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2989 alu.dst.chan = i; 2990 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 2991 /* handle some special cases */ 2992 switch (ctx->inst_info->tgsi_opcode) { 2993 case TGSI_OPCODE_DP2: 2994 if (i > 1) { 2995 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 2996 alu.src[0].chan = alu.src[1].chan = 0; 2997 } 2998 break; 2999 case TGSI_OPCODE_DP3: 3000 if (i > 2) { 3001 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 3002 alu.src[0].chan = alu.src[1].chan = 0; 3003 } 3004 break; 3005 case TGSI_OPCODE_DPH: 3006 if (i == 3) { 3007 alu.src[0].sel = V_SQ_ALU_SRC_1; 3008 alu.src[0].chan = 0; 3009 alu.src[0].neg = 0; 3010 } 3011 break; 3012 default: 3013 break; 3014 } 3015 if (i == 3) { 3016 alu.last = 1; 3017 } 3018 r = r600_bytecode_add_alu(ctx->bc, &alu); 3019 if (r) 3020 return r; 3021 } 3022 return 0; 3023} 3024 3025static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 3026 unsigned index) 3027{ 3028 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3029 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 3030 inst->Src[index].Register.File != TGSI_FILE_INPUT) || 3031 ctx->src[index].neg || ctx->src[index].abs; 3032} 3033 3034static inline unsigned 
tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 3035 unsigned index) 3036{ 3037 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3038 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 3039} 3040 3041static int tgsi_tex(struct r600_shader_ctx *ctx) 3042{ 3043 static float one_point_five = 1.5f; 3044 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3045 struct r600_bytecode_tex tex; 3046 struct r600_bytecode_alu alu; 3047 unsigned src_gpr; 3048 int r, i, j; 3049 int opcode; 3050 /* Texture fetch instructions can only use gprs as source. 3051 * Also they cannot negate the source or take the absolute value */ 3052 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); 3053 boolean src_loaded = FALSE; 3054 unsigned sampler_src_reg = 1; 3055 u8 offset_x = 0, offset_y = 0, offset_z = 0; 3056 3057 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 3058 3059 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 3060 /* get offset values */ 3061 if (inst->Texture.NumOffsets) { 3062 assert(inst->Texture.NumOffsets == 1); 3063 3064 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 3065 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 3066 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 3067 } 3068 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 3069 /* TGSI moves the sampler to src reg 3 for TXD */ 3070 sampler_src_reg = 3; 3071 3072 for (i = 1; i < 3; i++) { 3073 /* set gradients h/v */ 3074 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 3075 tex.inst = (i == 1) ? 
SQ_TEX_INST_SET_GRADIENTS_H : 3076 SQ_TEX_INST_SET_GRADIENTS_V; 3077 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 3078 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 3079 3080 if (tgsi_tex_src_requires_loading(ctx, i)) { 3081 tex.src_gpr = r600_get_temp(ctx); 3082 tex.src_sel_x = 0; 3083 tex.src_sel_y = 1; 3084 tex.src_sel_z = 2; 3085 tex.src_sel_w = 3; 3086 3087 for (j = 0; j < 4; j++) { 3088 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3089 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3090 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 3091 alu.dst.sel = tex.src_gpr; 3092 alu.dst.chan = j; 3093 if (j == 3) 3094 alu.last = 1; 3095 alu.dst.write = 1; 3096 r = r600_bytecode_add_alu(ctx->bc, &alu); 3097 if (r) 3098 return r; 3099 } 3100 3101 } else { 3102 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 3103 tex.src_sel_x = ctx->src[i].swizzle[0]; 3104 tex.src_sel_y = ctx->src[i].swizzle[1]; 3105 tex.src_sel_z = ctx->src[i].swizzle[2]; 3106 tex.src_sel_w = ctx->src[i].swizzle[3]; 3107 tex.src_rel = ctx->src[i].rel; 3108 } 3109 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 3110 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 3111 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 3112 tex.coord_type_x = 1; 3113 tex.coord_type_y = 1; 3114 tex.coord_type_z = 1; 3115 tex.coord_type_w = 1; 3116 } 3117 r = r600_bytecode_add_tex(ctx->bc, &tex); 3118 if (r) 3119 return r; 3120 } 3121 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 3122 int out_chan; 3123 /* Add perspective divide */ 3124 if (ctx->bc->chip_class == CAYMAN) { 3125 out_chan = 2; 3126 for (i = 0; i < 3; i++) { 3127 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3128 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3129 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 3130 3131 alu.dst.sel = ctx->temp_reg; 3132 alu.dst.chan = i; 3133 if (i == 2) 3134 alu.last = 1; 3135 if (out_chan == i) 3136 
alu.dst.write = 1; 3137 r = r600_bytecode_add_alu(ctx->bc, &alu); 3138 if (r) 3139 return r; 3140 } 3141 3142 } else { 3143 out_chan = 3; 3144 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3145 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3146 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 3147 3148 alu.dst.sel = ctx->temp_reg; 3149 alu.dst.chan = out_chan; 3150 alu.last = 1; 3151 alu.dst.write = 1; 3152 r = r600_bytecode_add_alu(ctx->bc, &alu); 3153 if (r) 3154 return r; 3155 } 3156 3157 for (i = 0; i < 3; i++) { 3158 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3159 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3160 alu.src[0].sel = ctx->temp_reg; 3161 alu.src[0].chan = out_chan; 3162 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3163 alu.dst.sel = ctx->temp_reg; 3164 alu.dst.chan = i; 3165 alu.dst.write = 1; 3166 r = r600_bytecode_add_alu(ctx->bc, &alu); 3167 if (r) 3168 return r; 3169 } 3170 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3171 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3172 alu.src[0].sel = V_SQ_ALU_SRC_1; 3173 alu.src[0].chan = 0; 3174 alu.dst.sel = ctx->temp_reg; 3175 alu.dst.chan = 3; 3176 alu.last = 1; 3177 alu.dst.write = 1; 3178 r = r600_bytecode_add_alu(ctx->bc, &alu); 3179 if (r) 3180 return r; 3181 src_loaded = TRUE; 3182 src_gpr = ctx->temp_reg; 3183 } 3184 3185 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 3186 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 3187 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 3188 3189 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 3190 for (i = 0; i < 4; i++) { 3191 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3192 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 3193 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 3194 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 3195 alu.dst.sel = ctx->temp_reg; 3196 alu.dst.chan = i; 3197 if (i == 3) 3198 alu.last = 1; 3199 alu.dst.write = 1; 
3200 r = r600_bytecode_add_alu(ctx->bc, &alu); 3201 if (r) 3202 return r; 3203 } 3204 3205 /* tmp1.z = RCP_e(|tmp1.z|) */ 3206 if (ctx->bc->chip_class == CAYMAN) { 3207 for (i = 0; i < 3; i++) { 3208 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3209 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3210 alu.src[0].sel = ctx->temp_reg; 3211 alu.src[0].chan = 2; 3212 alu.src[0].abs = 1; 3213 alu.dst.sel = ctx->temp_reg; 3214 alu.dst.chan = i; 3215 if (i == 2) 3216 alu.dst.write = 1; 3217 if (i == 2) 3218 alu.last = 1; 3219 r = r600_bytecode_add_alu(ctx->bc, &alu); 3220 if (r) 3221 return r; 3222 } 3223 } else { 3224 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3225 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3226 alu.src[0].sel = ctx->temp_reg; 3227 alu.src[0].chan = 2; 3228 alu.src[0].abs = 1; 3229 alu.dst.sel = ctx->temp_reg; 3230 alu.dst.chan = 2; 3231 alu.dst.write = 1; 3232 alu.last = 1; 3233 r = r600_bytecode_add_alu(ctx->bc, &alu); 3234 if (r) 3235 return r; 3236 } 3237 3238 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 3239 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 3240 * muladd has no writemask, have to use another temp 3241 */ 3242 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3243 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 3244 alu.is_op3 = 1; 3245 3246 alu.src[0].sel = ctx->temp_reg; 3247 alu.src[0].chan = 0; 3248 alu.src[1].sel = ctx->temp_reg; 3249 alu.src[1].chan = 2; 3250 3251 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 3252 alu.src[2].chan = 0; 3253 alu.src[2].value = *(uint32_t *)&one_point_five; 3254 3255 alu.dst.sel = ctx->temp_reg; 3256 alu.dst.chan = 0; 3257 alu.dst.write = 1; 3258 3259 r = r600_bytecode_add_alu(ctx->bc, &alu); 3260 if (r) 3261 return r; 3262 3263 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3264 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 3265 alu.is_op3 = 1; 3266 3267 alu.src[0].sel = ctx->temp_reg; 3268 alu.src[0].chan = 1; 3269 
alu.src[1].sel = ctx->temp_reg; 3270 alu.src[1].chan = 2; 3271 3272 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 3273 alu.src[2].chan = 0; 3274 alu.src[2].value = *(uint32_t *)&one_point_five; 3275 3276 alu.dst.sel = ctx->temp_reg; 3277 alu.dst.chan = 1; 3278 alu.dst.write = 1; 3279 3280 alu.last = 1; 3281 r = r600_bytecode_add_alu(ctx->bc, &alu); 3282 if (r) 3283 return r; 3284 3285 src_loaded = TRUE; 3286 src_gpr = ctx->temp_reg; 3287 } 3288 3289 if (src_requires_loading && !src_loaded) { 3290 for (i = 0; i < 4; i++) { 3291 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3292 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3293 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3294 alu.dst.sel = ctx->temp_reg; 3295 alu.dst.chan = i; 3296 if (i == 3) 3297 alu.last = 1; 3298 alu.dst.write = 1; 3299 r = r600_bytecode_add_alu(ctx->bc, &alu); 3300 if (r) 3301 return r; 3302 } 3303 src_loaded = TRUE; 3304 src_gpr = ctx->temp_reg; 3305 } 3306 3307 opcode = ctx->inst_info->r600_opcode; 3308 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 3309 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 3310 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 3311 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 3312 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) { 3313 switch (opcode) { 3314 case SQ_TEX_INST_SAMPLE: 3315 opcode = SQ_TEX_INST_SAMPLE_C; 3316 break; 3317 case SQ_TEX_INST_SAMPLE_L: 3318 opcode = SQ_TEX_INST_SAMPLE_C_L; 3319 break; 3320 case SQ_TEX_INST_SAMPLE_LB: 3321 opcode = SQ_TEX_INST_SAMPLE_C_LB; 3322 break; 3323 case SQ_TEX_INST_SAMPLE_G: 3324 opcode = SQ_TEX_INST_SAMPLE_C_G; 3325 break; 3326 } 3327 } 3328 3329 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 3330 tex.inst = opcode; 3331 3332 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 3333 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 3334 tex.src_gpr = src_gpr; 3335 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 
3336 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 3337 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 3338 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 3339 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 3340 if (src_loaded) { 3341 tex.src_sel_x = 0; 3342 tex.src_sel_y = 1; 3343 tex.src_sel_z = 2; 3344 tex.src_sel_w = 3; 3345 } else { 3346 tex.src_sel_x = ctx->src[0].swizzle[0]; 3347 tex.src_sel_y = ctx->src[0].swizzle[1]; 3348 tex.src_sel_z = ctx->src[0].swizzle[2]; 3349 tex.src_sel_w = ctx->src[0].swizzle[3]; 3350 tex.src_rel = ctx->src[0].rel; 3351 } 3352 3353 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 3354 tex.src_sel_x = 1; 3355 tex.src_sel_y = 0; 3356 tex.src_sel_z = 3; 3357 tex.src_sel_w = 1; 3358 } 3359 3360 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 3361 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 3362 tex.coord_type_x = 1; 3363 tex.coord_type_y = 1; 3364 } 3365 tex.coord_type_z = 1; 3366 tex.coord_type_w = 1; 3367 3368 tex.offset_x = offset_x; 3369 tex.offset_y = offset_y; 3370 tex.offset_z = offset_z; 3371 3372 /* Put the depth for comparison in W. 3373 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 3374 * Some instructions expect the depth in Z. 
*/ 3375 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 3376 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 3377 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 3378 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 3379 opcode != SQ_TEX_INST_SAMPLE_C_L && 3380 opcode != SQ_TEX_INST_SAMPLE_C_LB) { 3381 tex.src_sel_w = tex.src_sel_z; 3382 } 3383 3384 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 3385 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 3386 if (opcode == SQ_TEX_INST_SAMPLE_C_L || 3387 opcode == SQ_TEX_INST_SAMPLE_C_LB) { 3388 /* the array index is read from Y */ 3389 tex.coord_type_y = 0; 3390 } else { 3391 /* the array index is read from Z */ 3392 tex.coord_type_z = 0; 3393 tex.src_sel_z = tex.src_sel_y; 3394 } 3395 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 3396 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) 3397 /* the array index is read from Z */ 3398 tex.coord_type_z = 0; 3399 3400 r = r600_bytecode_add_tex(ctx->bc, &tex); 3401 if (r) 3402 return r; 3403 3404 /* add shadow ambient support - gallium doesn't do it yet */ 3405 return 0; 3406} 3407 3408static int tgsi_lrp(struct r600_shader_ctx *ctx) 3409{ 3410 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3411 struct r600_bytecode_alu alu; 3412 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3413 unsigned i; 3414 int r; 3415 3416 /* optimize if it's just an equal balance */ 3417 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 3418 for (i = 0; i < lasti + 1; i++) { 3419 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3420 continue; 3421 3422 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3423 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 3424 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3425 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 3426 alu.omod = 3; 3427 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3428 alu.dst.chan = i; 3429 if (i == lasti) { 3430 alu.last = 1; 3431 } 
3432 r = r600_bytecode_add_alu(ctx->bc, &alu); 3433 if (r) 3434 return r; 3435 } 3436 return 0; 3437 } 3438 3439 /* 1 - src0 */ 3440 for (i = 0; i < lasti + 1; i++) { 3441 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3442 continue; 3443 3444 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3445 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 3446 alu.src[0].sel = V_SQ_ALU_SRC_1; 3447 alu.src[0].chan = 0; 3448 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3449 r600_bytecode_src_toggle_neg(&alu.src[1]); 3450 alu.dst.sel = ctx->temp_reg; 3451 alu.dst.chan = i; 3452 if (i == lasti) { 3453 alu.last = 1; 3454 } 3455 alu.dst.write = 1; 3456 r = r600_bytecode_add_alu(ctx->bc, &alu); 3457 if (r) 3458 return r; 3459 } 3460 3461 /* (1 - src0) * src2 */ 3462 for (i = 0; i < lasti + 1; i++) { 3463 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3464 continue; 3465 3466 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3467 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3468 alu.src[0].sel = ctx->temp_reg; 3469 alu.src[0].chan = i; 3470 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 3471 alu.dst.sel = ctx->temp_reg; 3472 alu.dst.chan = i; 3473 if (i == lasti) { 3474 alu.last = 1; 3475 } 3476 alu.dst.write = 1; 3477 r = r600_bytecode_add_alu(ctx->bc, &alu); 3478 if (r) 3479 return r; 3480 } 3481 3482 /* src0 * src1 + (1 - src0) * src2 */ 3483 for (i = 0; i < lasti + 1; i++) { 3484 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3485 continue; 3486 3487 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3488 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 3489 alu.is_op3 = 1; 3490 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3491 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3492 alu.src[2].sel = ctx->temp_reg; 3493 alu.src[2].chan = i; 3494 3495 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3496 alu.dst.chan = i; 3497 if (i == lasti) { 3498 alu.last = 1; 3499 } 3500 r = r600_bytecode_add_alu(ctx->bc, &alu); 3501 if (r) 3502 
return r; 3503 } 3504 return 0; 3505} 3506 3507static int tgsi_cmp(struct r600_shader_ctx *ctx) 3508{ 3509 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3510 struct r600_bytecode_alu alu; 3511 int i, r; 3512 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3513 3514 for (i = 0; i < lasti + 1; i++) { 3515 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3516 continue; 3517 3518 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3519 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 3520 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3521 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 3522 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 3523 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3524 alu.dst.chan = i; 3525 alu.dst.write = 1; 3526 alu.is_op3 = 1; 3527 if (i == lasti) 3528 alu.last = 1; 3529 r = r600_bytecode_add_alu(ctx->bc, &alu); 3530 if (r) 3531 return r; 3532 } 3533 return 0; 3534} 3535 3536static int tgsi_xpd(struct r600_shader_ctx *ctx) 3537{ 3538 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3539 static const unsigned int src0_swizzle[] = {2, 0, 1}; 3540 static const unsigned int src1_swizzle[] = {1, 2, 0}; 3541 struct r600_bytecode_alu alu; 3542 uint32_t use_temp = 0; 3543 int i, r; 3544 3545 if (inst->Dst[0].Register.WriteMask != 0xf) 3546 use_temp = 1; 3547 3548 for (i = 0; i < 4; i++) { 3549 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3550 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3551 if (i < 3) { 3552 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 3553 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 3554 } else { 3555 alu.src[0].sel = V_SQ_ALU_SRC_0; 3556 alu.src[0].chan = i; 3557 alu.src[1].sel = V_SQ_ALU_SRC_0; 3558 alu.src[1].chan = i; 3559 } 3560 3561 alu.dst.sel = ctx->temp_reg; 3562 alu.dst.chan = i; 3563 alu.dst.write = 1; 3564 3565 if (i == 3) 3566 alu.last = 1; 3567 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 3568 if (r) 3569 return r; 3570 } 3571 3572 for (i = 0; i < 4; i++) { 3573 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3574 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 3575 3576 if (i < 3) { 3577 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 3578 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 3579 } else { 3580 alu.src[0].sel = V_SQ_ALU_SRC_0; 3581 alu.src[0].chan = i; 3582 alu.src[1].sel = V_SQ_ALU_SRC_0; 3583 alu.src[1].chan = i; 3584 } 3585 3586 alu.src[2].sel = ctx->temp_reg; 3587 alu.src[2].neg = 1; 3588 alu.src[2].chan = i; 3589 3590 if (use_temp) 3591 alu.dst.sel = ctx->temp_reg; 3592 else 3593 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3594 alu.dst.chan = i; 3595 alu.dst.write = 1; 3596 alu.is_op3 = 1; 3597 if (i == 3) 3598 alu.last = 1; 3599 r = r600_bytecode_add_alu(ctx->bc, &alu); 3600 if (r) 3601 return r; 3602 } 3603 if (use_temp) 3604 return tgsi_helper_copy(ctx, inst); 3605 return 0; 3606} 3607 3608static int tgsi_exp(struct r600_shader_ctx *ctx) 3609{ 3610 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3611 struct r600_bytecode_alu alu; 3612 int r; 3613 int i; 3614 3615 /* result.x = 2^floor(src); */ 3616 if (inst->Dst[0].Register.WriteMask & 1) { 3617 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3618 3619 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 3620 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3621 3622 alu.dst.sel = ctx->temp_reg; 3623 alu.dst.chan = 0; 3624 alu.dst.write = 1; 3625 alu.last = 1; 3626 r = r600_bytecode_add_alu(ctx->bc, &alu); 3627 if (r) 3628 return r; 3629 3630 if (ctx->bc->chip_class == CAYMAN) { 3631 for (i = 0; i < 3; i++) { 3632 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3633 alu.src[0].sel = ctx->temp_reg; 3634 alu.src[0].chan = 0; 3635 3636 alu.dst.sel = ctx->temp_reg; 3637 alu.dst.chan = i; 3638 if (i == 0) 3639 alu.dst.write = 1; 3640 if (i == 2) 3641 
alu.last = 1; 3642 r = r600_bytecode_add_alu(ctx->bc, &alu); 3643 if (r) 3644 return r; 3645 } 3646 } else { 3647 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3648 alu.src[0].sel = ctx->temp_reg; 3649 alu.src[0].chan = 0; 3650 3651 alu.dst.sel = ctx->temp_reg; 3652 alu.dst.chan = 0; 3653 alu.dst.write = 1; 3654 alu.last = 1; 3655 r = r600_bytecode_add_alu(ctx->bc, &alu); 3656 if (r) 3657 return r; 3658 } 3659 } 3660 3661 /* result.y = tmp - floor(tmp); */ 3662 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 3663 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3664 3665 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 3666 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3667 3668 alu.dst.sel = ctx->temp_reg; 3669#if 0 3670 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3671 if (r) 3672 return r; 3673#endif 3674 alu.dst.write = 1; 3675 alu.dst.chan = 1; 3676 3677 alu.last = 1; 3678 3679 r = r600_bytecode_add_alu(ctx->bc, &alu); 3680 if (r) 3681 return r; 3682 } 3683 3684 /* result.z = RoughApprox2ToX(tmp);*/ 3685 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 3686 if (ctx->bc->chip_class == CAYMAN) { 3687 for (i = 0; i < 3; i++) { 3688 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3689 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3690 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3691 3692 alu.dst.sel = ctx->temp_reg; 3693 alu.dst.chan = i; 3694 if (i == 2) { 3695 alu.dst.write = 1; 3696 alu.last = 1; 3697 } 3698 3699 r = r600_bytecode_add_alu(ctx->bc, &alu); 3700 if (r) 3701 return r; 3702 } 3703 } else { 3704 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3705 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3706 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3707 3708 alu.dst.sel = ctx->temp_reg; 3709 alu.dst.write = 1; 3710 alu.dst.chan = 2; 3711 3712 alu.last = 1; 3713 3714 r = r600_bytecode_add_alu(ctx->bc, &alu); 3715 if (r) 3716 return r; 3717 } 3718 } 3719 3720 /* result.w = 
1.0;*/ 3721 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 3722 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3723 3724 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3725 alu.src[0].sel = V_SQ_ALU_SRC_1; 3726 alu.src[0].chan = 0; 3727 3728 alu.dst.sel = ctx->temp_reg; 3729 alu.dst.chan = 3; 3730 alu.dst.write = 1; 3731 alu.last = 1; 3732 r = r600_bytecode_add_alu(ctx->bc, &alu); 3733 if (r) 3734 return r; 3735 } 3736 return tgsi_helper_copy(ctx, inst); 3737} 3738 3739static int tgsi_log(struct r600_shader_ctx *ctx) 3740{ 3741 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3742 struct r600_bytecode_alu alu; 3743 int r; 3744 int i; 3745 3746 /* result.x = floor(log2(|src|)); */ 3747 if (inst->Dst[0].Register.WriteMask & 1) { 3748 if (ctx->bc->chip_class == CAYMAN) { 3749 for (i = 0; i < 3; i++) { 3750 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3751 3752 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3753 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3754 r600_bytecode_src_set_abs(&alu.src[0]); 3755 3756 alu.dst.sel = ctx->temp_reg; 3757 alu.dst.chan = i; 3758 if (i == 0) 3759 alu.dst.write = 1; 3760 if (i == 2) 3761 alu.last = 1; 3762 r = r600_bytecode_add_alu(ctx->bc, &alu); 3763 if (r) 3764 return r; 3765 } 3766 3767 } else { 3768 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3769 3770 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3771 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3772 r600_bytecode_src_set_abs(&alu.src[0]); 3773 3774 alu.dst.sel = ctx->temp_reg; 3775 alu.dst.chan = 0; 3776 alu.dst.write = 1; 3777 alu.last = 1; 3778 r = r600_bytecode_add_alu(ctx->bc, &alu); 3779 if (r) 3780 return r; 3781 } 3782 3783 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 3784 alu.src[0].sel = ctx->temp_reg; 3785 alu.src[0].chan = 0; 3786 3787 alu.dst.sel = ctx->temp_reg; 3788 alu.dst.chan = 0; 3789 alu.dst.write = 1; 3790 alu.last = 1; 3791 3792 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 3793 if (r) 3794 return r; 3795 } 3796 3797 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 3798 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 3799 3800 if (ctx->bc->chip_class == CAYMAN) { 3801 for (i = 0; i < 3; i++) { 3802 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3803 3804 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3805 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3806 r600_bytecode_src_set_abs(&alu.src[0]); 3807 3808 alu.dst.sel = ctx->temp_reg; 3809 alu.dst.chan = i; 3810 if (i == 1) 3811 alu.dst.write = 1; 3812 if (i == 2) 3813 alu.last = 1; 3814 3815 r = r600_bytecode_add_alu(ctx->bc, &alu); 3816 if (r) 3817 return r; 3818 } 3819 } else { 3820 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3821 3822 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3823 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3824 r600_bytecode_src_set_abs(&alu.src[0]); 3825 3826 alu.dst.sel = ctx->temp_reg; 3827 alu.dst.chan = 1; 3828 alu.dst.write = 1; 3829 alu.last = 1; 3830 3831 r = r600_bytecode_add_alu(ctx->bc, &alu); 3832 if (r) 3833 return r; 3834 } 3835 3836 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3837 3838 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 3839 alu.src[0].sel = ctx->temp_reg; 3840 alu.src[0].chan = 1; 3841 3842 alu.dst.sel = ctx->temp_reg; 3843 alu.dst.chan = 1; 3844 alu.dst.write = 1; 3845 alu.last = 1; 3846 3847 r = r600_bytecode_add_alu(ctx->bc, &alu); 3848 if (r) 3849 return r; 3850 3851 if (ctx->bc->chip_class == CAYMAN) { 3852 for (i = 0; i < 3; i++) { 3853 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3854 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3855 alu.src[0].sel = ctx->temp_reg; 3856 alu.src[0].chan = 1; 3857 3858 alu.dst.sel = ctx->temp_reg; 3859 alu.dst.chan = i; 3860 if (i == 1) 3861 alu.dst.write = 1; 3862 if (i == 2) 3863 alu.last = 1; 3864 3865 r = r600_bytecode_add_alu(ctx->bc, &alu); 3866 if (r) 
3867 return r; 3868 } 3869 } else { 3870 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3871 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 3872 alu.src[0].sel = ctx->temp_reg; 3873 alu.src[0].chan = 1; 3874 3875 alu.dst.sel = ctx->temp_reg; 3876 alu.dst.chan = 1; 3877 alu.dst.write = 1; 3878 alu.last = 1; 3879 3880 r = r600_bytecode_add_alu(ctx->bc, &alu); 3881 if (r) 3882 return r; 3883 } 3884 3885 if (ctx->bc->chip_class == CAYMAN) { 3886 for (i = 0; i < 3; i++) { 3887 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3888 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3889 alu.src[0].sel = ctx->temp_reg; 3890 alu.src[0].chan = 1; 3891 3892 alu.dst.sel = ctx->temp_reg; 3893 alu.dst.chan = i; 3894 if (i == 1) 3895 alu.dst.write = 1; 3896 if (i == 2) 3897 alu.last = 1; 3898 3899 r = r600_bytecode_add_alu(ctx->bc, &alu); 3900 if (r) 3901 return r; 3902 } 3903 } else { 3904 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3905 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 3906 alu.src[0].sel = ctx->temp_reg; 3907 alu.src[0].chan = 1; 3908 3909 alu.dst.sel = ctx->temp_reg; 3910 alu.dst.chan = 1; 3911 alu.dst.write = 1; 3912 alu.last = 1; 3913 3914 r = r600_bytecode_add_alu(ctx->bc, &alu); 3915 if (r) 3916 return r; 3917 } 3918 3919 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3920 3921 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3922 3923 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3924 r600_bytecode_src_set_abs(&alu.src[0]); 3925 3926 alu.src[1].sel = ctx->temp_reg; 3927 alu.src[1].chan = 1; 3928 3929 alu.dst.sel = ctx->temp_reg; 3930 alu.dst.chan = 1; 3931 alu.dst.write = 1; 3932 alu.last = 1; 3933 3934 r = r600_bytecode_add_alu(ctx->bc, &alu); 3935 if (r) 3936 return r; 3937 } 3938 3939 /* result.z = log2(|src|);*/ 3940 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 3941 if (ctx->bc->chip_class == CAYMAN) { 3942 for (i = 0; i < 3; i++) { 3943 memset(&alu, 0, sizeof(struct 
r600_bytecode_alu)); 3944 3945 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3946 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3947 r600_bytecode_src_set_abs(&alu.src[0]); 3948 3949 alu.dst.sel = ctx->temp_reg; 3950 if (i == 2) 3951 alu.dst.write = 1; 3952 alu.dst.chan = i; 3953 if (i == 2) 3954 alu.last = 1; 3955 3956 r = r600_bytecode_add_alu(ctx->bc, &alu); 3957 if (r) 3958 return r; 3959 } 3960 } else { 3961 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3962 3963 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 3964 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3965 r600_bytecode_src_set_abs(&alu.src[0]); 3966 3967 alu.dst.sel = ctx->temp_reg; 3968 alu.dst.write = 1; 3969 alu.dst.chan = 2; 3970 alu.last = 1; 3971 3972 r = r600_bytecode_add_alu(ctx->bc, &alu); 3973 if (r) 3974 return r; 3975 } 3976 } 3977 3978 /* result.w = 1.0; */ 3979 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 3980 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3981 3982 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 3983 alu.src[0].sel = V_SQ_ALU_SRC_1; 3984 alu.src[0].chan = 0; 3985 3986 alu.dst.sel = ctx->temp_reg; 3987 alu.dst.chan = 3; 3988 alu.dst.write = 1; 3989 alu.last = 1; 3990 3991 r = r600_bytecode_add_alu(ctx->bc, &alu); 3992 if (r) 3993 return r; 3994 } 3995 3996 return tgsi_helper_copy(ctx, inst); 3997} 3998 3999static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 4000{ 4001 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4002 struct r600_bytecode_alu alu; 4003 int r; 4004 4005 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4006 4007 switch (inst->Instruction.Opcode) { 4008 case TGSI_OPCODE_ARL: 4009 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 4010 break; 4011 case TGSI_OPCODE_ARR: 4012 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 4013 break; 4014 case TGSI_OPCODE_UARL: 4015 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 4016 break; 4017 default: 4018 
assert(0); 4019 return -1; 4020 } 4021 4022 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4023 alu.last = 1; 4024 alu.dst.sel = ctx->bc->ar_reg; 4025 alu.dst.write = 1; 4026 r = r600_bytecode_add_alu(ctx->bc, &alu); 4027 if (r) 4028 return r; 4029 4030 ctx->bc->ar_loaded = 0; 4031 return 0; 4032} 4033static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 4034{ 4035 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4036 struct r600_bytecode_alu alu; 4037 int r; 4038 4039 switch (inst->Instruction.Opcode) { 4040 case TGSI_OPCODE_ARL: 4041 memset(&alu, 0, sizeof(alu)); 4042 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 4043 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4044 alu.dst.sel = ctx->bc->ar_reg; 4045 alu.dst.write = 1; 4046 alu.last = 1; 4047 4048 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4049 return r; 4050 4051 memset(&alu, 0, sizeof(alu)); 4052 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 4053 alu.src[0].sel = ctx->bc->ar_reg; 4054 alu.dst.sel = ctx->bc->ar_reg; 4055 alu.dst.write = 1; 4056 alu.last = 1; 4057 4058 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4059 return r; 4060 break; 4061 case TGSI_OPCODE_ARR: 4062 memset(&alu, 0, sizeof(alu)); 4063 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 4064 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4065 alu.dst.sel = ctx->bc->ar_reg; 4066 alu.dst.write = 1; 4067 alu.last = 1; 4068 4069 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4070 return r; 4071 break; 4072 case TGSI_OPCODE_UARL: 4073 memset(&alu, 0, sizeof(alu)); 4074 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 4075 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4076 alu.dst.sel = ctx->bc->ar_reg; 4077 alu.dst.write = 1; 4078 alu.last = 1; 4079 4080 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 4081 return r; 4082 break; 4083 default: 4084 assert(0); 4085 return -1; 4086 } 4087 4088 ctx->bc->ar_loaded = 0; 4089 return 0; 4090} 4091 4092static int tgsi_opdst(struct r600_shader_ctx 
*ctx) 4093{ 4094 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4095 struct r600_bytecode_alu alu; 4096 int i, r = 0; 4097 4098 for (i = 0; i < 4; i++) { 4099 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4100 4101 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 4102 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4103 4104 if (i == 0 || i == 3) { 4105 alu.src[0].sel = V_SQ_ALU_SRC_1; 4106 } else { 4107 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4108 } 4109 4110 if (i == 0 || i == 2) { 4111 alu.src[1].sel = V_SQ_ALU_SRC_1; 4112 } else { 4113 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 4114 } 4115 if (i == 3) 4116 alu.last = 1; 4117 r = r600_bytecode_add_alu(ctx->bc, &alu); 4118 if (r) 4119 return r; 4120 } 4121 return 0; 4122} 4123 4124static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 4125{ 4126 struct r600_bytecode_alu alu; 4127 int r; 4128 4129 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4130 alu.inst = opcode; 4131 alu.predicate = 1; 4132 4133 alu.dst.sel = ctx->temp_reg; 4134 alu.dst.write = 1; 4135 alu.dst.chan = 0; 4136 4137 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4138 alu.src[1].sel = V_SQ_ALU_SRC_0; 4139 alu.src[1].chan = 0; 4140 4141 alu.last = 1; 4142 4143 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 4144 if (r) 4145 return r; 4146 return 0; 4147} 4148 4149static int pops(struct r600_shader_ctx *ctx, int pops) 4150{ 4151 unsigned force_pop = ctx->bc->force_add_cf; 4152 4153 if (!force_pop) { 4154 int alu_pop = 3; 4155 if (ctx->bc->cf_last) { 4156 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)) 4157 alu_pop = 0; 4158 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER)) 4159 alu_pop = 1; 4160 } 4161 alu_pop += pops; 4162 if (alu_pop == 1) { 4163 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER); 4164 ctx->bc->force_add_cf = 1; 
4165 } else if (alu_pop == 2) { 4166 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER); 4167 ctx->bc->force_add_cf = 1; 4168 } else { 4169 force_pop = 1; 4170 } 4171 } 4172 4173 if (force_pop) { 4174 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 4175 ctx->bc->cf_last->pop_count = pops; 4176 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 4177 } 4178 4179 return 0; 4180} 4181 4182static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 4183{ 4184 switch(reason) { 4185 case FC_PUSH_VPM: 4186 ctx->bc->callstack[ctx->bc->call_sp].current--; 4187 break; 4188 case FC_PUSH_WQM: 4189 case FC_LOOP: 4190 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 4191 break; 4192 case FC_REP: 4193 /* TOODO : for 16 vp asic should -= 2; */ 4194 ctx->bc->callstack[ctx->bc->call_sp].current --; 4195 break; 4196 } 4197} 4198 4199static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 4200{ 4201 if (check_max_only) { 4202 int diff; 4203 switch (reason) { 4204 case FC_PUSH_VPM: 4205 diff = 1; 4206 break; 4207 case FC_PUSH_WQM: 4208 diff = 4; 4209 break; 4210 default: 4211 assert(0); 4212 diff = 0; 4213 } 4214 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 4215 ctx->bc->callstack[ctx->bc->call_sp].max) { 4216 ctx->bc->callstack[ctx->bc->call_sp].max = 4217 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 4218 } 4219 return; 4220 } 4221 switch (reason) { 4222 case FC_PUSH_VPM: 4223 ctx->bc->callstack[ctx->bc->call_sp].current++; 4224 break; 4225 case FC_PUSH_WQM: 4226 case FC_LOOP: 4227 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 4228 break; 4229 case FC_REP: 4230 ctx->bc->callstack[ctx->bc->call_sp].current++; 4231 break; 4232 } 4233 4234 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 4235 ctx->bc->callstack[ctx->bc->call_sp].max) { 4236 ctx->bc->callstack[ctx->bc->call_sp].max = 4237 
ctx->bc->callstack[ctx->bc->call_sp].current; 4238 } 4239} 4240 4241static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 4242{ 4243 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 4244 4245 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, 4246 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 4247 sp->mid[sp->num_mid] = ctx->bc->cf_last; 4248 sp->num_mid++; 4249} 4250 4251static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 4252{ 4253 ctx->bc->fc_sp++; 4254 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 4255 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 4256} 4257 4258static void fc_poplevel(struct r600_shader_ctx *ctx) 4259{ 4260 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 4261 if (sp->mid) { 4262 free(sp->mid); 4263 sp->mid = NULL; 4264 } 4265 sp->num_mid = 0; 4266 sp->start = NULL; 4267 sp->type = 0; 4268 ctx->bc->fc_sp--; 4269} 4270 4271#if 0 4272static int emit_return(struct r600_shader_ctx *ctx) 4273{ 4274 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); 4275 return 0; 4276} 4277 4278static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 4279{ 4280 4281 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 4282 ctx->bc->cf_last->pop_count = pops; 4283 /* TODO work out offset */ 4284 return 0; 4285} 4286 4287static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 4288{ 4289 return 0; 4290} 4291 4292static void emit_testflag(struct r600_shader_ctx *ctx) 4293{ 4294 4295} 4296 4297static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 4298{ 4299 emit_testflag(ctx); 4300 emit_jump_to_offset(ctx, 1, 4); 4301 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 4302 pops(ctx, ifidx + 1); 4303 emit_return(ctx); 4304} 4305 4306static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 4307{ 4308 emit_testflag(ctx); 4309 4310 
r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 4311 ctx->bc->cf_last->pop_count = 1; 4312 4313 fc_set_mid(ctx, fc_sp); 4314 4315 pops(ctx, 1); 4316} 4317#endif 4318 4319static int tgsi_if(struct r600_shader_ctx *ctx) 4320{ 4321 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)); 4322 4323 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 4324 4325 fc_pushlevel(ctx, FC_IF); 4326 4327 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 4328 return 0; 4329} 4330 4331static int tgsi_else(struct r600_shader_ctx *ctx) 4332{ 4333 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 4334 ctx->bc->cf_last->pop_count = 1; 4335 4336 fc_set_mid(ctx, ctx->bc->fc_sp); 4337 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 4338 return 0; 4339} 4340 4341static int tgsi_endif(struct r600_shader_ctx *ctx) 4342{ 4343 pops(ctx, 1); 4344 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 4345 R600_ERR("if/endif unbalanced in shader\n"); 4346 return -1; 4347 } 4348 4349 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 4350 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 4351 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 4352 } else { 4353 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 4354 } 4355 fc_poplevel(ctx); 4356 4357 callstack_decrease_current(ctx, FC_PUSH_VPM); 4358 return 0; 4359} 4360 4361static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 4362{ 4363 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 4364 4365 fc_pushlevel(ctx, FC_LOOP); 4366 4367 /* check stack depth */ 4368 callstack_check_depth(ctx, FC_LOOP, 0); 4369 return 0; 4370} 4371 4372static int tgsi_endloop(struct r600_shader_ctx *ctx) 4373{ 4374 int i; 4375 4376 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 4377 4378 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != 
FC_LOOP) { 4379 R600_ERR("loop/endloop in shader code are not paired.\n"); 4380 return -EINVAL; 4381 } 4382 4383 /* fixup loop pointers - from r600isa 4384 LOOP END points to CF after LOOP START, 4385 LOOP START point to CF after LOOP END 4386 BRK/CONT point to LOOP END CF 4387 */ 4388 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 4389 4390 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 4391 4392 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 4393 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 4394 } 4395 /* TODO add LOOPRET support */ 4396 fc_poplevel(ctx); 4397 callstack_decrease_current(ctx, FC_LOOP); 4398 return 0; 4399} 4400 4401static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 4402{ 4403 unsigned int fscp; 4404 4405 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 4406 { 4407 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 4408 break; 4409 } 4410 4411 if (fscp == 0) { 4412 R600_ERR("Break not inside loop/endloop pair\n"); 4413 return -EINVAL; 4414 } 4415 4416 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 4417 ctx->bc->cf_last->pop_count = 1; 4418 4419 fc_set_mid(ctx, fscp); 4420 4421 pops(ctx, 1); 4422 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 4423 return 0; 4424} 4425 4426static int tgsi_umad(struct r600_shader_ctx *ctx) 4427{ 4428 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4429 struct r600_bytecode_alu alu; 4430 int i, j, r; 4431 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4432 4433 /* src0 * src1 */ 4434 for (i = 0; i < lasti + 1; i++) { 4435 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4436 continue; 4437 4438 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4439 4440 alu.dst.chan = i; 4441 alu.dst.sel = ctx->temp_reg; 4442 alu.dst.write = 1; 4443 4444 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 4445 for (j = 0; j < 2; j++) { 4446 
r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 4447 } 4448 4449 alu.last = 1; 4450 r = r600_bytecode_add_alu(ctx->bc, &alu); 4451 if (r) 4452 return r; 4453 } 4454 4455 4456 for (i = 0; i < lasti + 1; i++) { 4457 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4458 continue; 4459 4460 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4461 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4462 4463 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 4464 4465 alu.src[0].sel = ctx->temp_reg; 4466 alu.src[0].chan = i; 4467 4468 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 4469 if (i == lasti) { 4470 alu.last = 1; 4471 } 4472 r = r600_bytecode_add_alu(ctx->bc, &alu); 4473 if (r) 4474 return r; 4475 } 4476 return 0; 4477} 4478 4479static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 4480 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 4481 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4482 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 4483 4484 /* FIXME: 4485 * For state trackers other than OpenGL, we'll want to use 4486 * _RECIP_IEEE instead. 
4487 */ 4488 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 4489 4490 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 4491 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 4492 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 4493 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 4494 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4495 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4496 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4497 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 4498 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 4499 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 4500 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 4501 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 4502 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 4503 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4504 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 4505 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4506 /* gap */ 4507 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4508 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4509 /* gap */ 4510 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4511 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4512 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 4513 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4514 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 4515 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 4516 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, 
tgsi_trans_srcx_replicate}, 4517 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 4518 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 4519 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 4520 /* gap */ 4521 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4522 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4523 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4524 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4525 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 4526 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 4527 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 4528 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 4529 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4530 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4531 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4532 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4533 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4534 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 4535 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4536 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 4537 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 4538 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 4539 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 4540 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4541 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4542 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 4543 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, 
tgsi_tex}, 4544 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4545 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4546 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4547 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4548 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4549 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4550 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 4551 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4552 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4553 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4554 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 4555 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 4556 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 4557 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 4558 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4559 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4560 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4561 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 4562 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 4563 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 4564 /* gap */ 4565 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4566 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4567 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 4568 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 4569 /* gap */ 4570 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4571 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 4572 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4573 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4574 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4575 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 4576 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 4577 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 4578 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2_trans}, 4579 /* gap */ 4580 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4581 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 4582 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 4583 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, 4584 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 4585 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4586 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 4587 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 4588 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 4589 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4590 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4591 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 4592 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4593 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 4594 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4595 /* gap */ 4596 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4597 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4598 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4599 {106, 
0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4600 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4601 /* gap */
4602 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4603 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4604 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4605 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4606 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4607 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4608 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4609 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4610 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
4611 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
4612 /* gap */
4613 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4614 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans},
4615 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv},
4616 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2},
4617 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2},
/* INEG has a single source operand: use the dedicated tgsi_ineg handler,
 * which the eg table already pairs with the same SUB_INT opcode, instead
 * of the generic two-operand tgsi_op2. */
4618 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg},
4619 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2},
4620 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2_trans},
/* "a < b" is emitted as SETGT_INT, so it needs tgsi_op2_swap - the handler
 * this table uses for SLT/SLE/USLT and the eg table uses for ISLT.  With
 * plain tgsi_op2 the row computed a > b. */
4621 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap},
/* NOTE(review): F2I, I2F and U2F go through tgsi_op2_trans in this table
 * but F2U uses tgsi_op2 - confirm whether FLT_TO_UINT also needs the
 * _trans path on this hardware. */
4622 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2},
4623 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans},
4624 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2},
4625 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv},
4626
{TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 4627 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 4628 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 4629 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, 4630 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, 4631 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 4632 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 4633 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2_trans}, 4634 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, 4635 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap}, 4636 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4637 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4638 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4639 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4640 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 4641 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 4642 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 4643 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 4644 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 4645 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 4646 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 4647 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 4648 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 4649 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 4650 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 4651 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 4652 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl}, 4653 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 4654 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 4655 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 
4656 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4657};
4658
/*
 * Second opcode table.  Rows have the same four-field layout as
 * r600_shader_tgsi_instruction above, but the ALU/CF opcodes are the
 * EG_-prefixed constants and some opcodes get different handlers
 * (e.g. tgsi_eg_arl here versus tgsi_r600_arl there).
 * NOTE(review): the eg_ prefix presumably stands for Evergreen - confirm
 * where this table is selected.
 */
4659static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
4660 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
4661 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
4662 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
4663 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
4664 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq},
4665 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
4666 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
4667 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
4668 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4669 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4670 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
4671 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
4672 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
4673 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
4674 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
4675 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
4676 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
4677 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
4678 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
4679 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4680 /* gap */
4681 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4682 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
4683 /* gap */
4684 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP,
tgsi_unsupported}, 4685 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4686 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 4687 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4688 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 4689 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 4690 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 4691 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 4692 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 4693 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 4694 /* gap */ 4695 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4696 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4697 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4698 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4699 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 4700 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 4701 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 4702 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 4703 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4704 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4705 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4706 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4707 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4708 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 4709 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4710 
{TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 4711 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 4712 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 4713 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 4714 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4715 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4716 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 4717 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4718 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4719 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4720 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4721 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4722 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4723 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4724 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4725 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4726 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4727 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4728 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 4729 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 4730 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 4731 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 4732 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4733 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4734 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4735 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 
4736 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 4737 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 4738 /* gap */ 4739 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4740 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4741 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 4742 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 4743 /* gap */ 4744 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4745 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4746 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4747 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4748 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4749 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 4750 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 4751 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 4752 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT, tgsi_op2}, 4753 /* gap */ 4754 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4755 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 4756 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 4757 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_imod}, 4758 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 4759 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4760 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 4761 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 4762 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 4763 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 4764 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4765 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 4766 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4767 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 4768 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4769 /* gap */ 4770 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4771 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4772 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4773 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4774 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4775 /* gap */ 4776 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4777 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4778 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4779 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4780 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4781 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4782 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4783 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4784 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 4785 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 4786 /* gap */ 4787 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4788 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_f2i}, 4789 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_idiv}, 4790 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, 
tgsi_op2}, 4791 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 4792 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 4793 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 4794 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT, tgsi_op2}, 4795 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 4796 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_f2i}, 4797 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2_trans}, 4798 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 4799 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_udiv}, 4800 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 4801 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 4802 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 4803 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umod}, 4804 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, 4805 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 4806 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 4807 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT, tgsi_op2}, 4808 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_UINT, tgsi_op2_swap}, 4809 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, 4810 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4811 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4812 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4813 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4814 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 4815 
{TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 4816 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 4817 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 4818 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 4819 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 4820 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 4821 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 4822 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 4823 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 4824 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 4825 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 4826 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, 4827 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 4828 {TGSI_OPCODE_IABS, 0, 0, tgsi_iabs}, 4829 {TGSI_OPCODE_ISSG, 0, 0, tgsi_issg}, 4830 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4831}; 4832 4833static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 4834 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4835 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4836 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 4837 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, 4838 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, 4839 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 4840 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 4841 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 4842 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4843 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4844 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4845 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 4846 {TGSI_OPCODE_MIN, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 4847 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 4848 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 4849 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 4850 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 4851 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 4852 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 4853 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4854 /* gap */ 4855 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4856 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4857 /* gap */ 4858 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4859 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4860 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 4861 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4862 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 4863 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 4864 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, 4865 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, 4866 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, 4867 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 4868 /* gap */ 4869 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4870 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 4871 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4872 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4873 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, 
4874 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 4875 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 4876 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 4877 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4878 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4879 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4880 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4881 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4882 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 4883 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4884 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 4885 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, 4886 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 4887 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 4888 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4889 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4890 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 4891 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 4892 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4893 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4894 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4895 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4896 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4897 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4898 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 4899 
{TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4900 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4901 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4902 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 4903 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 4904 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 4905 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 4906 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4907 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4908 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 4909 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 4910 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 4911 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 4912 /* gap */ 4913 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4914 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4915 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 4916 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 4917 /* gap */ 4918 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4919 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4920 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4921 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4922 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4923 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4924 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 4925 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 4926 {TGSI_OPCODE_SHL, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4927 /* gap */ 4928 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4929 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4930 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4931 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4932 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 4933 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4934 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 4935 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 4936 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 4937 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4938 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4939 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 4940 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4941 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 4942 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4943 /* gap */ 4944 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4945 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4946 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4947 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4948 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4949 /* gap */ 4950 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4951 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4952 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4953 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4954 
{TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4955 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4956 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4957 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4958 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 4959 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 4960 /* gap */ 4961 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4962 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4963 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4964 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 4965 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 4966 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4967 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4968 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4969 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4970 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4971 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4972 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4973 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4974 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4975 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4976 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4977 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4978 {TGSI_OPCODE_UMUL, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4979 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4980 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4981 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4982 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4983 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4984 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4985 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4986 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4987 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4988 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 4989 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 4990 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 4991 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 4992 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 4993 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 4994 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 4995 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 4996 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 4997 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 4998 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 4999 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 5000 {TGSI_OPCODE_UARL, 0, 0, tgsi_unsupported}, 5001 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 5002 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 5003}; 5004