r600_shader.c revision 2449695e822421fdcaf1c66dffc12d7d705ea69d
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_info.h" 25#include "tgsi/tgsi_parse.h" 26#include "tgsi/tgsi_scan.h" 27#include "tgsi/tgsi_dump.h" 28#include "util/u_format.h" 29#include "r600_pipe.h" 30#include "r600_asm.h" 31#include "r600_sq.h" 32#include "r600_formats.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37#include <byteswap.h> 38 39/* CAYMAN notes 40Why CAYMAN got loops for lots of instructions is explained here. 41 42-These 8xx t-slot only ops are implemented in all vector slots. 43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 44These 8xx t-slot only opcodes become vector ops, with all four 45slots expecting the arguments on sources a and b. Result is 46broadcast to all channels. 47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 48These 8xx t-slot only opcodes become vector ops in the z, y, and 49x slots. 50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 52SQRT_IEEE/_64 53SIN/COS 54The w slot may have an independent co-issued operation, or if the 55result is required to be in the w slot, the opcode above may be 56issued in the w slot as well. 57The compiler must issue the source argument to slots z, y, and x 58*/ 59 60static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 61{ 62 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 63 struct r600_shader *rshader = &shader->shader; 64 uint32_t *ptr; 65 int i; 66 67 /* copy new shader */ 68 if (shader->bo == NULL) { 69 shader->bo = (struct r600_resource*) 70 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, rshader->bc.ndw * 4); 71 if (shader->bo == NULL) { 72 return -ENOMEM; 73 } 74 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->ctx.cs, PIPE_TRANSFER_WRITE); 75 if (R600_BIG_ENDIAN) { 76 for (i = 0; i < rshader->bc.ndw; ++i) { 77 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 78 } 79 } else { 80 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 81 } 82 rctx->ws->buffer_unmap(shader->bo->buf); 83 } 84 /* build state */ 85 switch (rshader->processor_type) { 86 case TGSI_PROCESSOR_VERTEX: 87 if (rctx->chip_class >= EVERGREEN) { 88 evergreen_pipe_shader_vs(ctx, shader); 89 } else { 90 r600_pipe_shader_vs(ctx, shader); 91 } 92 break; 93 case TGSI_PROCESSOR_FRAGMENT: 94 if (rctx->chip_class >= EVERGREEN) { 95 evergreen_pipe_shader_ps(ctx, shader); 96 } else { 97 r600_pipe_shader_ps(ctx, shader); 98 } 99 break; 100 default: 101 return -EINVAL; 102 } 103 return 0; 104} 105 106static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader); 107 108int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader) 109{ 110 static int dump_shaders = -1; 111 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 112 int r; 113 114 /* Would like some magic "get_bool_option_once" routine. 115 */ 116 if (dump_shaders == -1) 117 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 118 119 if (dump_shaders) { 120 fprintf(stderr, "--------------------------------------------------------------\n"); 121 tgsi_dump(shader->tokens, 0); 122 123 if (shader->so.num_outputs) { 124 unsigned i; 125 fprintf(stderr, "STREAMOUT\n"); 126 for (i = 0; i < shader->so.num_outputs; i++) { 127 unsigned mask = ((1 << shader->so.output[i].num_components) - 1) << 128 shader->so.output[i].start_component; 129 fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i, 130 shader->so.output[i].output_buffer, shader->so.output[i].register_index, 131 mask & 1 ? "x" : "_", 132 (mask >> 1) & 1 ? "y" : "_", 133 (mask >> 2) & 1 ? "z" : "_", 134 (mask >> 3) & 1 ? "w" : "_"); 135 } 136 } 137 } 138 r = r600_shader_from_tgsi(rctx, shader); 139 if (r) { 140 R600_ERR("translation from TGSI failed !\n"); 141 return r; 142 } 143 r = r600_bytecode_build(&shader->shader.bc); 144 if (r) { 145 R600_ERR("building bytecode failed !\n"); 146 return r; 147 } 148 if (dump_shaders) { 149 r600_bytecode_dump(&shader->shader.bc); 150 fprintf(stderr, "______________________________________________________________\n"); 151 } 152 return r600_pipe_shader(ctx, shader); 153} 154 155void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 156{ 157 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); 158 r600_bytecode_clear(&shader->shader.bc); 159 160 memset(&shader->shader,0,sizeof(struct r600_shader)); 161} 162 163/* 164 * tgsi -> r600 shader 165 */ 166struct r600_shader_tgsi_instruction; 167 168struct r600_shader_src { 169 unsigned sel; 170 unsigned swizzle[4]; 171 unsigned neg; 172 unsigned abs; 173 unsigned rel; 174 uint32_t value[4]; 175}; 176 177struct r600_shader_ctx { 178 struct tgsi_shader_info info; 179 struct tgsi_parse_context parse; 180 const struct tgsi_token *tokens; 181 unsigned type; 182 unsigned file_offset[TGSI_FILE_COUNT]; 183 unsigned temp_reg; 184 struct r600_shader_tgsi_instruction *inst_info; 185 struct r600_bytecode *bc; 186 struct r600_shader *shader; 187 struct r600_shader_src src[4]; 188 u32 *literals; 189 u32 nliterals; 190 u32 max_driver_temp_used; 191 /* needed for evergreen interpolation */ 192 boolean input_centroid; 193 boolean input_linear; 194 boolean input_perspective; 195 int num_interp_gpr; 196}; 197 198struct r600_shader_tgsi_instruction { 199 unsigned tgsi_opcode; 200 unsigned is_op3; 201 unsigned r600_opcode; 202 int (*process)(struct r600_shader_ctx *ctx); 203}; 204 205static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 206static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 207 208static int tgsi_is_supported(struct r600_shader_ctx *ctx) 209{ 210 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 211 int j; 212 213 if (i->Instruction.NumDstRegs > 1) { 214 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 215 return -EINVAL; 216 } 217 if (i->Instruction.Predicate) { 218 R600_ERR("predicate unsupported\n"); 219 return -EINVAL; 220 } 221#if 0 222 if (i->Instruction.Label) { 223 R600_ERR("label unsupported\n"); 224 return -EINVAL; 225 } 226#endif 227 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 228 if (i->Src[j].Register.Dimension) { 229 R600_ERR("unsupported src %d (dimension %d)\n", j, 230 i->Src[j].Register.Dimension); 231 return -EINVAL; 232 } 233 } 234 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 235 if (i->Dst[j].Register.Dimension) { 236 R600_ERR("unsupported dst (dimension)\n"); 237 return -EINVAL; 238 } 239 } 240 return 0; 241} 242 243static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 244{ 245 int i, r; 246 struct r600_bytecode_alu alu; 247 int gpr = 0, base_chan = 0; 248 int ij_index = 0; 249 250 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 251 ij_index = 0; 252 if (ctx->shader->input[input].centroid) 253 ij_index++; 254 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 255 ij_index = 0; 256 /* if we have perspective add one */ 257 if (ctx->input_perspective) { 258 ij_index++; 259 /* if we have perspective centroid */ 260 if (ctx->input_centroid) 261 ij_index++; 262 } 263 if (ctx->shader->input[input].centroid) 264 ij_index++; 265 } 266 267 /* work out gpr and base_chan from index */ 268 gpr = ij_index / 2; 269 base_chan = (2 * (ij_index % 2)) + 1; 270 271 for (i = 0; i < 8; i++) { 272 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 273 274 if (i < 4) 275 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 276 else 277 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 278 279 if ((i > 1) && (i < 6)) { 280 alu.dst.sel = ctx->shader->input[input].gpr; 281 alu.dst.write = 1; 282 } 283 284 alu.dst.chan = i % 4; 285 286 alu.src[0].sel = gpr; 287 alu.src[0].chan = (base_chan - (i % 2)); 288 289 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 290 291 alu.bank_swizzle_force = SQ_ALU_VEC_210; 292 if ((i % 4) == 3) 293 alu.last = 1; 294 r = r600_bytecode_add_alu(ctx->bc, &alu); 295 if (r) 296 return r; 297 } 298 return 0; 299} 300 301static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input) 302{ 303 int i, r; 304 struct r600_bytecode_alu alu; 305 306 for (i = 0; i < 4; i++) { 307 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 308 309 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_LOAD_P0; 310 311 alu.dst.sel = ctx->shader->input[input].gpr; 312 alu.dst.write = 1; 313 314 alu.dst.chan = i; 315 316 alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 317 alu.src[0].chan = i; 318 319 if (i == 3) 320 alu.last = 1; 321 r = r600_bytecode_add_alu(ctx->bc, &alu); 322 if (r) 323 return r; 324 } 325 return 0; 326} 327 328/* 329 * Special export handling in shaders 330 * 331 * shader export ARRAY_BASE for EXPORT_POS: 332 * 60 is position 333 * 61 is misc vector 334 * 62, 63 are clip distance vectors 335 * 336 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL: 337 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61 338 * USE_VTX_POINT_SIZE - point size in the X channel of export 61 339 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61 340 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61 341 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61 342 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually 343 * exclusive from render target index) 344 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors 345 * 346 * 347 * shader export ARRAY_BASE for EXPORT_PIXEL: 348 * 0-7 CB targets 349 * 61 computed Z vector 350 * 351 * The use of the values exported in the computed Z vector are controlled 352 * by DB_SHADER_CONTROL: 353 * Z_EXPORT_ENABLE - Z as a float in RED 354 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN 355 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA 356 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE 357 * DB_SOURCE_FORMAT - export control restrictions 358 * 359 */ 360 361 362/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */ 363static int r600_spi_sid(struct r600_shader_io * io) 364{ 365 int index, name = io->name; 366 367 /* These params are handled differently, they don't need 368 * semantic indices, so we'll use 0 for them. 369 */ 370 if (name == TGSI_SEMANTIC_POSITION || 371 name == TGSI_SEMANTIC_PSIZE || 372 name == TGSI_SEMANTIC_FACE) 373 index = 0; 374 else { 375 if (name == TGSI_SEMANTIC_GENERIC) { 376 /* For generic params simply use sid from tgsi */ 377 index = io->sid; 378 } else { 379 380 /* FIXME: two-side rendering is broken in r600g, this will 381 * keep old functionality */ 382 if (name == TGSI_SEMANTIC_BCOLOR) 383 name = TGSI_SEMANTIC_COLOR; 384 385 /* For non-generic params - pack name and sid into 8 bits */ 386 index = 0x80 | (name<<3) | (io->sid); 387 } 388 389 /* Make sure that all really used indices have nonzero value, so 390 * we can just compare it to 0 later instead of comparing the name 391 * with different values to detect special cases. */ 392 index++; 393 } 394 395 return index; 396}; 397 398static int tgsi_declaration(struct r600_shader_ctx *ctx) 399{ 400 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 401 unsigned i; 402 int r; 403 404 switch (d->Declaration.File) { 405 case TGSI_FILE_INPUT: 406 i = ctx->shader->ninput++; 407 ctx->shader->input[i].name = d->Semantic.Name; 408 ctx->shader->input[i].sid = d->Semantic.Index; 409 ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); 410 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 411 ctx->shader->input[i].centroid = d->Declaration.Centroid; 412 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; 413 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) { 414 /* turn input into interpolate on EG */ 415 if (ctx->shader->input[i].spi_sid) { 416 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 417 if (ctx->shader->input[i].interpolate > 0) { 418 evergreen_interp_alu(ctx, i); 419 } else { 420 evergreen_interp_flat(ctx, i); 421 } 422 } 423 } 424 break; 425 case TGSI_FILE_OUTPUT: 426 i = ctx->shader->noutput++; 427 ctx->shader->output[i].name = d->Semantic.Name; 428 ctx->shader->output[i].sid = d->Semantic.Index; 429 ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); 430 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; 431 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 432 break; 433 case TGSI_FILE_CONSTANT: 434 case TGSI_FILE_TEMPORARY: 435 case TGSI_FILE_SAMPLER: 436 case TGSI_FILE_ADDRESS: 437 break; 438 439 case TGSI_FILE_SYSTEM_VALUE: 440 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 441 struct r600_bytecode_alu alu; 442 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 443 444 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 445 alu.src[0].sel = 0; 446 alu.src[0].chan = 3; 447 448 alu.dst.sel = 0; 449 alu.dst.chan = 3; 450 alu.dst.write = 1; 451 alu.last = 1; 452 453 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 454 return r; 455 break; 456 } 457 458 default: 459 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 460 return -EINVAL; 461 } 462 return 0; 463} 464 465static int r600_get_temp(struct r600_shader_ctx *ctx) 466{ 467 return ctx->temp_reg + ctx->max_driver_temp_used++; 468} 469 470/* 471 * for evergreen we need to scan the shader to find the number of GPRs we need to 472 * reserve for interpolation. 473 * 474 * we need to know if we are going to emit 475 * any centroid inputs 476 * if perspective and linear are required 477*/ 478static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 479{ 480 int i; 481 int num_baryc; 482 483 ctx->input_linear = FALSE; 484 ctx->input_perspective = FALSE; 485 ctx->input_centroid = FALSE; 486 ctx->num_interp_gpr = 1; 487 488 /* any centroid inputs */ 489 for (i = 0; i < ctx->info.num_inputs; i++) { 490 /* skip position/face */ 491 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 492 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 493 continue; 494 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 495 ctx->input_linear = TRUE; 496 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 497 ctx->input_perspective = TRUE; 498 if (ctx->info.input_centroid[i]) 499 ctx->input_centroid = TRUE; 500 } 501 502 num_baryc = 0; 503 /* ignoring sample for now */ 504 if (ctx->input_perspective) 505 num_baryc++; 506 if (ctx->input_linear) 507 num_baryc++; 508 if (ctx->input_centroid) 509 num_baryc *= 2; 510 511 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 512 513 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 514 return ctx->num_interp_gpr; 515} 516 517static void tgsi_src(struct r600_shader_ctx *ctx, 518 const struct tgsi_full_src_register *tgsi_src, 519 struct r600_shader_src *r600_src) 520{ 521 memset(r600_src, 0, sizeof(*r600_src)); 522 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 523 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 524 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 525 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 526 r600_src->neg = tgsi_src->Register.Negate; 527 r600_src->abs = tgsi_src->Register.Absolute; 528 529 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 530 int index; 531 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 532 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 533 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 534 535 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 536 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 537 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 538 return; 539 } 540 index = tgsi_src->Register.Index; 541 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 542 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 543 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 544 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ 545 r600_src->swizzle[0] = 3; 546 r600_src->swizzle[1] = 3; 547 r600_src->swizzle[2] = 3; 548 r600_src->swizzle[3] = 3; 549 r600_src->sel = 0; 550 } else { 551 if (tgsi_src->Register.Indirect) 552 r600_src->rel = V_SQ_REL_RELATIVE; 553 r600_src->sel = tgsi_src->Register.Index; 554 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 555 } 556} 557 558static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 559{ 560 struct r600_bytecode_vtx vtx; 561 unsigned int ar_reg; 562 int r; 563 564 if (offset) { 565 struct r600_bytecode_alu alu; 566 567 memset(&alu, 0, sizeof(alu)); 568 569 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 570 alu.src[0].sel = ctx->bc->ar_reg; 571 572 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 573 alu.src[1].value = offset; 574 575 alu.dst.sel = dst_reg; 576 alu.dst.write = 1; 577 alu.last = 1; 578 579 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 580 return r; 581 582 ar_reg = dst_reg; 583 } else { 584 ar_reg = ctx->bc->ar_reg; 585 } 586 587 memset(&vtx, 0, sizeof(vtx)); 588 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 589 vtx.src_gpr = ar_reg; 590 vtx.mega_fetch_count = 16; 591 vtx.dst_gpr = dst_reg; 592 vtx.dst_sel_x = 0; /* SEL_X */ 593 vtx.dst_sel_y = 1; /* SEL_Y */ 594 vtx.dst_sel_z = 2; /* SEL_Z */ 595 vtx.dst_sel_w = 3; /* SEL_W */ 596 vtx.data_format = FMT_32_32_32_32_FLOAT; 597 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 598 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 599 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 600 vtx.endian = r600_endian_swap(32); 601 602 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 603 return r; 604 605 return 0; 606} 607 608static int tgsi_split_constant(struct r600_shader_ctx *ctx) 609{ 610 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 611 struct r600_bytecode_alu alu; 612 int i, j, k, nconst, r; 613 614 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 615 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 616 nconst++; 617 } 618 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 619 } 620 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 621 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 622 continue; 623 } 624 625 if (ctx->src[i].rel) { 626 int treg = r600_get_temp(ctx); 627 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 628 return r; 629 630 ctx->src[i].sel = treg; 631 ctx->src[i].rel = 0; 632 j--; 633 } else if (j > 0) { 634 int treg = r600_get_temp(ctx); 635 for (k = 0; k < 4; k++) { 636 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 637 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 638 alu.src[0].sel = ctx->src[i].sel; 639 alu.src[0].chan = k; 640 alu.src[0].rel = ctx->src[i].rel; 641 alu.dst.sel = treg; 642 alu.dst.chan = k; 643 alu.dst.write = 1; 644 if (k == 3) 645 alu.last = 1; 646 r = r600_bytecode_add_alu(ctx->bc, &alu); 647 if (r) 648 return r; 649 } 650 ctx->src[i].sel = treg; 651 ctx->src[i].rel =0; 652 j--; 653 } 654 } 655 return 0; 656} 657 658/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 659static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 660{ 661 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 662 struct r600_bytecode_alu alu; 663 int i, j, k, nliteral, r; 664 665 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 666 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 667 nliteral++; 668 } 669 } 670 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 671 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 672 int treg = r600_get_temp(ctx); 673 for (k = 0; k < 4; k++) { 674 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 675 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 676 alu.src[0].sel = ctx->src[i].sel; 677 alu.src[0].chan = k; 678 alu.src[0].value = ctx->src[i].value[k]; 679 alu.dst.sel = treg; 680 alu.dst.chan = k; 681 alu.dst.write = 1; 682 if (k == 3) 683 alu.last = 1; 684 r = r600_bytecode_add_alu(ctx->bc, &alu); 685 if (r) 686 return r; 687 } 688 ctx->src[i].sel = treg; 689 j--; 690 } 691 } 692 return 0; 693} 694 695static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader) 696{ 697 struct r600_shader *shader = &pipeshader->shader; 698 struct tgsi_token *tokens = pipeshader->tokens; 699 struct pipe_stream_output_info so = pipeshader->so; 700 struct tgsi_full_immediate *immediate; 701 struct tgsi_full_property *property; 702 struct r600_shader_ctx ctx; 703 struct r600_bytecode_output output[32]; 704 unsigned output_done, noutput; 705 unsigned opcode; 706 int i, j, r = 0, pos0; 707 708 ctx.bc = &shader->bc; 709 ctx.shader = shader; 710 r600_bytecode_init(ctx.bc, rctx->chip_class); 711 ctx.tokens = tokens; 712 tgsi_scan_shader(tokens, &ctx.info); 713 tgsi_parse_init(&ctx.parse, tokens); 714 ctx.type = ctx.parse.FullHeader.Processor.Processor; 715 shader->processor_type = ctx.type; 716 ctx.bc->type = shader->processor_type; 717 718 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) || 719 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color)); 720 721 shader->nr_cbufs = rctx->nr_cbufs; 722 723 /* register allocations */ 724 /* Values [0,127] correspond to GPR[0..127]. 725 * Values [128,159] correspond to constant buffer bank 0 726 * Values [160,191] correspond to constant buffer bank 1 727 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 728 * Values [256,287] correspond to constant buffer bank 2 (EG) 729 * Values [288,319] correspond to constant buffer bank 3 (EG) 730 * Other special values are shown in the list below. 731 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 732 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 733 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 734 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 735 * 248 SQ_ALU_SRC_0: special constant 0.0. 736 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 737 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 738 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 739 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 740 * 253 SQ_ALU_SRC_LITERAL: literal constant. 741 * 254 SQ_ALU_SRC_PV: previous vector result. 742 * 255 SQ_ALU_SRC_PS: previous scalar result. 743 */ 744 for (i = 0; i < TGSI_FILE_COUNT; i++) { 745 ctx.file_offset[i] = 0; 746 } 747 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 748 ctx.file_offset[TGSI_FILE_INPUT] = 1; 749 if (ctx.bc->chip_class >= EVERGREEN) { 750 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 751 } else { 752 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 753 } 754 } 755 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { 756 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 757 } 758 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 759 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 760 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 761 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 762 763 /* Outside the GPR range. This will be translated to one of the 764 * kcache banks later. */ 765 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 766 767 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 768 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 769 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 770 ctx.temp_reg = ctx.bc->ar_reg + 1; 771 772 ctx.nliterals = 0; 773 ctx.literals = NULL; 774 shader->fs_write_all = FALSE; 775 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 776 tgsi_parse_token(&ctx.parse); 777 switch (ctx.parse.FullToken.Token.Type) { 778 case TGSI_TOKEN_TYPE_IMMEDIATE: 779 immediate = &ctx.parse.FullToken.FullImmediate; 780 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 781 if(ctx.literals == NULL) { 782 r = -ENOMEM; 783 goto out_err; 784 } 785 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 786 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 787 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 788 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 789 ctx.nliterals++; 790 break; 791 case TGSI_TOKEN_TYPE_DECLARATION: 792 r = tgsi_declaration(&ctx); 793 if (r) 794 goto out_err; 795 break; 796 case TGSI_TOKEN_TYPE_INSTRUCTION: 797 r = tgsi_is_supported(&ctx); 798 if (r) 799 goto out_err; 800 ctx.max_driver_temp_used = 0; 801 /* reserve first tmp for everyone */ 802 r600_get_temp(&ctx); 803 804 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 805 if ((r = tgsi_split_constant(&ctx))) 806 goto out_err; 807 if ((r = tgsi_split_literal_constant(&ctx))) 808 goto out_err; 809 if (ctx.bc->chip_class == CAYMAN) 810 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 811 else if (ctx.bc->chip_class >= EVERGREEN) 812 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 813 else 814 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 815 r = ctx.inst_info->process(&ctx); 816 if (r) 817 goto out_err; 818 break; 819 case TGSI_TOKEN_TYPE_PROPERTY: 820 property = &ctx.parse.FullToken.FullProperty; 821 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { 822 if (property->u[0].Data == 1) 823 shader->fs_write_all = TRUE; 824 } 825 break; 826 default: 827 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 828 r = -EINVAL; 829 goto out_err; 830 } 831 } 832 833 noutput = shader->noutput; 834 835 /* clamp color outputs */ 836 if (shader->clamp_color) { 837 for (i = 0; i < noutput; i++) { 838 if (shader->output[i].name == TGSI_SEMANTIC_COLOR || 839 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) { 840 841 int j; 842 for (j = 0; j < 4; j++) { 843 struct r600_bytecode_alu alu; 844 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 845 846 /* MOV_SAT R, R */ 847 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 848 alu.dst.sel = shader->output[i].gpr; 849 alu.dst.chan = j; 850 alu.dst.write = 1; 851 alu.dst.clamp = 1; 852 alu.src[0].sel = alu.dst.sel; 853 alu.src[0].chan = j; 854 855 if (j == 3) { 856 alu.last = 1; 857 } 858 r = r600_bytecode_add_alu(ctx.bc, &alu); 859 if (r) 860 return r; 861 } 862 } 863 } 864 } 865 866 /* Add stream outputs. */ 867 if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) { 868 for (i = 0; i < so.num_outputs; i++) { 869 struct r600_bytecode_output output; 870 871 if (so.output[i].output_buffer >= 4) { 872 R600_ERR("exceeded the max number of stream output buffers, got: %d\n", 873 so.output[i].output_buffer); 874 r = -EINVAL; 875 goto out_err; 876 } 877 if (so.output[i].start_component) { 878 R600_ERR("stream_output - start_component cannot be non-zero\n"); 879 r = -EINVAL; 880 goto out_err; 881 } 882 883 memset(&output, 0, sizeof(struct r600_bytecode_output)); 884 output.gpr = shader->output[so.output[i].register_index].gpr; 885 output.elem_size = 0; 886 output.array_base = so.output[i].dst_offset; 887 output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE; 888 output.burst_count = 1; 889 output.barrier = 1; 890 output.array_size = 0; 891 output.comp_mask = (1 << so.output[i].num_components) - 1; 892 if (ctx.bc->chip_class >= EVERGREEN) { 893 switch (so.output[i].output_buffer) { 894 case 0: 895 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0; 896 break; 897 case 1: 898 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1; 899 break; 900 case 2: 901 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2; 902 break; 903 case 3: 904 output.inst = EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3; 905 break; 906 } 907 } else { 908 switch (so.output[i].output_buffer) { 909 case 0: 910 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0; 911 break; 912 case 1: 913 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1; 914 break; 915 case 2: 916 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2; 917 break; 918 case 3: 919 output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3; 920 break; 921 } 922 } 923 r = r600_bytecode_add_output(ctx.bc, &output); 924 if (r) 925 goto out_err; 926 } 927 } 928 929 /* export output */ 930 j = 0; 931 for (i = 0, pos0 = 0; i < noutput; i++) { 932 memset(&output[i], 0, sizeof(struct r600_bytecode_output)); 933 output[i + j].gpr = shader->output[i].gpr; 934 output[i + j].elem_size = 3; 935 output[i + j].swizzle_x = 0; 936 output[i + j].swizzle_y = 1; 937 output[i + j].swizzle_z = 2; 938 output[i + j].swizzle_w = 3; 939 output[i + j].burst_count = 1; 940 output[i + j].barrier = 1; 941 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 942 output[i + j].array_base = i - pos0; 943 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 944 switch (ctx.type) { 945 case TGSI_PROCESSOR_VERTEX: 946 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 947 output[i + j].array_base = 60; 948 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 949 /* position doesn't count in array_base */ 950 pos0++; 951 } 952 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 953 output[i + j].array_base = 61; 954 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 955 /* position doesn't count in array_base */ 956 pos0++; 957 } 958 break; 959 case TGSI_PROCESSOR_FRAGMENT: 960 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 961 output[i + j].array_base = shader->output[i].sid; 962 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 963 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { 964 for (j = 1; j < shader->nr_cbufs; j++) { 965 memset(&output[i + j], 0, sizeof(struct r600_bytecode_output)); 966 output[i + j].gpr = shader->output[i].gpr; 967 output[i + j].elem_size = 3; 968 output[i + j].swizzle_x = 0; 969 output[i + j].swizzle_y = 1; 970 output[i + j].swizzle_z = 2; 971 output[i + j].swizzle_w = 3; 972 output[i + j].burst_count = 1; 973 output[i + j].barrier = 1; 974 output[i + j].array_base = shader->output[i].sid + j; 975 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 976 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 977 } 978 j = shader->nr_cbufs-1; 979 } 980 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 981 output[i + j].array_base = 61; 982 output[i + j].swizzle_x = 2; 983 output[i + j].swizzle_y = 7; 984 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 985 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 986 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 987 output[i + j].array_base = 61; 988 output[i + j].swizzle_x = 7; 989 output[i + j].swizzle_y = 1; 990 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 991 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 992 } else { 993 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 994 r = -EINVAL; 995 goto out_err; 996 } 997 break; 998 default: 999 R600_ERR("unsupported processor type %d\n", ctx.type); 1000 r = -EINVAL; 1001 goto out_err; 1002 } 1003 } 1004 noutput += j; 1005 /* add fake param output for vertex shader if no param is exported */ 1006 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 1007 for (i = 0, pos0 = 0; i < noutput; i++) { 1008 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 1009 pos0 = 1; 1010 break; 1011 } 1012 } 1013 if (!pos0) { 1014 memset(&output[i], 0, sizeof(struct r600_bytecode_output)); 1015 output[i].gpr = 0; 1016 output[i].elem_size = 3; 1017 output[i].swizzle_x = 7; 1018 output[i].swizzle_y = 7; 1019 output[i].swizzle_z = 7; 1020 output[i].swizzle_w = 7; 1021 output[i].burst_count = 1; 1022 output[i].barrier = 1; 1023 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 1024 output[i].array_base = 0; 1025 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1026 noutput++; 1027 } 1028 } 1029 /* add fake pixel export */ 1030 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 1031 memset(&output[0], 0, sizeof(struct r600_bytecode_output)); 1032 output[0].gpr = 0; 1033 output[0].elem_size = 3; 1034 output[0].swizzle_x = 7; 1035 output[0].swizzle_y = 7; 1036 output[0].swizzle_z = 7; 1037 output[0].swizzle_w = 7; 1038 output[0].burst_count = 1; 1039 output[0].barrier = 1; 1040 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 1041 output[0].array_base = 0; 1042 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 1043 noutput++; 1044 } 1045 /* set export done on last export of each type */ 1046 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 1047 if (ctx.bc->chip_class < CAYMAN) { 1048 if (i == (noutput - 1)) { 1049 output[i].end_of_program = 1; 1050 } 1051 } 1052 if (!(output_done & (1 << output[i].type))) { 1053 output_done |= (1 << output[i].type); 1054 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 1055 } 1056 } 1057 /* add output to bytecode */ 1058 for (i = 0; i < noutput; i++) { 1059 r = r600_bytecode_add_output(ctx.bc, &output[i]); 1060 if (r) 1061 goto out_err; 1062 } 1063 /* add program end */ 1064 if (ctx.bc->chip_class == CAYMAN) 1065 cm_bytecode_add_cf_end(ctx.bc); 1066 1067 free(ctx.literals); 1068 tgsi_parse_free(&ctx.parse); 1069 return 0; 1070out_err: 1071 free(ctx.literals); 1072 tgsi_parse_free(&ctx.parse); 1073 return r; 1074} 1075 1076static int tgsi_unsupported(struct r600_shader_ctx *ctx) 1077{ 1078 R600_ERR("%s tgsi opcode unsupported\n", 1079 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 1080 return -EINVAL; 1081} 1082 1083static int tgsi_end(struct r600_shader_ctx *ctx) 1084{ 1085 return 0; 1086} 1087 1088static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 1089 const struct r600_shader_src *shader_src, 1090 unsigned chan) 1091{ 1092 bc_src->sel = shader_src->sel; 1093 bc_src->chan = shader_src->swizzle[chan]; 1094 bc_src->neg = shader_src->neg; 1095 bc_src->abs = shader_src->abs; 1096 bc_src->rel = shader_src->rel; 1097 bc_src->value = shader_src->value[bc_src->chan]; 1098} 1099 1100static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 1101{ 1102 bc_src->abs = 1; 1103 bc_src->neg = 0; 1104} 1105 1106static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 1107{ 1108 bc_src->neg = !bc_src->neg; 1109} 1110 1111static void tgsi_dst(struct r600_shader_ctx *ctx, 1112 const struct tgsi_full_dst_register *tgsi_dst, 1113 unsigned swizzle, 1114 struct r600_bytecode_alu_dst *r600_dst) 1115{ 1116 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1117 1118 r600_dst->sel = tgsi_dst->Register.Index; 1119 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 1120 r600_dst->chan = swizzle; 1121 r600_dst->write = 1; 1122 if (tgsi_dst->Register.Indirect) 1123 r600_dst->rel = V_SQ_REL_RELATIVE; 1124 if (inst->Instruction.Saturate) { 1125 r600_dst->clamp = 1; 1126 } 1127} 1128 1129static int tgsi_last_instruction(unsigned writemask) 1130{ 1131 int i, lasti = 0; 1132 1133 for (i = 0; i < 4; i++) { 1134 if (writemask & (1 << i)) { 1135 lasti = i; 1136 } 1137 } 1138 return lasti; 1139} 1140 1141static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 1142{ 1143 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1144 struct r600_bytecode_alu alu; 1145 int i, j, r; 1146 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1147 1148 for (i = 0; i < lasti + 1; i++) { 1149 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1150 continue; 1151 1152 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1153 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1154 1155 alu.inst = ctx->inst_info->r600_opcode; 1156 if (!swap) { 1157 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1158 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1159 } 1160 } else { 1161 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 1162 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1163 } 1164 /* handle some special cases */ 1165 switch (ctx->inst_info->tgsi_opcode) { 1166 case TGSI_OPCODE_SUB: 1167 r600_bytecode_src_toggle_neg(&alu.src[1]); 1168 break; 1169 case TGSI_OPCODE_ABS: 1170 r600_bytecode_src_set_abs(&alu.src[0]); 1171 break; 1172 default: 1173 break; 1174 } 1175 if (i == lasti || trans_only) { 1176 alu.last = 1; 1177 } 1178 r = r600_bytecode_add_alu(ctx->bc, &alu); 1179 if (r) 1180 return r; 1181 } 1182 return 0; 1183} 1184 1185static int tgsi_op2(struct r600_shader_ctx *ctx) 1186{ 1187 return tgsi_op2_s(ctx, 0, 0); 1188} 1189 1190static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1191{ 1192 return tgsi_op2_s(ctx, 1, 0); 1193} 1194 1195static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 1196{ 1197 return tgsi_op2_s(ctx, 0, 1); 1198} 1199 1200static int tgsi_ineg(struct r600_shader_ctx *ctx) 1201{ 1202 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1203 struct r600_bytecode_alu alu; 1204 int i, r; 1205 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1206 1207 for (i = 0; i < lasti + 1; i++) { 1208 1209 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1210 continue; 1211 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1212 alu.inst = ctx->inst_info->r600_opcode; 1213 1214 alu.src[0].sel = V_SQ_ALU_SRC_0; 1215 1216 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1217 1218 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1219 1220 if (i == lasti) { 1221 alu.last = 1; 1222 } 1223 r = r600_bytecode_add_alu(ctx->bc, &alu); 1224 if (r) 1225 return r; 1226 } 1227 return 0; 1228 1229} 1230 1231static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 1232{ 1233 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1234 int i, j, r; 1235 struct r600_bytecode_alu alu; 1236 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1237 1238 for (i = 0 ; i < last_slot; i++) { 1239 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1240 alu.inst = ctx->inst_info->r600_opcode; 1241 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1242 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 1243 } 1244 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1245 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1246 1247 if (i == last_slot - 1) 1248 alu.last = 1; 1249 r = r600_bytecode_add_alu(ctx->bc, &alu); 1250 if (r) 1251 return r; 1252 } 1253 return 0; 1254} 1255 1256/* 1257 * r600 - trunc to -PI..PI range 1258 * r700 - normalize by dividing by 2PI 1259 * see fdo bug 27901 1260 */ 1261static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 1262{ 1263 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1264 static float double_pi = 3.1415926535 * 2; 1265 static float neg_pi = -3.1415926535; 1266 1267 int r; 1268 struct r600_bytecode_alu alu; 1269 1270 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1271 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1272 alu.is_op3 = 1; 1273 1274 alu.dst.chan = 0; 1275 alu.dst.sel = ctx->temp_reg; 1276 alu.dst.write = 1; 1277 1278 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1279 1280 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1281 alu.src[1].chan = 0; 1282 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1283 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1284 alu.src[2].chan = 0; 1285 alu.last = 1; 1286 r = r600_bytecode_add_alu(ctx->bc, &alu); 1287 if (r) 1288 return r; 1289 1290 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1291 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1292 1293 alu.dst.chan = 0; 1294 alu.dst.sel = ctx->temp_reg; 1295 alu.dst.write = 1; 1296 1297 alu.src[0].sel = ctx->temp_reg; 1298 alu.src[0].chan = 0; 1299 alu.last = 1; 1300 r = r600_bytecode_add_alu(ctx->bc, &alu); 1301 if (r) 1302 return r; 1303 1304 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1305 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1306 alu.is_op3 = 1; 1307 1308 alu.dst.chan = 0; 1309 alu.dst.sel = ctx->temp_reg; 1310 alu.dst.write = 1; 1311 1312 alu.src[0].sel = ctx->temp_reg; 1313 alu.src[0].chan = 0; 1314 1315 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1316 alu.src[1].chan = 0; 1317 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1318 alu.src[2].chan = 0; 1319 1320 if (ctx->bc->chip_class == R600) { 1321 alu.src[1].value = *(uint32_t *)&double_pi; 1322 alu.src[2].value = *(uint32_t *)&neg_pi; 1323 } else { 1324 alu.src[1].sel = V_SQ_ALU_SRC_1; 1325 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1326 alu.src[2].neg = 1; 1327 } 1328 1329 alu.last = 1; 1330 r = r600_bytecode_add_alu(ctx->bc, &alu); 1331 if (r) 1332 return r; 1333 return 0; 1334} 1335 1336static int cayman_trig(struct r600_shader_ctx *ctx) 1337{ 1338 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1339 struct r600_bytecode_alu alu; 1340 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1341 int i, r; 1342 1343 r = tgsi_setup_trig(ctx); 1344 if (r) 1345 return r; 1346 1347 1348 for (i = 0; i < last_slot; i++) { 1349 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1350 alu.inst = ctx->inst_info->r600_opcode; 1351 alu.dst.chan = i; 1352 1353 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1354 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1355 1356 alu.src[0].sel = ctx->temp_reg; 1357 alu.src[0].chan = 0; 1358 if (i == last_slot - 1) 1359 alu.last = 1; 1360 r = r600_bytecode_add_alu(ctx->bc, &alu); 1361 if (r) 1362 return r; 1363 } 1364 return 0; 1365} 1366 1367static int tgsi_trig(struct r600_shader_ctx *ctx) 1368{ 1369 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1370 struct r600_bytecode_alu alu; 1371 int i, r; 1372 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1373 1374 r = tgsi_setup_trig(ctx); 1375 if (r) 1376 return r; 1377 1378 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1379 alu.inst = ctx->inst_info->r600_opcode; 1380 alu.dst.chan = 0; 1381 alu.dst.sel = ctx->temp_reg; 1382 alu.dst.write = 1; 1383 1384 alu.src[0].sel = ctx->temp_reg; 1385 alu.src[0].chan = 0; 1386 alu.last = 1; 1387 r = r600_bytecode_add_alu(ctx->bc, &alu); 1388 if (r) 1389 return r; 1390 1391 /* replicate result */ 1392 for (i = 0; i < lasti + 1; i++) { 1393 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1394 continue; 1395 1396 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1397 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1398 1399 alu.src[0].sel = ctx->temp_reg; 1400 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1401 if (i == lasti) 1402 alu.last = 1; 1403 r = r600_bytecode_add_alu(ctx->bc, &alu); 1404 if (r) 1405 return r; 1406 } 1407 return 0; 1408} 1409 1410static int tgsi_scs(struct r600_shader_ctx *ctx) 1411{ 1412 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1413 struct r600_bytecode_alu alu; 1414 int i, r; 1415 1416 /* We'll only need the trig stuff if we are going to write to the 1417 * X or Y components of the destination vector. 1418 */ 1419 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1420 r = tgsi_setup_trig(ctx); 1421 if (r) 1422 return r; 1423 } 1424 1425 /* dst.x = COS */ 1426 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1427 if (ctx->bc->chip_class == CAYMAN) { 1428 for (i = 0 ; i < 3; i++) { 1429 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1430 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1431 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1432 1433 if (i == 0) 1434 alu.dst.write = 1; 1435 else 1436 alu.dst.write = 0; 1437 alu.src[0].sel = ctx->temp_reg; 1438 alu.src[0].chan = 0; 1439 if (i == 2) 1440 alu.last = 1; 1441 r = r600_bytecode_add_alu(ctx->bc, &alu); 1442 if (r) 1443 return r; 1444 } 1445 } else { 1446 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1447 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1448 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1449 1450 alu.src[0].sel = ctx->temp_reg; 1451 alu.src[0].chan = 0; 1452 alu.last = 1; 1453 r = r600_bytecode_add_alu(ctx->bc, &alu); 1454 if (r) 1455 return r; 1456 } 1457 } 1458 1459 /* dst.y = SIN */ 1460 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1461 if (ctx->bc->chip_class == CAYMAN) { 1462 for (i = 0 ; i < 3; i++) { 1463 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1464 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1465 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1466 if (i == 1) 1467 alu.dst.write = 1; 1468 else 1469 alu.dst.write = 0; 1470 alu.src[0].sel = ctx->temp_reg; 1471 alu.src[0].chan = 0; 1472 if (i == 2) 1473 alu.last = 1; 1474 r = r600_bytecode_add_alu(ctx->bc, &alu); 1475 if (r) 1476 return r; 1477 } 1478 } else { 1479 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1480 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1481 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1482 1483 alu.src[0].sel = ctx->temp_reg; 1484 alu.src[0].chan = 0; 1485 alu.last = 1; 1486 r = r600_bytecode_add_alu(ctx->bc, &alu); 1487 if (r) 1488 return r; 1489 } 1490 } 1491 1492 /* dst.z = 0.0; */ 1493 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1494 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1495 1496 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1497 1498 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1499 1500 alu.src[0].sel = V_SQ_ALU_SRC_0; 1501 alu.src[0].chan = 0; 1502 1503 alu.last = 1; 1504 1505 r = r600_bytecode_add_alu(ctx->bc, &alu); 1506 if (r) 1507 return r; 1508 } 1509 1510 /* dst.w = 1.0; */ 1511 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1512 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1513 1514 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1515 1516 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1517 1518 alu.src[0].sel = V_SQ_ALU_SRC_1; 1519 alu.src[0].chan = 0; 1520 1521 alu.last = 1; 1522 1523 r = r600_bytecode_add_alu(ctx->bc, &alu); 1524 if (r) 1525 return r; 1526 } 1527 1528 return 0; 1529} 1530 1531static int tgsi_kill(struct r600_shader_ctx *ctx) 1532{ 1533 struct r600_bytecode_alu alu; 1534 int i, r; 1535 1536 for (i = 0; i < 4; i++) { 1537 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1538 alu.inst = ctx->inst_info->r600_opcode; 1539 1540 alu.dst.chan = i; 1541 1542 alu.src[0].sel = V_SQ_ALU_SRC_0; 1543 1544 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1545 alu.src[1].sel = V_SQ_ALU_SRC_1; 1546 alu.src[1].neg = 1; 1547 } else { 1548 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1549 } 1550 if (i == 3) { 1551 alu.last = 1; 1552 } 1553 r = r600_bytecode_add_alu(ctx->bc, &alu); 1554 if (r) 1555 return r; 1556 } 1557 1558 /* kill must be last in ALU */ 1559 ctx->bc->force_add_cf = 1; 1560 ctx->shader->uses_kill = TRUE; 1561 return 0; 1562} 1563 1564static int tgsi_lit(struct r600_shader_ctx *ctx) 1565{ 1566 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1567 struct r600_bytecode_alu alu; 1568 int r; 1569 1570 /* tmp.x = max(src.y, 0.0) */ 1571 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1572 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1573 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 1574 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1575 alu.src[1].chan = 1; 1576 1577 alu.dst.sel = ctx->temp_reg; 1578 alu.dst.chan = 0; 1579 alu.dst.write = 1; 1580 1581 alu.last = 1; 1582 r = r600_bytecode_add_alu(ctx->bc, &alu); 1583 if (r) 1584 return r; 1585 1586 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1587 { 1588 int chan; 1589 int sel; 1590 int i; 1591 1592 if (ctx->bc->chip_class == CAYMAN) { 1593 for (i = 0; i < 3; i++) { 1594 /* tmp.z = log(tmp.x) */ 1595 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1596 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1597 alu.src[0].sel = ctx->temp_reg; 1598 alu.src[0].chan = 0; 1599 alu.dst.sel = ctx->temp_reg; 1600 alu.dst.chan = i; 1601 if (i == 2) { 1602 alu.dst.write = 1; 1603 alu.last = 1; 1604 } else 1605 alu.dst.write = 0; 1606 1607 r = r600_bytecode_add_alu(ctx->bc, &alu); 1608 if (r) 1609 return r; 1610 } 1611 } else { 1612 /* tmp.z = log(tmp.x) */ 1613 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1614 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1615 alu.src[0].sel = ctx->temp_reg; 1616 alu.src[0].chan = 0; 1617 alu.dst.sel = ctx->temp_reg; 1618 alu.dst.chan = 2; 1619 alu.dst.write = 1; 1620 alu.last = 1; 1621 r = r600_bytecode_add_alu(ctx->bc, &alu); 1622 if (r) 1623 return r; 1624 } 1625 1626 chan = alu.dst.chan; 1627 sel = alu.dst.sel; 1628 1629 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 1630 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1631 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1632 alu.src[0].sel = sel; 1633 alu.src[0].chan = chan; 1634 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 1635 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 1636 alu.dst.sel = ctx->temp_reg; 1637 alu.dst.chan = 0; 1638 alu.dst.write = 1; 1639 alu.is_op3 = 1; 1640 alu.last = 1; 1641 r = r600_bytecode_add_alu(ctx->bc, &alu); 1642 if (r) 1643 return r; 1644 1645 if (ctx->bc->chip_class == CAYMAN) { 1646 for (i = 0; i < 3; i++) { 1647 /* dst.z = exp(tmp.x) */ 1648 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1649 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1650 alu.src[0].sel = ctx->temp_reg; 1651 alu.src[0].chan = 0; 1652 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1653 if (i == 2) { 1654 alu.dst.write = 1; 1655 alu.last = 1; 1656 } else 1657 alu.dst.write = 0; 1658 r = r600_bytecode_add_alu(ctx->bc, &alu); 1659 if (r) 1660 return r; 1661 } 1662 } else { 1663 /* dst.z = exp(tmp.x) */ 1664 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1665 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1666 alu.src[0].sel = ctx->temp_reg; 1667 alu.src[0].chan = 0; 1668 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1669 alu.last = 1; 1670 r = r600_bytecode_add_alu(ctx->bc, &alu); 1671 if (r) 1672 return r; 1673 } 1674 } 1675 1676 /* dst.x, <- 1.0 */ 1677 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1678 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1679 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1680 alu.src[0].chan = 0; 1681 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1682 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1683 r = r600_bytecode_add_alu(ctx->bc, &alu); 1684 if (r) 1685 return r; 1686 1687 /* dst.y = max(src.x, 0.0) */ 1688 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1689 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1690 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1691 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1692 alu.src[1].chan = 0; 1693 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1694 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1695 r = r600_bytecode_add_alu(ctx->bc, &alu); 1696 if (r) 1697 return r; 1698 1699 /* dst.w, <- 1.0 */ 1700 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1701 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1702 alu.src[0].sel = V_SQ_ALU_SRC_1; 1703 alu.src[0].chan = 0; 1704 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1705 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1706 alu.last = 1; 1707 r = r600_bytecode_add_alu(ctx->bc, &alu); 1708 if (r) 1709 return r; 1710 1711 return 0; 1712} 1713 1714static int tgsi_rsq(struct r600_shader_ctx *ctx) 1715{ 1716 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1717 struct r600_bytecode_alu alu; 1718 int i, r; 1719 1720 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1721 1722 /* FIXME: 1723 * For state trackers other than OpenGL, we'll want to use 1724 * _RECIPSQRT_IEEE instead. 1725 */ 1726 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1727 1728 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1729 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1730 r600_bytecode_src_set_abs(&alu.src[i]); 1731 } 1732 alu.dst.sel = ctx->temp_reg; 1733 alu.dst.write = 1; 1734 alu.last = 1; 1735 r = r600_bytecode_add_alu(ctx->bc, &alu); 1736 if (r) 1737 return r; 1738 /* replicate result */ 1739 return tgsi_helper_tempx_replicate(ctx); 1740} 1741 1742static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1743{ 1744 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1745 struct r600_bytecode_alu alu; 1746 int i, r; 1747 1748 for (i = 0; i < 4; i++) { 1749 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1750 alu.src[0].sel = ctx->temp_reg; 1751 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1752 alu.dst.chan = i; 1753 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1754 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1755 if (i == 3) 1756 alu.last = 1; 1757 r = r600_bytecode_add_alu(ctx->bc, &alu); 1758 if (r) 1759 return r; 1760 } 1761 return 0; 1762} 1763 1764static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1765{ 1766 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1767 struct r600_bytecode_alu alu; 1768 int i, r; 1769 1770 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1771 alu.inst = ctx->inst_info->r600_opcode; 1772 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1773 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1774 } 1775 alu.dst.sel = ctx->temp_reg; 1776 alu.dst.write = 1; 1777 alu.last = 1; 1778 r = r600_bytecode_add_alu(ctx->bc, &alu); 1779 if (r) 1780 return r; 1781 /* replicate result */ 1782 return tgsi_helper_tempx_replicate(ctx); 1783} 1784 1785static int cayman_pow(struct r600_shader_ctx *ctx) 1786{ 1787 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1788 int i, r; 1789 struct r600_bytecode_alu alu; 1790 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1791 1792 for (i = 0; i < 3; i++) { 1793 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1794 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1795 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1796 alu.dst.sel = ctx->temp_reg; 1797 alu.dst.chan = i; 1798 alu.dst.write = 1; 1799 if (i == 2) 1800 alu.last = 1; 1801 r = r600_bytecode_add_alu(ctx->bc, &alu); 1802 if (r) 1803 return r; 1804 } 1805 1806 /* b * LOG2(a) */ 1807 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1808 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1809 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 1810 alu.src[1].sel = ctx->temp_reg; 1811 alu.dst.sel = ctx->temp_reg; 1812 alu.dst.write = 1; 1813 alu.last = 1; 1814 r = r600_bytecode_add_alu(ctx->bc, &alu); 1815 if (r) 1816 return r; 1817 1818 for (i = 0; i < last_slot; i++) { 1819 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1820 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1821 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1822 alu.src[0].sel = ctx->temp_reg; 1823 1824 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1825 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1826 if (i == last_slot - 1) 1827 alu.last = 1; 1828 r = r600_bytecode_add_alu(ctx->bc, &alu); 1829 if (r) 1830 return r; 1831 } 1832 return 0; 1833} 1834 1835static int tgsi_pow(struct r600_shader_ctx *ctx) 1836{ 1837 struct r600_bytecode_alu alu; 1838 int r; 1839 1840 /* LOG2(a) */ 1841 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1842 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1843 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1844 alu.dst.sel = ctx->temp_reg; 1845 alu.dst.write = 1; 1846 alu.last = 1; 1847 r = r600_bytecode_add_alu(ctx->bc, &alu); 1848 if (r) 1849 return r; 1850 /* b * LOG2(a) */ 1851 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1852 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1853 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 1854 alu.src[1].sel = ctx->temp_reg; 1855 alu.dst.sel = ctx->temp_reg; 1856 alu.dst.write = 1; 1857 alu.last = 1; 1858 r = r600_bytecode_add_alu(ctx->bc, &alu); 1859 if (r) 1860 return r; 1861 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1862 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1863 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1864 alu.src[0].sel = ctx->temp_reg; 1865 alu.dst.sel = ctx->temp_reg; 1866 alu.dst.write = 1; 1867 alu.last = 1; 1868 r = r600_bytecode_add_alu(ctx->bc, &alu); 1869 if (r) 1870 return r; 1871 return tgsi_helper_tempx_replicate(ctx); 1872} 1873 1874static int tgsi_ssg(struct r600_shader_ctx *ctx) 1875{ 1876 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1877 struct r600_bytecode_alu alu; 1878 int i, r; 1879 1880 /* tmp = (src > 0 ? 1 : src) */ 1881 for (i = 0; i < 4; i++) { 1882 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1883 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1884 alu.is_op3 = 1; 1885 1886 alu.dst.sel = ctx->temp_reg; 1887 alu.dst.chan = i; 1888 1889 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 1890 alu.src[1].sel = V_SQ_ALU_SRC_1; 1891 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 1892 1893 if (i == 3) 1894 alu.last = 1; 1895 r = r600_bytecode_add_alu(ctx->bc, &alu); 1896 if (r) 1897 return r; 1898 } 1899 1900 /* dst = (-tmp > 0 ? -1 : tmp) */ 1901 for (i = 0; i < 4; i++) { 1902 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1903 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1904 alu.is_op3 = 1; 1905 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1906 1907 alu.src[0].sel = ctx->temp_reg; 1908 alu.src[0].chan = i; 1909 alu.src[0].neg = 1; 1910 1911 alu.src[1].sel = V_SQ_ALU_SRC_1; 1912 alu.src[1].neg = 1; 1913 1914 alu.src[2].sel = ctx->temp_reg; 1915 alu.src[2].chan = i; 1916 1917 if (i == 3) 1918 alu.last = 1; 1919 r = r600_bytecode_add_alu(ctx->bc, &alu); 1920 if (r) 1921 return r; 1922 } 1923 return 0; 1924} 1925 1926static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1927{ 1928 struct r600_bytecode_alu alu; 1929 int i, r; 1930 1931 for (i = 0; i < 4; i++) { 1932 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1933 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1934 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1935 alu.dst.chan = i; 1936 } else { 1937 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1938 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1939 alu.src[0].sel = ctx->temp_reg; 1940 alu.src[0].chan = i; 1941 } 1942 if (i == 3) { 1943 alu.last = 1; 1944 } 1945 r = r600_bytecode_add_alu(ctx->bc, &alu); 1946 if (r) 1947 return r; 1948 } 1949 return 0; 1950} 1951 1952static int tgsi_op3(struct r600_shader_ctx *ctx) 1953{ 1954 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1955 struct r600_bytecode_alu alu; 1956 int i, j, r; 1957 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1958 1959 for (i = 0; i < lasti + 1; i++) { 1960 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1961 continue; 1962 1963 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1964 alu.inst = ctx->inst_info->r600_opcode; 1965 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1966 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1967 } 1968 1969 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1970 alu.dst.chan = i; 1971 alu.dst.write = 1; 1972 alu.is_op3 = 1; 1973 if (i == lasti) { 1974 alu.last = 1; 1975 } 1976 r = r600_bytecode_add_alu(ctx->bc, &alu); 1977 if (r) 1978 return r; 1979 } 1980 return 0; 1981} 1982 1983static int tgsi_dp(struct r600_shader_ctx *ctx) 1984{ 1985 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1986 struct r600_bytecode_alu alu; 1987 int i, j, r; 1988 1989 for (i = 0; i < 4; i++) { 1990 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1991 alu.inst = ctx->inst_info->r600_opcode; 1992 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1993 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1994 } 1995 1996 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1997 alu.dst.chan = i; 1998 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1999 /* handle some special cases */ 2000 switch (ctx->inst_info->tgsi_opcode) { 2001 case TGSI_OPCODE_DP2: 2002 if (i > 1) { 2003 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 2004 alu.src[0].chan = alu.src[1].chan = 0; 2005 } 2006 break; 2007 case TGSI_OPCODE_DP3: 2008 if (i > 2) { 2009 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 2010 alu.src[0].chan = alu.src[1].chan = 0; 2011 } 2012 break; 2013 case TGSI_OPCODE_DPH: 2014 if (i == 3) { 2015 alu.src[0].sel = V_SQ_ALU_SRC_1; 2016 alu.src[0].chan = 0; 2017 alu.src[0].neg = 0; 2018 } 2019 break; 2020 default: 2021 break; 2022 } 2023 if (i == 3) { 2024 alu.last = 1; 2025 } 2026 r = r600_bytecode_add_alu(ctx->bc, &alu); 2027 if (r) 2028 return r; 2029 } 2030 return 0; 2031} 2032 2033static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 2034 unsigned index) 2035{ 2036 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2037 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 2038 inst->Src[index].Register.File != TGSI_FILE_INPUT) || 2039 ctx->src[index].neg || ctx->src[index].abs; 2040} 2041 2042static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 2043 unsigned index) 2044{ 2045 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2046 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 2047} 2048 2049static int tgsi_tex(struct r600_shader_ctx *ctx) 2050{ 2051 static float one_point_five = 1.5f; 2052 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2053 struct r600_bytecode_tex tex; 2054 struct r600_bytecode_alu alu; 2055 unsigned src_gpr; 2056 int r, i, j; 2057 int opcode; 2058 /* Texture fetch instructions can only use gprs as source. 2059 * Also they cannot negate the source or take the absolute value */ 2060 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); 2061 boolean src_loaded = FALSE; 2062 unsigned sampler_src_reg = 1; 2063 u8 offset_x = 0, offset_y = 0, offset_z = 0; 2064 2065 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 2066 2067 if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 2068 /* get offset values */ 2069 if (inst->Texture.NumOffsets) { 2070 assert(inst->Texture.NumOffsets == 1); 2071 2072 offset_x = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 2073 offset_y = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 2074 offset_z = ctx->literals[inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 2075 } 2076 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 2077 /* TGSI moves the sampler to src reg 3 for TXD */ 2078 sampler_src_reg = 3; 2079 2080 for (i = 1; i < 3; i++) { 2081 /* set gradients h/v */ 2082 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 2083 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H : 2084 SQ_TEX_INST_SET_GRADIENTS_V; 2085 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 2086 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 2087 2088 if (tgsi_tex_src_requires_loading(ctx, i)) { 2089 tex.src_gpr = r600_get_temp(ctx); 2090 tex.src_sel_x = 0; 2091 tex.src_sel_y = 1; 2092 tex.src_sel_z = 2; 2093 tex.src_sel_w = 3; 2094 2095 for (j = 0; j < 4; j++) { 2096 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2097 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2098 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 2099 alu.dst.sel = tex.src_gpr; 2100 alu.dst.chan = j; 2101 if (j == 3) 2102 alu.last = 1; 2103 alu.dst.write = 1; 2104 r = r600_bytecode_add_alu(ctx->bc, &alu); 2105 if (r) 2106 return r; 2107 } 2108 2109 } else { 2110 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 2111 tex.src_sel_x = ctx->src[i].swizzle[0]; 2112 tex.src_sel_y = ctx->src[i].swizzle[1]; 2113 tex.src_sel_z = ctx->src[i].swizzle[2]; 2114 tex.src_sel_w = ctx->src[i].swizzle[3]; 2115 tex.src_rel = ctx->src[i].rel; 2116 } 2117 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 2118 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 2119 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 2120 tex.coord_type_x = 1; 2121 tex.coord_type_y = 1; 2122 tex.coord_type_z = 1; 2123 tex.coord_type_w = 1; 2124 } 2125 r = r600_bytecode_add_tex(ctx->bc, &tex); 2126 if (r) 2127 return r; 2128 } 2129 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 2130 int out_chan; 2131 /* Add perspective divide */ 2132 if (ctx->bc->chip_class == CAYMAN) { 2133 out_chan = 2; 2134 for (i = 0; i < 3; i++) { 2135 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2136 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2137 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 2138 2139 alu.dst.sel = ctx->temp_reg; 2140 alu.dst.chan = i; 2141 if (i == 2) 2142 alu.last = 1; 2143 if (out_chan == i) 2144 alu.dst.write = 1; 2145 r = r600_bytecode_add_alu(ctx->bc, &alu); 2146 if (r) 2147 return r; 2148 } 2149 2150 } else { 2151 out_chan = 3; 2152 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2153 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2154 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 2155 2156 alu.dst.sel = ctx->temp_reg; 2157 alu.dst.chan = out_chan; 2158 alu.last = 1; 2159 alu.dst.write = 1; 2160 r = r600_bytecode_add_alu(ctx->bc, &alu); 2161 if (r) 2162 return r; 2163 } 2164 2165 for (i = 0; i < 3; i++) { 2166 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2167 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2168 alu.src[0].sel = ctx->temp_reg; 2169 alu.src[0].chan = out_chan; 2170 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2171 alu.dst.sel = ctx->temp_reg; 2172 alu.dst.chan = i; 2173 alu.dst.write = 1; 2174 r = r600_bytecode_add_alu(ctx->bc, &alu); 2175 if (r) 2176 return r; 2177 } 2178 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2179 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2180 alu.src[0].sel = V_SQ_ALU_SRC_1; 2181 alu.src[0].chan = 0; 2182 alu.dst.sel = ctx->temp_reg; 2183 alu.dst.chan = 3; 2184 alu.last = 1; 2185 alu.dst.write = 1; 2186 r = r600_bytecode_add_alu(ctx->bc, &alu); 2187 if (r) 2188 return r; 2189 src_loaded = TRUE; 2190 src_gpr = ctx->temp_reg; 2191 } 2192 2193 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2194 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 2195 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 2196 2197 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 2198 for (i = 0; i < 4; i++) { 2199 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2200 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 2201 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2202 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 2203 alu.dst.sel = ctx->temp_reg; 2204 alu.dst.chan = i; 2205 if (i == 3) 2206 alu.last = 1; 2207 alu.dst.write = 1; 2208 r = r600_bytecode_add_alu(ctx->bc, &alu); 2209 if (r) 2210 return r; 2211 } 2212 2213 /* tmp1.z = RCP_e(|tmp1.z|) */ 2214 if (ctx->bc->chip_class == CAYMAN) { 2215 for (i = 0; i < 3; i++) { 2216 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2217 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2218 alu.src[0].sel = ctx->temp_reg; 2219 alu.src[0].chan = 2; 2220 alu.src[0].abs = 1; 2221 alu.dst.sel = ctx->temp_reg; 2222 alu.dst.chan = i; 2223 if (i == 2) 2224 alu.dst.write = 1; 2225 if (i == 2) 2226 alu.last = 1; 2227 r = r600_bytecode_add_alu(ctx->bc, &alu); 2228 if (r) 2229 return r; 2230 } 2231 } else { 2232 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2233 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2234 alu.src[0].sel = ctx->temp_reg; 2235 alu.src[0].chan = 2; 2236 alu.src[0].abs = 1; 2237 alu.dst.sel = ctx->temp_reg; 2238 alu.dst.chan = 2; 2239 alu.dst.write = 1; 2240 alu.last = 1; 2241 r = r600_bytecode_add_alu(ctx->bc, &alu); 2242 if (r) 2243 return r; 2244 } 2245 2246 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 2247 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 2248 * muladd has no writemask, have to use another temp 2249 */ 2250 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2251 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2252 alu.is_op3 = 1; 2253 2254 alu.src[0].sel = ctx->temp_reg; 2255 alu.src[0].chan = 0; 2256 alu.src[1].sel = ctx->temp_reg; 2257 alu.src[1].chan = 2; 2258 2259 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2260 alu.src[2].chan = 0; 2261 alu.src[2].value = *(uint32_t *)&one_point_five; 2262 2263 alu.dst.sel = ctx->temp_reg; 2264 alu.dst.chan = 0; 2265 alu.dst.write = 1; 2266 2267 r = r600_bytecode_add_alu(ctx->bc, &alu); 2268 if (r) 2269 return r; 2270 2271 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2272 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2273 alu.is_op3 = 1; 2274 2275 alu.src[0].sel = ctx->temp_reg; 2276 alu.src[0].chan = 1; 2277 alu.src[1].sel = ctx->temp_reg; 2278 alu.src[1].chan = 2; 2279 2280 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2281 alu.src[2].chan = 0; 2282 alu.src[2].value = *(uint32_t *)&one_point_five; 2283 2284 alu.dst.sel = ctx->temp_reg; 2285 alu.dst.chan = 1; 2286 alu.dst.write = 1; 2287 2288 alu.last = 1; 2289 r = r600_bytecode_add_alu(ctx->bc, &alu); 2290 if (r) 2291 return r; 2292 2293 src_loaded = TRUE; 2294 src_gpr = ctx->temp_reg; 2295 } 2296 2297 if (src_requires_loading && !src_loaded) { 2298 for (i = 0; i < 4; i++) { 2299 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2300 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2301 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2302 alu.dst.sel = ctx->temp_reg; 2303 alu.dst.chan = i; 2304 if (i == 3) 2305 alu.last = 1; 2306 alu.dst.write = 1; 2307 r = r600_bytecode_add_alu(ctx->bc, &alu); 2308 if (r) 2309 return r; 2310 } 2311 src_loaded = TRUE; 2312 src_gpr = ctx->temp_reg; 2313 } 2314 2315 opcode = ctx->inst_info->r600_opcode; 2316 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 2317 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 2318 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 2319 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 2320 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) { 2321 switch (opcode) { 2322 case SQ_TEX_INST_SAMPLE: 2323 opcode = SQ_TEX_INST_SAMPLE_C; 2324 break; 2325 case SQ_TEX_INST_SAMPLE_L: 2326 opcode = SQ_TEX_INST_SAMPLE_C_L; 2327 break; 2328 case SQ_TEX_INST_SAMPLE_LB: 2329 opcode = SQ_TEX_INST_SAMPLE_C_LB; 2330 break; 2331 case SQ_TEX_INST_SAMPLE_G: 2332 opcode = SQ_TEX_INST_SAMPLE_C_G; 2333 break; 2334 } 2335 } 2336 2337 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 2338 tex.inst = opcode; 2339 2340 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 2341 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 2342 tex.src_gpr = src_gpr; 2343 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 2344 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 2345 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 2346 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 2347 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 2348 if (src_loaded) { 2349 tex.src_sel_x = 0; 2350 tex.src_sel_y = 1; 2351 tex.src_sel_z = 2; 2352 tex.src_sel_w = 3; 2353 } else { 2354 tex.src_sel_x = ctx->src[0].swizzle[0]; 2355 tex.src_sel_y = ctx->src[0].swizzle[1]; 2356 tex.src_sel_z = ctx->src[0].swizzle[2]; 2357 tex.src_sel_w = ctx->src[0].swizzle[3]; 2358 tex.src_rel = ctx->src[0].rel; 2359 } 2360 2361 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2362 tex.src_sel_x = 1; 2363 tex.src_sel_y = 0; 2364 tex.src_sel_z = 3; 2365 tex.src_sel_w = 1; 2366 } 2367 2368 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 2369 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 2370 tex.coord_type_x = 1; 2371 tex.coord_type_y = 1; 2372 } 2373 tex.coord_type_z = 1; 2374 tex.coord_type_w = 1; 2375 2376 tex.offset_x = offset_x; 2377 tex.offset_y = offset_y; 2378 tex.offset_z = offset_z; 2379 2380 /* Put the depth for comparison in W. 2381 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 2382 * Some instructions expect the depth in Z. */ 2383 if ((inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 2384 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 2385 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 2386 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) && 2387 opcode != SQ_TEX_INST_SAMPLE_C_L && 2388 opcode != SQ_TEX_INST_SAMPLE_C_LB) { 2389 tex.src_sel_w = tex.src_sel_z; 2390 } 2391 2392 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY || 2393 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY) { 2394 if (opcode == SQ_TEX_INST_SAMPLE_C_L || 2395 opcode == SQ_TEX_INST_SAMPLE_C_LB) { 2396 /* the array index is read from Y */ 2397 tex.coord_type_y = 0; 2398 } else { 2399 /* the array index is read from Z */ 2400 tex.coord_type_z = 0; 2401 tex.src_sel_z = tex.src_sel_y; 2402 } 2403 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 2404 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) 2405 /* the array index is read from Z */ 2406 tex.coord_type_z = 0; 2407 2408 r = r600_bytecode_add_tex(ctx->bc, &tex); 2409 if (r) 2410 return r; 2411 2412 /* add shadow ambient support - gallium doesn't do it yet */ 2413 return 0; 2414} 2415 2416static int tgsi_lrp(struct r600_shader_ctx *ctx) 2417{ 2418 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2419 struct r600_bytecode_alu alu; 2420 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2421 unsigned i; 2422 int r; 2423 2424 /* optimize if it's just an equal balance */ 2425 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 2426 for (i = 0; i < lasti + 1; i++) { 2427 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2428 continue; 2429 2430 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2431 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2432 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2433 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2434 alu.omod = 3; 2435 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2436 alu.dst.chan = i; 2437 if (i == lasti) { 2438 alu.last = 1; 2439 } 2440 r = r600_bytecode_add_alu(ctx->bc, &alu); 2441 if (r) 2442 return r; 2443 } 2444 return 0; 2445 } 2446 2447 /* 1 - src0 */ 2448 for (i = 0; i < lasti + 1; i++) { 2449 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2450 continue; 2451 2452 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2453 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2454 alu.src[0].sel = V_SQ_ALU_SRC_1; 2455 alu.src[0].chan = 0; 2456 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2457 r600_bytecode_src_toggle_neg(&alu.src[1]); 2458 alu.dst.sel = ctx->temp_reg; 2459 alu.dst.chan = i; 2460 if (i == lasti) { 2461 alu.last = 1; 2462 } 2463 alu.dst.write = 1; 2464 r = r600_bytecode_add_alu(ctx->bc, &alu); 2465 if (r) 2466 return r; 2467 } 2468 2469 /* (1 - src0) * src2 */ 2470 for (i = 0; i < lasti + 1; i++) { 2471 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2472 continue; 2473 2474 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2475 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2476 alu.src[0].sel = ctx->temp_reg; 2477 alu.src[0].chan = i; 2478 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2479 alu.dst.sel = ctx->temp_reg; 2480 alu.dst.chan = i; 2481 if (i == lasti) { 2482 alu.last = 1; 2483 } 2484 alu.dst.write = 1; 2485 r = r600_bytecode_add_alu(ctx->bc, &alu); 2486 if (r) 2487 return r; 2488 } 2489 2490 /* src0 * src1 + (1 - src0) * src2 */ 2491 for (i = 0; i < lasti + 1; i++) { 2492 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2493 continue; 2494 2495 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2496 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2497 alu.is_op3 = 1; 2498 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2499 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2500 alu.src[2].sel = ctx->temp_reg; 2501 alu.src[2].chan = i; 2502 2503 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2504 alu.dst.chan = i; 2505 if (i == lasti) { 2506 alu.last = 1; 2507 } 2508 r = r600_bytecode_add_alu(ctx->bc, &alu); 2509 if (r) 2510 return r; 2511 } 2512 return 0; 2513} 2514 2515static int tgsi_cmp(struct r600_shader_ctx *ctx) 2516{ 2517 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2518 struct r600_bytecode_alu alu; 2519 int i, r; 2520 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2521 2522 for (i = 0; i < lasti + 1; i++) { 2523 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2524 continue; 2525 2526 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2527 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2528 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2529 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2530 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 2531 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2532 alu.dst.chan = i; 2533 alu.dst.write = 1; 2534 alu.is_op3 = 1; 2535 if (i == lasti) 2536 alu.last = 1; 2537 r = r600_bytecode_add_alu(ctx->bc, &alu); 2538 if (r) 2539 return r; 2540 } 2541 return 0; 2542} 2543 2544static int tgsi_xpd(struct r600_shader_ctx *ctx) 2545{ 2546 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2547 static const unsigned int src0_swizzle[] = {2, 0, 1}; 2548 static const unsigned int src1_swizzle[] = {1, 2, 0}; 2549 struct r600_bytecode_alu alu; 2550 uint32_t use_temp = 0; 2551 int i, r; 2552 2553 if (inst->Dst[0].Register.WriteMask != 0xf) 2554 use_temp = 1; 2555 2556 for (i = 0; i < 4; i++) { 2557 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2558 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2559 if (i < 3) { 2560 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2561 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 2562 } else { 2563 alu.src[0].sel = V_SQ_ALU_SRC_0; 2564 alu.src[0].chan = i; 2565 alu.src[1].sel = V_SQ_ALU_SRC_0; 2566 alu.src[1].chan = i; 2567 } 2568 2569 alu.dst.sel = ctx->temp_reg; 2570 alu.dst.chan = i; 2571 alu.dst.write = 1; 2572 2573 if (i == 3) 2574 alu.last = 1; 2575 r = r600_bytecode_add_alu(ctx->bc, &alu); 2576 if (r) 2577 return r; 2578 } 2579 2580 for (i = 0; i < 4; i++) { 2581 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2582 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2583 2584 if (i < 3) { 2585 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 2586 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 2587 } else { 2588 alu.src[0].sel = V_SQ_ALU_SRC_0; 2589 alu.src[0].chan = i; 2590 alu.src[1].sel = V_SQ_ALU_SRC_0; 2591 alu.src[1].chan = i; 2592 } 2593 2594 alu.src[2].sel = ctx->temp_reg; 2595 alu.src[2].neg = 1; 2596 alu.src[2].chan = i; 2597 2598 if (use_temp) 2599 alu.dst.sel = ctx->temp_reg; 2600 else 2601 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2602 alu.dst.chan = i; 2603 alu.dst.write = 1; 2604 alu.is_op3 = 1; 2605 if (i == 3) 2606 alu.last = 1; 2607 r = r600_bytecode_add_alu(ctx->bc, &alu); 2608 if (r) 2609 return r; 2610 } 2611 if (use_temp) 2612 return tgsi_helper_copy(ctx, inst); 2613 return 0; 2614} 2615 2616static int tgsi_exp(struct r600_shader_ctx *ctx) 2617{ 2618 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2619 struct r600_bytecode_alu alu; 2620 int r; 2621 int i; 2622 2623 /* result.x = 2^floor(src); */ 2624 if (inst->Dst[0].Register.WriteMask & 1) { 2625 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2626 2627 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2628 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2629 2630 alu.dst.sel = ctx->temp_reg; 2631 alu.dst.chan = 0; 2632 alu.dst.write = 1; 2633 alu.last = 1; 2634 r = r600_bytecode_add_alu(ctx->bc, &alu); 2635 if (r) 2636 return r; 2637 2638 if (ctx->bc->chip_class == CAYMAN) { 2639 for (i = 0; i < 3; i++) { 2640 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2641 alu.src[0].sel = ctx->temp_reg; 2642 alu.src[0].chan = 0; 2643 2644 alu.dst.sel = ctx->temp_reg; 2645 alu.dst.chan = i; 2646 if (i == 0) 2647 alu.dst.write = 1; 2648 if (i == 2) 2649 alu.last = 1; 2650 r = r600_bytecode_add_alu(ctx->bc, &alu); 2651 if (r) 2652 return r; 2653 } 2654 } else { 2655 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2656 alu.src[0].sel = ctx->temp_reg; 2657 alu.src[0].chan = 0; 2658 2659 alu.dst.sel = ctx->temp_reg; 2660 alu.dst.chan = 0; 2661 alu.dst.write = 1; 2662 alu.last = 1; 2663 r = r600_bytecode_add_alu(ctx->bc, &alu); 2664 if (r) 2665 return r; 2666 } 2667 } 2668 2669 /* result.y = tmp - floor(tmp); */ 2670 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2671 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2672 2673 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2674 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2675 2676 alu.dst.sel = ctx->temp_reg; 2677#if 0 2678 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2679 if (r) 2680 return r; 2681#endif 2682 alu.dst.write = 1; 2683 alu.dst.chan = 1; 2684 2685 alu.last = 1; 2686 2687 r = r600_bytecode_add_alu(ctx->bc, &alu); 2688 if (r) 2689 return r; 2690 } 2691 2692 /* result.z = RoughApprox2ToX(tmp);*/ 2693 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2694 if (ctx->bc->chip_class == CAYMAN) { 2695 for (i = 0; i < 3; i++) { 2696 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2697 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2698 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2699 2700 alu.dst.sel = ctx->temp_reg; 2701 alu.dst.chan = i; 2702 if (i == 2) { 2703 alu.dst.write = 1; 2704 alu.last = 1; 2705 } 2706 2707 r = r600_bytecode_add_alu(ctx->bc, &alu); 2708 if (r) 2709 return r; 2710 } 2711 } else { 2712 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2713 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2714 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2715 2716 alu.dst.sel = ctx->temp_reg; 2717 alu.dst.write = 1; 2718 alu.dst.chan = 2; 2719 2720 alu.last = 1; 2721 2722 r = r600_bytecode_add_alu(ctx->bc, &alu); 2723 if (r) 2724 return r; 2725 } 2726 } 2727 2728 /* result.w = 1.0;*/ 2729 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2730 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2731 2732 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2733 alu.src[0].sel = V_SQ_ALU_SRC_1; 2734 alu.src[0].chan = 0; 2735 2736 alu.dst.sel = ctx->temp_reg; 2737 alu.dst.chan = 3; 2738 alu.dst.write = 1; 2739 alu.last = 1; 2740 r = r600_bytecode_add_alu(ctx->bc, &alu); 2741 if (r) 2742 return r; 2743 } 2744 return tgsi_helper_copy(ctx, inst); 2745} 2746 2747static int tgsi_log(struct r600_shader_ctx *ctx) 2748{ 2749 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2750 struct r600_bytecode_alu alu; 2751 int r; 2752 int i; 2753 2754 /* result.x = floor(log2(|src|)); */ 2755 if (inst->Dst[0].Register.WriteMask & 1) { 2756 if (ctx->bc->chip_class == CAYMAN) { 2757 for (i = 0; i < 3; i++) { 2758 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2759 2760 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2761 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2762 r600_bytecode_src_set_abs(&alu.src[0]); 2763 2764 alu.dst.sel = ctx->temp_reg; 2765 alu.dst.chan = i; 2766 if (i == 0) 2767 alu.dst.write = 1; 2768 if (i == 2) 2769 alu.last = 1; 2770 r = r600_bytecode_add_alu(ctx->bc, &alu); 2771 if (r) 2772 return r; 2773 } 2774 2775 } else { 2776 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2777 2778 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2779 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2780 r600_bytecode_src_set_abs(&alu.src[0]); 2781 2782 alu.dst.sel = ctx->temp_reg; 2783 alu.dst.chan = 0; 2784 alu.dst.write = 1; 2785 alu.last = 1; 2786 r = r600_bytecode_add_alu(ctx->bc, &alu); 2787 if (r) 2788 return r; 2789 } 2790 2791 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2792 alu.src[0].sel = ctx->temp_reg; 2793 alu.src[0].chan = 0; 2794 2795 alu.dst.sel = ctx->temp_reg; 2796 alu.dst.chan = 0; 2797 alu.dst.write = 1; 2798 alu.last = 1; 2799 2800 r = r600_bytecode_add_alu(ctx->bc, &alu); 2801 if (r) 2802 return r; 2803 } 2804 2805 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 2806 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2807 2808 if (ctx->bc->chip_class == CAYMAN) { 2809 for (i = 0; i < 3; i++) { 2810 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2811 2812 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2813 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2814 r600_bytecode_src_set_abs(&alu.src[0]); 2815 2816 alu.dst.sel = ctx->temp_reg; 2817 alu.dst.chan = i; 2818 if (i == 1) 2819 alu.dst.write = 1; 2820 if (i == 2) 2821 alu.last = 1; 2822 2823 r = r600_bytecode_add_alu(ctx->bc, &alu); 2824 if (r) 2825 return r; 2826 } 2827 } else { 2828 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2829 2830 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2831 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2832 r600_bytecode_src_set_abs(&alu.src[0]); 2833 2834 alu.dst.sel = ctx->temp_reg; 2835 alu.dst.chan = 1; 2836 alu.dst.write = 1; 2837 alu.last = 1; 2838 2839 r = r600_bytecode_add_alu(ctx->bc, &alu); 2840 if (r) 2841 return r; 2842 } 2843 2844 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2845 2846 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2847 alu.src[0].sel = ctx->temp_reg; 2848 alu.src[0].chan = 1; 2849 2850 alu.dst.sel = ctx->temp_reg; 2851 alu.dst.chan = 1; 2852 alu.dst.write = 1; 2853 alu.last = 1; 2854 2855 r = r600_bytecode_add_alu(ctx->bc, &alu); 2856 if (r) 2857 return r; 2858 2859 if (ctx->bc->chip_class == CAYMAN) { 2860 for (i = 0; i < 3; i++) { 2861 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2862 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2863 alu.src[0].sel = ctx->temp_reg; 2864 alu.src[0].chan = 1; 2865 2866 alu.dst.sel = ctx->temp_reg; 2867 alu.dst.chan = i; 2868 if (i == 1) 2869 alu.dst.write = 1; 2870 if (i == 2) 2871 alu.last = 1; 2872 2873 r = r600_bytecode_add_alu(ctx->bc, &alu); 2874 if (r) 2875 return r; 2876 } 2877 } else { 2878 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2879 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2880 alu.src[0].sel = ctx->temp_reg; 2881 alu.src[0].chan = 1; 2882 2883 alu.dst.sel = ctx->temp_reg; 2884 alu.dst.chan = 1; 2885 alu.dst.write = 1; 2886 alu.last = 1; 2887 2888 r = r600_bytecode_add_alu(ctx->bc, &alu); 2889 if (r) 2890 return r; 2891 } 2892 2893 if (ctx->bc->chip_class == CAYMAN) { 2894 for (i = 0; i < 3; i++) { 2895 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2896 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2897 alu.src[0].sel = ctx->temp_reg; 2898 alu.src[0].chan = 1; 2899 2900 alu.dst.sel = ctx->temp_reg; 2901 alu.dst.chan = i; 2902 if (i == 1) 2903 alu.dst.write = 1; 2904 if (i == 2) 2905 alu.last = 1; 2906 2907 r = r600_bytecode_add_alu(ctx->bc, &alu); 2908 if (r) 2909 return r; 2910 } 2911 } else { 2912 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2913 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2914 alu.src[0].sel = ctx->temp_reg; 2915 alu.src[0].chan = 1; 2916 2917 alu.dst.sel = ctx->temp_reg; 2918 alu.dst.chan = 1; 2919 alu.dst.write = 1; 2920 alu.last = 1; 2921 2922 r = r600_bytecode_add_alu(ctx->bc, &alu); 2923 if (r) 2924 return r; 2925 } 2926 2927 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2928 2929 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2930 2931 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2932 r600_bytecode_src_set_abs(&alu.src[0]); 2933 2934 alu.src[1].sel = ctx->temp_reg; 2935 alu.src[1].chan = 1; 2936 2937 alu.dst.sel = ctx->temp_reg; 2938 alu.dst.chan = 1; 2939 alu.dst.write = 1; 2940 alu.last = 1; 2941 2942 r = r600_bytecode_add_alu(ctx->bc, &alu); 2943 if (r) 2944 return r; 2945 } 2946 2947 /* result.z = log2(|src|);*/ 2948 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2949 if (ctx->bc->chip_class == CAYMAN) { 2950 for (i = 0; i < 3; i++) { 2951 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2952 2953 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2954 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2955 r600_bytecode_src_set_abs(&alu.src[0]); 2956 2957 alu.dst.sel = ctx->temp_reg; 2958 if (i == 2) 2959 alu.dst.write = 1; 2960 alu.dst.chan = i; 2961 if (i == 2) 2962 alu.last = 1; 2963 2964 r = r600_bytecode_add_alu(ctx->bc, &alu); 2965 if (r) 2966 return r; 2967 } 2968 } else { 2969 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2970 2971 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2972 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2973 r600_bytecode_src_set_abs(&alu.src[0]); 2974 2975 alu.dst.sel = ctx->temp_reg; 2976 alu.dst.write = 1; 2977 alu.dst.chan = 2; 2978 alu.last = 1; 2979 2980 r = r600_bytecode_add_alu(ctx->bc, &alu); 2981 if (r) 2982 return r; 2983 } 2984 } 2985 2986 /* result.w = 1.0; */ 2987 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2988 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2989 2990 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2991 alu.src[0].sel = V_SQ_ALU_SRC_1; 2992 alu.src[0].chan = 0; 2993 2994 alu.dst.sel = ctx->temp_reg; 2995 alu.dst.chan = 3; 2996 alu.dst.write = 1; 2997 alu.last = 1; 2998 2999 r = r600_bytecode_add_alu(ctx->bc, &alu); 3000 if (r) 3001 return r; 3002 } 3003 3004 return tgsi_helper_copy(ctx, inst); 3005} 3006 3007static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 3008{ 3009 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3010 struct r600_bytecode_alu alu; 3011 int r; 3012 3013 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3014 3015 switch (inst->Instruction.Opcode) { 3016 case TGSI_OPCODE_ARL: 3017 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 3018 break; 3019 case TGSI_OPCODE_ARR: 3020 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 3021 break; 3022 case TGSI_OPCODE_UARL: 3023 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 3024 break; 3025 default: 3026 assert(0); 3027 return -1; 3028 } 3029 3030 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3031 alu.last = 1; 3032 alu.dst.sel = ctx->bc->ar_reg; 3033 alu.dst.write = 1; 3034 r = r600_bytecode_add_alu(ctx->bc, &alu); 3035 if (r) 3036 return r; 3037 3038 ctx->bc->ar_loaded = 0; 3039 return 0; 3040} 3041static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 3042{ 3043 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3044 struct r600_bytecode_alu alu; 3045 int r; 3046 3047 switch (inst->Instruction.Opcode) { 3048 case TGSI_OPCODE_ARL: 3049 memset(&alu, 0, sizeof(alu)); 3050 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 3051 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3052 alu.dst.sel = ctx->bc->ar_reg; 3053 alu.dst.write = 1; 3054 alu.last = 1; 3055 3056 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3057 return r; 3058 3059 memset(&alu, 0, sizeof(alu)); 3060 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 3061 alu.src[0].sel = ctx->bc->ar_reg; 3062 alu.dst.sel = ctx->bc->ar_reg; 3063 alu.dst.write = 1; 3064 alu.last = 1; 3065 3066 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3067 return r; 3068 break; 3069 case TGSI_OPCODE_ARR: 3070 memset(&alu, 0, sizeof(alu)); 3071 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 3072 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3073 alu.dst.sel = ctx->bc->ar_reg; 3074 alu.dst.write = 1; 3075 alu.last = 1; 3076 3077 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3078 return r; 3079 break; 3080 case TGSI_OPCODE_UARL: 3081 memset(&alu, 0, sizeof(alu)); 3082 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV; 3083 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3084 alu.dst.sel = ctx->bc->ar_reg; 3085 alu.dst.write = 1; 3086 alu.last = 1; 3087 3088 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 3089 return r; 3090 break; 3091 default: 3092 assert(0); 3093 return -1; 3094 } 3095 3096 ctx->bc->ar_loaded = 0; 3097 return 0; 3098} 3099 3100static int tgsi_opdst(struct r600_shader_ctx *ctx) 3101{ 3102 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3103 struct r600_bytecode_alu alu; 3104 int i, r = 0; 3105 3106 for (i = 0; i < 4; i++) { 3107 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3108 3109 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 3110 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3111 3112 if (i == 0 || i == 3) { 3113 alu.src[0].sel = V_SQ_ALU_SRC_1; 3114 } else { 3115 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 3116 } 3117 3118 if (i == 0 || i == 2) { 3119 alu.src[1].sel = V_SQ_ALU_SRC_1; 3120 } else { 3121 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 3122 } 3123 if (i == 3) 3124 alu.last = 1; 3125 r = r600_bytecode_add_alu(ctx->bc, &alu); 3126 if (r) 3127 return r; 3128 } 3129 return 0; 3130} 3131 3132static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 3133{ 3134 struct r600_bytecode_alu alu; 3135 int r; 3136 3137 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3138 alu.inst = opcode; 3139 alu.predicate = 1; 3140 3141 alu.dst.sel = ctx->temp_reg; 3142 alu.dst.write = 1; 3143 alu.dst.chan = 0; 3144 3145 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 3146 alu.src[1].sel = V_SQ_ALU_SRC_0; 3147 alu.src[1].chan = 0; 3148 3149 alu.last = 1; 3150 3151 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 3152 if (r) 3153 return r; 3154 return 0; 3155} 3156 3157static int pops(struct r600_shader_ctx *ctx, int pops) 3158{ 3159 unsigned force_pop = ctx->bc->force_add_cf; 3160 3161 if (!force_pop) { 3162 int alu_pop = 3; 3163 if (ctx->bc->cf_last) { 3164 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)) 3165 alu_pop = 0; 3166 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER)) 3167 alu_pop = 1; 3168 } 3169 alu_pop += pops; 3170 if (alu_pop == 1) { 3171 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER); 3172 ctx->bc->force_add_cf = 1; 3173 } else if (alu_pop == 2) { 3174 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER); 3175 ctx->bc->force_add_cf = 1; 3176 } else { 3177 force_pop = 1; 3178 } 3179 } 3180 3181 if (force_pop) { 3182 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 3183 ctx->bc->cf_last->pop_count = pops; 3184 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 3185 } 3186 3187 return 0; 3188} 3189 3190static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 3191{ 3192 switch(reason) { 3193 case FC_PUSH_VPM: 3194 ctx->bc->callstack[ctx->bc->call_sp].current--; 3195 break; 3196 case FC_PUSH_WQM: 3197 case FC_LOOP: 3198 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 3199 break; 3200 case FC_REP: 3201 /* TOODO : for 16 vp asic should -= 2; */ 3202 ctx->bc->callstack[ctx->bc->call_sp].current --; 3203 break; 3204 } 3205} 3206 3207static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 3208{ 3209 if (check_max_only) { 3210 int diff; 3211 switch (reason) { 3212 case FC_PUSH_VPM: 3213 diff = 1; 3214 break; 3215 case FC_PUSH_WQM: 3216 diff = 4; 3217 break; 3218 default: 3219 assert(0); 3220 diff = 0; 3221 } 3222 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 3223 ctx->bc->callstack[ctx->bc->call_sp].max) { 3224 ctx->bc->callstack[ctx->bc->call_sp].max = 3225 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 3226 } 3227 return; 3228 } 3229 switch (reason) { 3230 case FC_PUSH_VPM: 3231 ctx->bc->callstack[ctx->bc->call_sp].current++; 3232 break; 3233 case FC_PUSH_WQM: 3234 case FC_LOOP: 3235 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 3236 break; 3237 case FC_REP: 3238 ctx->bc->callstack[ctx->bc->call_sp].current++; 3239 break; 3240 } 3241 3242 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 3243 ctx->bc->callstack[ctx->bc->call_sp].max) { 3244 ctx->bc->callstack[ctx->bc->call_sp].max = 3245 ctx->bc->callstack[ctx->bc->call_sp].current; 3246 } 3247} 3248 3249static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 3250{ 3251 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 3252 3253 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, 3254 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 3255 sp->mid[sp->num_mid] = ctx->bc->cf_last; 3256 sp->num_mid++; 3257} 3258 3259static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 3260{ 3261 ctx->bc->fc_sp++; 3262 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 3263 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 3264} 3265 3266static void fc_poplevel(struct r600_shader_ctx *ctx) 3267{ 3268 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 3269 if (sp->mid) { 3270 free(sp->mid); 3271 sp->mid = NULL; 3272 } 3273 sp->num_mid = 0; 3274 sp->start = NULL; 3275 sp->type = 0; 3276 ctx->bc->fc_sp--; 3277} 3278 3279#if 0 3280static int emit_return(struct r600_shader_ctx *ctx) 3281{ 3282 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN)); 3283 return 0; 3284} 3285 3286static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 3287{ 3288 3289 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 3290 ctx->bc->cf_last->pop_count = pops; 3291 /* TODO work out offset */ 3292 return 0; 3293} 3294 3295static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 3296{ 3297 return 0; 3298} 3299 3300static void emit_testflag(struct r600_shader_ctx *ctx) 3301{ 3302 3303} 3304 3305static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 3306{ 3307 emit_testflag(ctx); 3308 emit_jump_to_offset(ctx, 1, 4); 3309 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 3310 pops(ctx, ifidx + 1); 3311 emit_return(ctx); 3312} 3313 3314static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 3315{ 3316 emit_testflag(ctx); 3317 3318 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3319 ctx->bc->cf_last->pop_count = 1; 3320 3321 fc_set_mid(ctx, fc_sp); 3322 3323 pops(ctx, 1); 3324} 3325#endif 3326 3327static int tgsi_if(struct r600_shader_ctx *ctx) 3328{ 3329 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT)); 3330 3331 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 3332 3333 fc_pushlevel(ctx, FC_IF); 3334 3335 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 3336 return 0; 3337} 3338 3339static int tgsi_else(struct r600_shader_ctx *ctx) 3340{ 3341 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 3342 ctx->bc->cf_last->pop_count = 1; 3343 3344 fc_set_mid(ctx, ctx->bc->fc_sp); 3345 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 3346 return 0; 3347} 3348 3349static int tgsi_endif(struct r600_shader_ctx *ctx) 3350{ 3351 pops(ctx, 1); 3352 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 3353 R600_ERR("if/endif unbalanced in shader\n"); 3354 return -1; 3355 } 3356 3357 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 3358 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3359 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 3360 } else { 3361 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 3362 } 3363 fc_poplevel(ctx); 3364 3365 callstack_decrease_current(ctx, FC_PUSH_VPM); 3366 return 0; 3367} 3368 3369static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 3370{ 3371 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 3372 3373 fc_pushlevel(ctx, FC_LOOP); 3374 3375 /* check stack depth */ 3376 callstack_check_depth(ctx, FC_LOOP, 0); 3377 return 0; 3378} 3379 3380static int tgsi_endloop(struct r600_shader_ctx *ctx) 3381{ 3382 int i; 3383 3384 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 3385 3386 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 3387 R600_ERR("loop/endloop in shader code are not paired.\n"); 3388 return -EINVAL; 3389 } 3390 3391 /* fixup loop pointers - from r600isa 3392 LOOP END points to CF after LOOP START, 3393 LOOP START point to CF after LOOP END 3394 BRK/CONT point to LOOP END CF 3395 */ 3396 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 3397 3398 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3399 3400 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 3401 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 3402 } 3403 /* TODO add LOOPRET support */ 3404 fc_poplevel(ctx); 3405 callstack_decrease_current(ctx, FC_LOOP); 3406 return 0; 3407} 3408 3409static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 3410{ 3411 unsigned int fscp; 3412 3413 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 3414 { 3415 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 3416 break; 3417 } 3418 3419 if (fscp == 0) { 3420 R600_ERR("Break not inside loop/endloop pair\n"); 3421 return -EINVAL; 3422 } 3423 3424 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3425 ctx->bc->cf_last->pop_count = 1; 3426 3427 fc_set_mid(ctx, fscp); 3428 3429 pops(ctx, 1); 3430 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 3431 return 0; 3432} 3433 3434static int tgsi_umad(struct r600_shader_ctx *ctx) 3435{ 3436 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3437 struct r600_bytecode_alu alu; 3438 int i, j, r; 3439 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3440 3441 /* src0 * src1 */ 3442 for (i = 0; i < lasti + 1; i++) { 3443 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3444 continue; 3445 3446 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3447 3448 alu.dst.chan = i; 3449 alu.dst.sel = ctx->temp_reg; 3450 alu.dst.write = 1; 3451 3452 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT); 3453 for (j = 0; j < 2; j++) { 3454 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 3455 } 3456 3457 alu.last = 1; 3458 r = r600_bytecode_add_alu(ctx->bc, &alu); 3459 if (r) 3460 return r; 3461 } 3462 3463 3464 for (i = 0; i < lasti + 1; i++) { 3465 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3466 continue; 3467 3468 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3469 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3470 3471 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 3472 3473 alu.src[0].sel = ctx->temp_reg; 3474 alu.src[0].chan = i; 3475 3476 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 3477 if (i == lasti) { 3478 alu.last = 1; 3479 } 3480 r = r600_bytecode_add_alu(ctx->bc, &alu); 3481 if (r) 3482 return r; 3483 } 3484 return 0; 3485} 3486 3487static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 3488 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3489 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3490 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3491 3492 /* FIXME: 3493 * For state trackers other than OpenGL, we'll want to use 3494 * _RECIP_IEEE instead. 3495 */ 3496 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 3497 3498 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 3499 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3500 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3501 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3502 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3503 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3504 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3505 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3506 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3507 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3508 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3509 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3510 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3511 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3512 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3513 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3514 /* gap */ 3515 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3516 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3517 /* gap */ 3518 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3519 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3520 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3521 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3522 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3523 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 3524 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3525 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3526 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3527 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3528 /* gap */ 3529 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3530 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3531 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3532 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3533 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3534 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3535 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3536 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3537 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3538 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3539 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3540 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3541 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3542 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3543 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3544 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3545 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3546 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3547 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3548 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3549 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3550 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3551 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3552 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3553 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3554 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3555 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3556 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3557 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3558 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3559 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3560 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3561 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3562 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3563 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3564 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3565 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 3566 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3567 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3568 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3569 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3570 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3571 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3572 /* gap */ 3573 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3574 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3575 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3576 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3577 /* gap */ 3578 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3579 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3580 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3581 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3582 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3583 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 3584 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 3585 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3586 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3587 /* gap */ 3588 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3589 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 3590 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 3591 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3592 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 3593 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3594 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 3595 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 3596 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3597 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3598 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3599 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3600 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3601 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3602 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3603 /* gap */ 3604 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3605 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3606 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3607 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3608 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3609 /* gap */ 3610 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3611 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3612 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3613 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3614 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3615 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3616 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3617 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3618 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3619 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3620 /* gap */ 3621 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3622 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2_trans}, 3623 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3624 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 3625 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 3626 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_op2}, 3627 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 3628 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3629 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2}, 3630 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_UINT, tgsi_op2}, 3631 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2}, 3632 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 3633 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3634 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 3635 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 3636 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 3637 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3638 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2}, 3639 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 3640 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 3641 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3642 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 3643 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2_swap}, 3644 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3645 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3646 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3647 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3648 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 3649 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 3650 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 3651 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 3652 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 3653 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 3654 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 3655 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 3656 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 3657 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 3658 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 3659 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 3660 {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl}, 3661 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 3662 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3663}; 3664 3665static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3666 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3667 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3668 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3669 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3670 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, 3671 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3672 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3673 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3674 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3675 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3676 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3677 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3678 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3679 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3680 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3681 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3682 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3683 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3684 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3685 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3686 /* gap */ 3687 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3688 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3689 /* gap */ 3690 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3691 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3692 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3693 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3694 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3695 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 3696 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3697 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3698 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3699 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3700 /* gap */ 3701 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3702 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3703 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3704 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3705 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3706 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3707 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3708 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3709 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3710 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3711 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3712 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3713 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3714 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3715 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3716 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3717 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3718 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3719 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3720 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3721 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3722 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3723 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3724 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3725 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3726 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3727 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3728 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3729 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3730 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3731 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3732 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3733 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3734 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3735 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3736 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3737 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 3738 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3739 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3740 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3741 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3742 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3743 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3744 /* gap */ 3745 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3746 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3747 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3748 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3749 /* gap */ 3750 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3751 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3752 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3753 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3754 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3755 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, 3756 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 3757 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3758 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3759 /* gap */ 3760 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3761 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_AND_INT, tgsi_op2}, 3762 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_OR_INT, tgsi_op2}, 3763 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3764 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 3765 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3766 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 3767 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 3768 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3769 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3770 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3771 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3772 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3773 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3774 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3775 /* gap */ 3776 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3777 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3778 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3779 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3780 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3781 /* gap */ 3782 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3783 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3784 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3785 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3786 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3787 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3788 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3789 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3790 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3791 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3792 /* gap */ 3793 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3794 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT, tgsi_op2}, 3795 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3796 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 3797 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 3798 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUB_INT, tgsi_ineg}, 3799 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT, tgsi_op2}, 3800 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3801 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 3802 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3803 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT, tgsi_op2}, 3804 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT, tgsi_op2}, 3805 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3806 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_umad}, 3807 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_UINT, tgsi_op2}, 3808 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_UINT, tgsi_op2}, 3809 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3810 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT, tgsi_op2_trans}, 3811 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE_INT, tgsi_op2}, 3812 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT, tgsi_op2}, 3813 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3814 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT_INT, tgsi_op2_swap}, 3815 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE_INT, tgsi_op2}, 3816 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3817 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3818 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3819 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3820 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 3821 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 3822 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 3823 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 3824 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 3825 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 3826 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 3827 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 3828 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 3829 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 3830 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 3831 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 3832 {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, 3833 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 3834 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3835}; 3836 3837static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 3838 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3839 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3840 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3841 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, 3842 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, 3843 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3844 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3845 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3846 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3847 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3848 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3849 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3850 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3851 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3852 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3853 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3854 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3855 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3856 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3857 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3858 /* gap */ 3859 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3860 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3861 /* gap */ 3862 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3863 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3864 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3865 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3866 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3867 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 3868 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, 3869 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, 3870 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, 3871 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3872 /* gap */ 3873 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3874 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3875 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3876 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3877 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, 3878 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3879 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3880 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3881 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3882 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3883 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3884 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3885 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3886 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3887 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3888 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3889 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, 3890 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3891 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3892 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3893 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3894 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3895 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3896 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3897 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3898 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3899 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3900 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3901 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3902 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3903 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3904 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3905 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3906 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3907 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3908 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3909 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_LB, tgsi_tex}, 3910 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3911 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3912 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3913 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3914 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3915 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3916 /* gap */ 3917 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3918 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3919 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3920 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3921 /* gap */ 3922 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3923 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3924 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3925 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3926 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3927 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3928 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, 3929 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3930 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3931 /* gap */ 3932 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3933 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3934 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3935 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3936 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_XOR_INT, tgsi_op2}, 3937 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3938 {TGSI_OPCODE_TXF, 0, SQ_TEX_INST_LD, tgsi_tex}, 3939 {TGSI_OPCODE_TXQ, 0, SQ_TEX_INST_GET_TEXTURE_RESINFO, tgsi_tex}, 3940 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3941 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3942 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3943 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3944 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3945 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3946 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3947 /* gap */ 3948 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3949 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3950 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3951 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3952 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3953 /* gap */ 3954 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3955 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3956 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3957 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3958 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3959 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3960 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3961 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3962 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3963 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3964 /* gap */ 3965 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3966 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3967 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3968 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX_INT, tgsi_op2}, 3969 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN_INT, tgsi_op2}, 3970 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3971 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3972 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3973 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3974 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3975 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3976 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3977 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3978 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3979 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3980 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3981 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3982 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3983 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3984 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3985 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3986 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3987 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3988 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3989 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3990 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3991 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3992 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 3993 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 3994 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 3995 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 3996 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 3997 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 3998 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 3999 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 4000 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 4001 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 4002 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 4003 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 4004 {TGSI_OPCODE_UARL, 0, 0, tgsi_unsupported}, 4005 {TGSI_OPCODE_UCMP, 0, 0, tgsi_unsupported}, 4006 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 4007}; 4008