/* r600_shader.c revision 8ab1c5328b12e8b075f62599a84672024aaf2982 */
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_info.h" 25#include "tgsi/tgsi_parse.h" 26#include "tgsi/tgsi_scan.h" 27#include "tgsi/tgsi_dump.h" 28#include "util/u_format.h" 29#include "r600_pipe.h" 30#include "r600_asm.h" 31#include "r600_sq.h" 32#include "r600_formats.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37#include <byteswap.h> 38 39/* CAYMAN notes 40Why CAYMAN got loops for lots of instructions is explained here. 41 42-These 8xx t-slot only ops are implemented in all vector slots. 43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 44These 8xx t-slot only opcodes become vector ops, with all four 45slots expecting the arguments on sources a and b. Result is 46broadcast to all channels. 
47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 48These 8xx t-slot only opcodes become vector ops in the z, y, and 49x slots. 50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 52SQRT_IEEE/_64 53SIN/COS 54The w slot may have an independent co-issued operation, or if the 55result is required to be in the w slot, the opcode above may be 56issued in the w slot as well. 57The compiler must issue the source argument to slots z, y, and x 58*/ 59 60 61int r600_find_vs_semantic_index(struct r600_shader *vs, 62 struct r600_shader *ps, int id) 63{ 64 struct r600_shader_io *input = &ps->input[id]; 65 66 for (int i = 0; i < vs->noutput; i++) { 67 if (input->name == vs->output[i].name && 68 input->sid == vs->output[i].sid) { 69 return i - 1; 70 } 71 } 72 return 0; 73} 74 75static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 76{ 77 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 78 struct r600_shader *rshader = &shader->shader; 79 uint32_t *ptr; 80 int i; 81 82 /* copy new shader */ 83 if (shader->bo == NULL) { 84 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ 85 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); 86 if (shader->bo == NULL) { 87 return -ENOMEM; 88 } 89 ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 90 if (R600_BIG_ENDIAN) { 91 for (i = 0; i < rshader->bc.ndw; ++i) { 92 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 93 } 94 } else { 95 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 96 } 97 r600_bo_unmap(rctx->radeon, shader->bo); 98 } 99 /* build state */ 100 switch (rshader->processor_type) { 101 case TGSI_PROCESSOR_VERTEX: 102 if (rshader->family >= CHIP_CEDAR) { 103 evergreen_pipe_shader_vs(ctx, shader); 104 } else { 105 r600_pipe_shader_vs(ctx, shader); 106 } 107 break; 108 case TGSI_PROCESSOR_FRAGMENT: 109 if (rshader->family 
>= CHIP_CEDAR) { 110 evergreen_pipe_shader_ps(ctx, shader); 111 } else { 112 r600_pipe_shader_ps(ctx, shader); 113 } 114 break; 115 default: 116 return -EINVAL; 117 } 118 return 0; 119} 120 121static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 122 123int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 124{ 125 static int dump_shaders = -1; 126 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 127 int r; 128 129 /* Would like some magic "get_bool_option_once" routine. 130 */ 131 if (dump_shaders == -1) 132 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 133 134 if (dump_shaders) { 135 fprintf(stderr, "--------------------------------------------------------------\n"); 136 tgsi_dump(tokens, 0); 137 } 138 shader->shader.family = r600_get_family(rctx->radeon); 139 r = r600_shader_from_tgsi(tokens, &shader->shader); 140 if (r) { 141 R600_ERR("translation from TGSI failed !\n"); 142 return r; 143 } 144 r = r600_bc_build(&shader->shader.bc); 145 if (r) { 146 R600_ERR("building bytecode failed !\n"); 147 return r; 148 } 149 if (dump_shaders) { 150 r600_bc_dump(&shader->shader.bc); 151 fprintf(stderr, "______________________________________________________________\n"); 152 } 153 return r600_pipe_shader(ctx, shader); 154} 155 156void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 157{ 158 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 159 160 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 161 r600_bc_clear(&shader->shader.bc); 162} 163 164/* 165 * tgsi -> r600 shader 166 */ 167struct r600_shader_tgsi_instruction; 168 169struct r600_shader_src { 170 unsigned sel; 171 unsigned swizzle[4]; 172 unsigned neg; 173 unsigned abs; 174 unsigned rel; 175 uint32_t value[4]; 176}; 177 178struct r600_shader_ctx { 179 struct tgsi_shader_info info; 180 struct tgsi_parse_context parse; 
181 const struct tgsi_token *tokens; 182 unsigned type; 183 unsigned file_offset[TGSI_FILE_COUNT]; 184 unsigned temp_reg; 185 unsigned ar_reg; 186 struct r600_shader_tgsi_instruction *inst_info; 187 struct r600_bc *bc; 188 struct r600_shader *shader; 189 struct r600_shader_src src[4]; 190 u32 *literals; 191 u32 nliterals; 192 u32 max_driver_temp_used; 193 /* needed for evergreen interpolation */ 194 boolean input_centroid; 195 boolean input_linear; 196 boolean input_perspective; 197 int num_interp_gpr; 198}; 199 200struct r600_shader_tgsi_instruction { 201 unsigned tgsi_opcode; 202 unsigned is_op3; 203 unsigned r600_opcode; 204 int (*process)(struct r600_shader_ctx *ctx); 205}; 206 207static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 208static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 209 210static int tgsi_is_supported(struct r600_shader_ctx *ctx) 211{ 212 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 213 int j; 214 215 if (i->Instruction.NumDstRegs > 1) { 216 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 217 return -EINVAL; 218 } 219 if (i->Instruction.Predicate) { 220 R600_ERR("predicate unsupported\n"); 221 return -EINVAL; 222 } 223#if 0 224 if (i->Instruction.Label) { 225 R600_ERR("label unsupported\n"); 226 return -EINVAL; 227 } 228#endif 229 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 230 if (i->Src[j].Register.Dimension) { 231 R600_ERR("unsupported src %d (dimension %d)\n", j, 232 i->Src[j].Register.Dimension); 233 return -EINVAL; 234 } 235 } 236 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 237 if (i->Dst[j].Register.Dimension) { 238 R600_ERR("unsupported dst (dimension)\n"); 239 return -EINVAL; 240 } 241 } 242 return 0; 243} 244 245static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 246{ 247 int i, r; 248 struct r600_bc_alu alu; 249 int gpr = 0, base_chan = 0; 250 int ij_index = 0; 
251 252 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 253 ij_index = 0; 254 if (ctx->shader->input[input].centroid) 255 ij_index++; 256 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 257 ij_index = 0; 258 /* if we have perspective add one */ 259 if (ctx->input_perspective) { 260 ij_index++; 261 /* if we have perspective centroid */ 262 if (ctx->input_centroid) 263 ij_index++; 264 } 265 if (ctx->shader->input[input].centroid) 266 ij_index++; 267 } 268 269 /* work out gpr and base_chan from index */ 270 gpr = ij_index / 2; 271 base_chan = (2 * (ij_index % 2)) + 1; 272 273 for (i = 0; i < 8; i++) { 274 memset(&alu, 0, sizeof(struct r600_bc_alu)); 275 276 if (i < 4) 277 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 278 else 279 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 280 281 if ((i > 1) && (i < 6)) { 282 alu.dst.sel = ctx->shader->input[input].gpr; 283 alu.dst.write = 1; 284 } 285 286 alu.dst.chan = i % 4; 287 288 alu.src[0].sel = gpr; 289 alu.src[0].chan = (base_chan - (i % 2)); 290 291 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 292 293 alu.bank_swizzle_force = SQ_ALU_VEC_210; 294 if ((i % 4) == 3) 295 alu.last = 1; 296 r = r600_bc_add_alu(ctx->bc, &alu); 297 if (r) 298 return r; 299 } 300 return 0; 301} 302 303 304static int tgsi_declaration(struct r600_shader_ctx *ctx) 305{ 306 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 307 unsigned i; 308 int r; 309 310 switch (d->Declaration.File) { 311 case TGSI_FILE_INPUT: 312 i = ctx->shader->ninput++; 313 ctx->shader->input[i].name = d->Semantic.Name; 314 ctx->shader->input[i].sid = d->Semantic.Index; 315 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 316 ctx->shader->input[i].centroid = d->Declaration.Centroid; 317 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 318 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) { 
319 /* turn input into interpolate on EG */ 320 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 321 if (ctx->shader->input[i].interpolate > 0) { 322 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 323 evergreen_interp_alu(ctx, i); 324 } 325 } 326 } 327 break; 328 case TGSI_FILE_OUTPUT: 329 i = ctx->shader->noutput++; 330 ctx->shader->output[i].name = d->Semantic.Name; 331 ctx->shader->output[i].sid = d->Semantic.Index; 332 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 333 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 334 break; 335 case TGSI_FILE_CONSTANT: 336 case TGSI_FILE_TEMPORARY: 337 case TGSI_FILE_SAMPLER: 338 case TGSI_FILE_ADDRESS: 339 break; 340 341 case TGSI_FILE_SYSTEM_VALUE: 342 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 343 struct r600_bc_alu alu; 344 memset(&alu, 0, sizeof(struct r600_bc_alu)); 345 346 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 347 alu.src[0].sel = 0; 348 alu.src[0].chan = 3; 349 350 alu.dst.sel = 0; 351 alu.dst.chan = 3; 352 alu.dst.write = 1; 353 alu.last = 1; 354 355 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 356 return r; 357 break; 358 } 359 360 default: 361 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 362 return -EINVAL; 363 } 364 return 0; 365} 366 367static int r600_get_temp(struct r600_shader_ctx *ctx) 368{ 369 return ctx->temp_reg + ctx->max_driver_temp_used++; 370} 371 372/* 373 * for evergreen we need to scan the shader to find the number of GPRs we need to 374 * reserve for interpolation. 
375 * 376 * we need to know if we are going to emit 377 * any centroid inputs 378 * if perspective and linear are required 379*/ 380static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 381{ 382 int i; 383 int num_baryc; 384 385 ctx->input_linear = FALSE; 386 ctx->input_perspective = FALSE; 387 ctx->input_centroid = FALSE; 388 ctx->num_interp_gpr = 1; 389 390 /* any centroid inputs */ 391 for (i = 0; i < ctx->info.num_inputs; i++) { 392 /* skip position/face */ 393 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 394 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 395 continue; 396 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 397 ctx->input_linear = TRUE; 398 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 399 ctx->input_perspective = TRUE; 400 if (ctx->info.input_centroid[i]) 401 ctx->input_centroid = TRUE; 402 } 403 404 num_baryc = 0; 405 /* ignoring sample for now */ 406 if (ctx->input_perspective) 407 num_baryc++; 408 if (ctx->input_linear) 409 num_baryc++; 410 if (ctx->input_centroid) 411 num_baryc *= 2; 412 413 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 414 415 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 416 return ctx->num_interp_gpr; 417} 418 419static void tgsi_src(struct r600_shader_ctx *ctx, 420 const struct tgsi_full_src_register *tgsi_src, 421 struct r600_shader_src *r600_src) 422{ 423 memset(r600_src, 0, sizeof(*r600_src)); 424 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 425 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 426 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 427 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 428 r600_src->neg = tgsi_src->Register.Negate; 429 r600_src->abs = tgsi_src->Register.Absolute; 430 431 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 432 int index; 433 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 434 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 435 
(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 436 437 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 438 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 439 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 440 return; 441 } 442 index = tgsi_src->Register.Index; 443 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 444 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 445 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 446 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ 447 r600_src->swizzle[0] = 3; 448 r600_src->swizzle[1] = 3; 449 r600_src->swizzle[2] = 3; 450 r600_src->swizzle[3] = 3; 451 r600_src->sel = 0; 452 } else { 453 if (tgsi_src->Register.Indirect) 454 r600_src->rel = V_SQ_REL_RELATIVE; 455 r600_src->sel = tgsi_src->Register.Index; 456 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 457 } 458} 459 460static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 461{ 462 struct r600_bc_vtx vtx; 463 unsigned int ar_reg; 464 int r; 465 466 if (offset) { 467 struct r600_bc_alu alu; 468 469 memset(&alu, 0, sizeof(alu)); 470 471 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 472 alu.src[0].sel = ctx->ar_reg; 473 474 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 475 alu.src[1].value = offset; 476 477 alu.dst.sel = dst_reg; 478 alu.dst.write = 1; 479 alu.last = 1; 480 481 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 482 return r; 483 484 ar_reg = dst_reg; 485 } else { 486 ar_reg = ctx->ar_reg; 487 } 488 489 memset(&vtx, 0, sizeof(vtx)); 490 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 491 vtx.src_gpr = ar_reg; 492 vtx.mega_fetch_count = 16; 493 vtx.dst_gpr = dst_reg; 494 vtx.dst_sel_x = 0; /* SEL_X */ 495 vtx.dst_sel_y = 1; /* SEL_Y */ 496 vtx.dst_sel_z = 2; /* SEL_Z */ 497 vtx.dst_sel_w = 3; /* SEL_W */ 498 vtx.data_format = FMT_32_32_32_32_FLOAT; 499 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 
500 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 501 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 502 vtx.endian = r600_endian_swap(32); 503 504 if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) 505 return r; 506 507 return 0; 508} 509 510static int tgsi_split_constant(struct r600_shader_ctx *ctx) 511{ 512 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 513 struct r600_bc_alu alu; 514 int i, j, k, nconst, r; 515 516 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 517 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 518 nconst++; 519 } 520 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 521 } 522 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 523 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 524 continue; 525 } 526 527 if (ctx->src[i].rel) { 528 int treg = r600_get_temp(ctx); 529 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 530 return r; 531 532 ctx->src[i].sel = treg; 533 ctx->src[i].rel = 0; 534 j--; 535 } else if (j > 0) { 536 int treg = r600_get_temp(ctx); 537 for (k = 0; k < 4; k++) { 538 memset(&alu, 0, sizeof(struct r600_bc_alu)); 539 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 540 alu.src[0].sel = ctx->src[i].sel; 541 alu.src[0].chan = k; 542 alu.src[0].rel = ctx->src[i].rel; 543 alu.dst.sel = treg; 544 alu.dst.chan = k; 545 alu.dst.write = 1; 546 if (k == 3) 547 alu.last = 1; 548 r = r600_bc_add_alu(ctx->bc, &alu); 549 if (r) 550 return r; 551 } 552 ctx->src[i].sel = treg; 553 ctx->src[i].rel =0; 554 j--; 555 } 556 } 557 return 0; 558} 559 560/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 561static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 562{ 563 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 564 struct r600_bc_alu alu; 565 int i, j, k, nliteral, r; 566 567 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 568 if (ctx->src[i].sel == 
V_SQ_ALU_SRC_LITERAL) { 569 nliteral++; 570 } 571 } 572 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 573 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 574 int treg = r600_get_temp(ctx); 575 for (k = 0; k < 4; k++) { 576 memset(&alu, 0, sizeof(struct r600_bc_alu)); 577 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 578 alu.src[0].sel = ctx->src[i].sel; 579 alu.src[0].chan = k; 580 alu.src[0].value = ctx->src[i].value[k]; 581 alu.dst.sel = treg; 582 alu.dst.chan = k; 583 alu.dst.write = 1; 584 if (k == 3) 585 alu.last = 1; 586 r = r600_bc_add_alu(ctx->bc, &alu); 587 if (r) 588 return r; 589 } 590 ctx->src[i].sel = treg; 591 j--; 592 } 593 } 594 return 0; 595} 596 597static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 598{ 599 struct tgsi_full_immediate *immediate; 600 struct tgsi_full_property *property; 601 struct r600_shader_ctx ctx; 602 struct r600_bc_output output[32]; 603 unsigned output_done, noutput; 604 unsigned opcode; 605 int i, r = 0, pos0; 606 607 ctx.bc = &shader->bc; 608 ctx.shader = shader; 609 r = r600_bc_init(ctx.bc, shader->family); 610 if (r) 611 return r; 612 ctx.tokens = tokens; 613 tgsi_scan_shader(tokens, &ctx.info); 614 tgsi_parse_init(&ctx.parse, tokens); 615 ctx.type = ctx.parse.FullHeader.Processor.Processor; 616 shader->processor_type = ctx.type; 617 ctx.bc->type = shader->processor_type; 618 619 /* register allocations */ 620 /* Values [0,127] correspond to GPR[0..127]. 621 * Values [128,159] correspond to constant buffer bank 0 622 * Values [160,191] correspond to constant buffer bank 1 623 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 624 * Values [256,287] correspond to constant buffer bank 2 (EG) 625 * Values [288,319] correspond to constant buffer bank 3 (EG) 626 * Other special values are shown in the list below. 627 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. 
(RV670+) 628 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 629 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 630 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 631 * 248 SQ_ALU_SRC_0: special constant 0.0. 632 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 633 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 634 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 635 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 636 * 253 SQ_ALU_SRC_LITERAL: literal constant. 637 * 254 SQ_ALU_SRC_PV: previous vector result. 638 * 255 SQ_ALU_SRC_PS: previous scalar result. 639 */ 640 for (i = 0; i < TGSI_FILE_COUNT; i++) { 641 ctx.file_offset[i] = 0; 642 } 643 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 644 ctx.file_offset[TGSI_FILE_INPUT] = 1; 645 if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) { 646 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 647 } else { 648 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 649 } 650 } 651 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) { 652 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 653 } 654 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 655 ctx.info.file_count[TGSI_FILE_INPUT]; 656 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 657 ctx.info.file_count[TGSI_FILE_OUTPUT]; 658 659 /* Outside the GPR range. This will be translated to one of the 660 * kcache banks later. 
*/ 661 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 662 663 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 664 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 665 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 666 ctx.temp_reg = ctx.ar_reg + 1; 667 668 ctx.nliterals = 0; 669 ctx.literals = NULL; 670 shader->fs_write_all = FALSE; 671 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 672 tgsi_parse_token(&ctx.parse); 673 switch (ctx.parse.FullToken.Token.Type) { 674 case TGSI_TOKEN_TYPE_IMMEDIATE: 675 immediate = &ctx.parse.FullToken.FullImmediate; 676 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 677 if(ctx.literals == NULL) { 678 r = -ENOMEM; 679 goto out_err; 680 } 681 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 682 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 683 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 684 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 685 ctx.nliterals++; 686 break; 687 case TGSI_TOKEN_TYPE_DECLARATION: 688 r = tgsi_declaration(&ctx); 689 if (r) 690 goto out_err; 691 break; 692 case TGSI_TOKEN_TYPE_INSTRUCTION: 693 r = tgsi_is_supported(&ctx); 694 if (r) 695 goto out_err; 696 ctx.max_driver_temp_used = 0; 697 /* reserve first tmp for everyone */ 698 r600_get_temp(&ctx); 699 700 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 701 if ((r = tgsi_split_constant(&ctx))) 702 goto out_err; 703 if ((r = tgsi_split_literal_constant(&ctx))) 704 goto out_err; 705 if (ctx.bc->chiprev == CHIPREV_CAYMAN) 706 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 707 else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) 708 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 709 else 710 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 711 r = ctx.inst_info->process(&ctx); 712 if (r) 713 goto out_err; 714 break; 715 case TGSI_TOKEN_TYPE_PROPERTY: 716 property = &ctx.parse.FullToken.FullProperty; 717 if (property->Property.PropertyName == 
TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { 718 if (property->u[0].Data == 1) 719 shader->fs_write_all = TRUE; 720 } 721 break; 722 default: 723 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 724 r = -EINVAL; 725 goto out_err; 726 } 727 } 728 /* export output */ 729 noutput = shader->noutput; 730 for (i = 0, pos0 = 0; i < noutput; i++) { 731 memset(&output[i], 0, sizeof(struct r600_bc_output)); 732 output[i].gpr = shader->output[i].gpr; 733 output[i].elem_size = 3; 734 output[i].swizzle_x = 0; 735 output[i].swizzle_y = 1; 736 output[i].swizzle_z = 2; 737 output[i].swizzle_w = 3; 738 output[i].burst_count = 1; 739 output[i].barrier = 1; 740 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 741 output[i].array_base = i - pos0; 742 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 743 switch (ctx.type) { 744 case TGSI_PROCESSOR_VERTEX: 745 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 746 output[i].array_base = 60; 747 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 748 /* position doesn't count in array_base */ 749 pos0++; 750 } 751 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 752 output[i].array_base = 61; 753 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 754 /* position doesn't count in array_base */ 755 pos0++; 756 } 757 break; 758 case TGSI_PROCESSOR_FRAGMENT: 759 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 760 output[i].array_base = shader->output[i].sid; 761 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 762 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 763 output[i].array_base = 61; 764 output[i].swizzle_x = 2; 765 output[i].swizzle_y = 7; 766 output[i].swizzle_z = output[i].swizzle_w = 7; 767 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 768 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 769 output[i].array_base = 61; 770 output[i].swizzle_x = 7; 771 output[i].swizzle_y = 1; 772 
output[i].swizzle_z = output[i].swizzle_w = 7; 773 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 774 } else { 775 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 776 r = -EINVAL; 777 goto out_err; 778 } 779 break; 780 default: 781 R600_ERR("unsupported processor type %d\n", ctx.type); 782 r = -EINVAL; 783 goto out_err; 784 } 785 } 786 /* add fake param output for vertex shader if no param is exported */ 787 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 788 for (i = 0, pos0 = 0; i < noutput; i++) { 789 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 790 pos0 = 1; 791 break; 792 } 793 } 794 if (!pos0) { 795 memset(&output[i], 0, sizeof(struct r600_bc_output)); 796 output[i].gpr = 0; 797 output[i].elem_size = 3; 798 output[i].swizzle_x = 0; 799 output[i].swizzle_y = 1; 800 output[i].swizzle_z = 2; 801 output[i].swizzle_w = 3; 802 output[i].burst_count = 1; 803 output[i].barrier = 1; 804 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 805 output[i].array_base = 0; 806 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 807 noutput++; 808 } 809 } 810 /* add fake pixel export */ 811 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 812 memset(&output[0], 0, sizeof(struct r600_bc_output)); 813 output[0].gpr = 0; 814 output[0].elem_size = 3; 815 output[0].swizzle_x = 7; 816 output[0].swizzle_y = 7; 817 output[0].swizzle_z = 7; 818 output[0].swizzle_w = 7; 819 output[0].burst_count = 1; 820 output[0].barrier = 1; 821 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 822 output[0].array_base = 0; 823 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 824 noutput++; 825 } 826 /* set export done on last export of each type */ 827 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 828 if (ctx.bc->chiprev < CHIPREV_CAYMAN) { 829 if (i == (noutput - 1)) { 830 output[i].end_of_program = 1; 831 } 832 } 833 if (!(output_done & (1 << 
output[i].type))) { 834 output_done |= (1 << output[i].type); 835 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 836 } 837 } 838 /* add output to bytecode */ 839 for (i = 0; i < noutput; i++) { 840 r = r600_bc_add_output(ctx.bc, &output[i]); 841 if (r) 842 goto out_err; 843 } 844 /* add program end */ 845 if (ctx.bc->chiprev == CHIPREV_CAYMAN) 846 cm_bc_add_cf_end(ctx.bc); 847 848 free(ctx.literals); 849 tgsi_parse_free(&ctx.parse); 850 return 0; 851out_err: 852 free(ctx.literals); 853 tgsi_parse_free(&ctx.parse); 854 return r; 855} 856 857static int tgsi_unsupported(struct r600_shader_ctx *ctx) 858{ 859 R600_ERR("%s tgsi opcode unsupported\n", 860 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 861 return -EINVAL; 862} 863 864static int tgsi_end(struct r600_shader_ctx *ctx) 865{ 866 return 0; 867} 868 869static void r600_bc_src(struct r600_bc_alu_src *bc_src, 870 const struct r600_shader_src *shader_src, 871 unsigned chan) 872{ 873 bc_src->sel = shader_src->sel; 874 bc_src->chan = shader_src->swizzle[chan]; 875 bc_src->neg = shader_src->neg; 876 bc_src->abs = shader_src->abs; 877 bc_src->rel = shader_src->rel; 878 bc_src->value = shader_src->value[bc_src->chan]; 879} 880 881static void tgsi_dst(struct r600_shader_ctx *ctx, 882 const struct tgsi_full_dst_register *tgsi_dst, 883 unsigned swizzle, 884 struct r600_bc_alu_dst *r600_dst) 885{ 886 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 887 888 r600_dst->sel = tgsi_dst->Register.Index; 889 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 890 r600_dst->chan = swizzle; 891 r600_dst->write = 1; 892 if (tgsi_dst->Register.Indirect) 893 r600_dst->rel = V_SQ_REL_RELATIVE; 894 if (inst->Instruction.Saturate) { 895 r600_dst->clamp = 1; 896 } 897} 898 899static int tgsi_last_instruction(unsigned writemask) 900{ 901 int i, lasti = 0; 902 903 for (i = 0; i < 4; i++) { 904 if (writemask & (1 << i)) { 905 lasti = i; 906 } 907 } 908 return lasti; 
909} 910 911static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 912{ 913 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 914 struct r600_bc_alu alu; 915 int i, j, r; 916 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 917 918 for (i = 0; i < lasti + 1; i++) { 919 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 920 continue; 921 922 memset(&alu, 0, sizeof(struct r600_bc_alu)); 923 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 924 925 alu.inst = ctx->inst_info->r600_opcode; 926 if (!swap) { 927 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 928 r600_bc_src(&alu.src[j], &ctx->src[j], i); 929 } 930 } else { 931 r600_bc_src(&alu.src[0], &ctx->src[1], i); 932 r600_bc_src(&alu.src[1], &ctx->src[0], i); 933 } 934 /* handle some special cases */ 935 switch (ctx->inst_info->tgsi_opcode) { 936 case TGSI_OPCODE_SUB: 937 alu.src[1].neg = 1; 938 break; 939 case TGSI_OPCODE_ABS: 940 alu.src[0].abs = 1; 941 if (alu.src[0].neg) 942 alu.src[0].neg = 0; 943 break; 944 default: 945 break; 946 } 947 if (i == lasti) { 948 alu.last = 1; 949 } 950 r = r600_bc_add_alu(ctx->bc, &alu); 951 if (r) 952 return r; 953 } 954 return 0; 955} 956 957static int tgsi_op2(struct r600_shader_ctx *ctx) 958{ 959 return tgsi_op2_s(ctx, 0); 960} 961 962static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 963{ 964 return tgsi_op2_s(ctx, 1); 965} 966 967static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 968{ 969 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 970 int i, j, r; 971 struct r600_bc_alu alu; 972 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; 973 974 for (i = 0 ; i < last_slot; i++) { 975 memset(&alu, 0, sizeof(struct r600_bc_alu)); 976 alu.inst = ctx->inst_info->r600_opcode; 977 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 978 r600_bc_src(&alu.src[j], &ctx->src[j], 0); 979 } 980 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 981 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 982 983 if (i == last_slot - 1) 984 alu.last = 1; 985 r = r600_bc_add_alu(ctx->bc, &alu); 986 if (r) 987 return r; 988 } 989 return 0; 990} 991 992/* 993 * r600 - trunc to -PI..PI range 994 * r700 - normalize by dividing by 2PI 995 * see fdo bug 27901 996 */ 997static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 998{ 999 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1000 static float double_pi = 3.1415926535 * 2; 1001 static float neg_pi = -3.1415926535; 1002 1003 int r; 1004 struct r600_bc_alu alu; 1005 1006 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1007 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1008 alu.is_op3 = 1; 1009 1010 alu.dst.chan = 0; 1011 alu.dst.sel = ctx->temp_reg; 1012 alu.dst.write = 1; 1013 1014 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1015 1016 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1017 alu.src[1].chan = 0; 1018 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1019 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1020 alu.src[2].chan = 0; 1021 alu.last = 1; 1022 r = r600_bc_add_alu(ctx->bc, &alu); 1023 if (r) 1024 return r; 1025 1026 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1027 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1028 1029 alu.dst.chan = 0; 1030 alu.dst.sel = ctx->temp_reg; 1031 alu.dst.write = 1; 1032 1033 alu.src[0].sel = ctx->temp_reg; 1034 alu.src[0].chan = 0; 1035 alu.last = 1; 1036 r = r600_bc_add_alu(ctx->bc, &alu); 1037 if (r) 1038 return r; 1039 1040 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1041 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1042 alu.is_op3 = 1; 1043 1044 alu.dst.chan = 0; 1045 alu.dst.sel = 
ctx->temp_reg; 1046 alu.dst.write = 1; 1047 1048 alu.src[0].sel = ctx->temp_reg; 1049 alu.src[0].chan = 0; 1050 1051 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1052 alu.src[1].chan = 0; 1053 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1054 alu.src[2].chan = 0; 1055 1056 if (ctx->bc->chiprev == CHIPREV_R600) { 1057 alu.src[1].value = *(uint32_t *)&double_pi; 1058 alu.src[2].value = *(uint32_t *)&neg_pi; 1059 } else { 1060 alu.src[1].sel = V_SQ_ALU_SRC_1; 1061 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1062 alu.src[2].neg = 1; 1063 } 1064 1065 alu.last = 1; 1066 r = r600_bc_add_alu(ctx->bc, &alu); 1067 if (r) 1068 return r; 1069 return 0; 1070} 1071 1072static int cayman_trig(struct r600_shader_ctx *ctx) 1073{ 1074 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1075 struct r600_bc_alu alu; 1076 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1077 int i, r; 1078 1079 r = tgsi_setup_trig(ctx); 1080 if (r) 1081 return r; 1082 1083 1084 for (i = 0; i < last_slot; i++) { 1085 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1086 alu.inst = ctx->inst_info->r600_opcode; 1087 alu.dst.chan = i; 1088 1089 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1090 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1091 1092 alu.src[0].sel = ctx->temp_reg; 1093 alu.src[0].chan = 0; 1094 if (i == last_slot - 1) 1095 alu.last = 1; 1096 r = r600_bc_add_alu(ctx->bc, &alu); 1097 if (r) 1098 return r; 1099 } 1100 return 0; 1101} 1102 1103static int tgsi_trig(struct r600_shader_ctx *ctx) 1104{ 1105 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1106 struct r600_bc_alu alu; 1107 int i, r; 1108 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1109 1110 r = tgsi_setup_trig(ctx); 1111 if (r) 1112 return r; 1113 1114 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1115 alu.inst = ctx->inst_info->r600_opcode; 1116 alu.dst.chan = 0; 1117 alu.dst.sel = ctx->temp_reg; 1118 alu.dst.write = 1; 1119 1120 alu.src[0].sel = 
ctx->temp_reg; 1121 alu.src[0].chan = 0; 1122 alu.last = 1; 1123 r = r600_bc_add_alu(ctx->bc, &alu); 1124 if (r) 1125 return r; 1126 1127 /* replicate result */ 1128 for (i = 0; i < lasti + 1; i++) { 1129 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1130 continue; 1131 1132 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1133 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1134 1135 alu.src[0].sel = ctx->temp_reg; 1136 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1137 if (i == lasti) 1138 alu.last = 1; 1139 r = r600_bc_add_alu(ctx->bc, &alu); 1140 if (r) 1141 return r; 1142 } 1143 return 0; 1144} 1145 1146static int tgsi_scs(struct r600_shader_ctx *ctx) 1147{ 1148 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1149 struct r600_bc_alu alu; 1150 int i, r; 1151 1152 /* We'll only need the trig stuff if we are going to write to the 1153 * X or Y components of the destination vector. 1154 */ 1155 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1156 r = tgsi_setup_trig(ctx); 1157 if (r) 1158 return r; 1159 } 1160 1161 /* dst.x = COS */ 1162 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1163 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1164 for (i = 0 ; i < 3; i++) { 1165 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1166 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1167 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1168 1169 if (i == 0) 1170 alu.dst.write = 1; 1171 else 1172 alu.dst.write = 0; 1173 alu.src[0].sel = ctx->temp_reg; 1174 alu.src[0].chan = 0; 1175 if (i == 2) 1176 alu.last = 1; 1177 r = r600_bc_add_alu(ctx->bc, &alu); 1178 if (r) 1179 return r; 1180 } 1181 } else { 1182 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1183 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1184 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1185 1186 alu.src[0].sel = ctx->temp_reg; 1187 alu.src[0].chan = 0; 1188 alu.last = 1; 1189 r = r600_bc_add_alu(ctx->bc, &alu); 1190 if (r) 1191 return r; 1192 } 
1193 } 1194 1195 /* dst.y = SIN */ 1196 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1197 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1198 for (i = 0 ; i < 3; i++) { 1199 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1200 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1201 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1202 if (i == 1) 1203 alu.dst.write = 1; 1204 else 1205 alu.dst.write = 0; 1206 alu.src[0].sel = ctx->temp_reg; 1207 alu.src[0].chan = 0; 1208 if (i == 2) 1209 alu.last = 1; 1210 r = r600_bc_add_alu(ctx->bc, &alu); 1211 if (r) 1212 return r; 1213 } 1214 } else { 1215 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1216 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1217 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1218 1219 alu.src[0].sel = ctx->temp_reg; 1220 alu.src[0].chan = 0; 1221 alu.last = 1; 1222 r = r600_bc_add_alu(ctx->bc, &alu); 1223 if (r) 1224 return r; 1225 } 1226 } 1227 1228 /* dst.z = 0.0; */ 1229 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1230 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1231 1232 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1233 1234 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1235 1236 alu.src[0].sel = V_SQ_ALU_SRC_0; 1237 alu.src[0].chan = 0; 1238 1239 alu.last = 1; 1240 1241 r = r600_bc_add_alu(ctx->bc, &alu); 1242 if (r) 1243 return r; 1244 } 1245 1246 /* dst.w = 1.0; */ 1247 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1248 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1249 1250 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1251 1252 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1253 1254 alu.src[0].sel = V_SQ_ALU_SRC_1; 1255 alu.src[0].chan = 0; 1256 1257 alu.last = 1; 1258 1259 r = r600_bc_add_alu(ctx->bc, &alu); 1260 if (r) 1261 return r; 1262 } 1263 1264 return 0; 1265} 1266 1267static int tgsi_kill(struct r600_shader_ctx *ctx) 1268{ 1269 struct r600_bc_alu alu; 1270 int i, r; 1271 1272 for (i = 0; i < 4; i++) { 1273 memset(&alu, 0, 
sizeof(struct r600_bc_alu)); 1274 alu.inst = ctx->inst_info->r600_opcode; 1275 1276 alu.dst.chan = i; 1277 1278 alu.src[0].sel = V_SQ_ALU_SRC_0; 1279 1280 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1281 alu.src[1].sel = V_SQ_ALU_SRC_1; 1282 alu.src[1].neg = 1; 1283 } else { 1284 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1285 } 1286 if (i == 3) { 1287 alu.last = 1; 1288 } 1289 r = r600_bc_add_alu(ctx->bc, &alu); 1290 if (r) 1291 return r; 1292 } 1293 1294 /* kill must be last in ALU */ 1295 ctx->bc->force_add_cf = 1; 1296 ctx->shader->uses_kill = TRUE; 1297 return 0; 1298} 1299 1300static int tgsi_lit(struct r600_shader_ctx *ctx) 1301{ 1302 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1303 struct r600_bc_alu alu; 1304 int r; 1305 1306 /* dst.x, <- 1.0 */ 1307 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1308 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1309 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1310 alu.src[0].chan = 0; 1311 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1312 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1313 r = r600_bc_add_alu(ctx->bc, &alu); 1314 if (r) 1315 return r; 1316 1317 /* dst.y = max(src.x, 0.0) */ 1318 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1319 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1320 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1321 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1322 alu.src[1].chan = 0; 1323 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1324 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1325 r = r600_bc_add_alu(ctx->bc, &alu); 1326 if (r) 1327 return r; 1328 1329 /* dst.w, <- 1.0 */ 1330 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1331 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1332 alu.src[0].sel = V_SQ_ALU_SRC_1; 1333 alu.src[0].chan = 0; 1334 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1335 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1336 alu.last = 1; 1337 r = r600_bc_add_alu(ctx->bc, 
&alu); 1338 if (r) 1339 return r; 1340 1341 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1342 { 1343 int chan; 1344 int sel; 1345 int i; 1346 1347 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1348 for (i = 0; i < 3; i++) { 1349 /* dst.z = log(src.y) */ 1350 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1351 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1352 r600_bc_src(&alu.src[0], &ctx->src[0], 1); 1353 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1354 if (i == 2) { 1355 alu.dst.write = 1; 1356 alu.last = 1; 1357 } else 1358 alu.dst.write = 0; 1359 1360 r = r600_bc_add_alu(ctx->bc, &alu); 1361 if (r) 1362 return r; 1363 } 1364 } else { 1365 /* dst.z = log(src.y) */ 1366 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1367 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1368 r600_bc_src(&alu.src[0], &ctx->src[0], 1); 1369 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1370 alu.last = 1; 1371 r = r600_bc_add_alu(ctx->bc, &alu); 1372 if (r) 1373 return r; 1374 } 1375 1376 chan = alu.dst.chan; 1377 sel = alu.dst.sel; 1378 1379 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1380 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1381 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1382 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1383 alu.src[1].sel = sel; 1384 alu.src[1].chan = chan; 1385 1386 r600_bc_src(&alu.src[2], &ctx->src[0], 0); 1387 alu.dst.sel = ctx->temp_reg; 1388 alu.dst.chan = 0; 1389 alu.dst.write = 1; 1390 alu.is_op3 = 1; 1391 alu.last = 1; 1392 r = r600_bc_add_alu(ctx->bc, &alu); 1393 if (r) 1394 return r; 1395 1396 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1397 for (i = 0; i < 3; i++) { 1398 /* dst.z = exp(tmp.x) */ 1399 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1400 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1401 alu.src[0].sel = ctx->temp_reg; 1402 alu.src[0].chan = 0; 1403 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1404 if (i == 2) { 1405 alu.dst.write = 1; 1406 alu.last = 1; 1407 } else 1408 
alu.dst.write = 0; 1409 r = r600_bc_add_alu(ctx->bc, &alu); 1410 if (r) 1411 return r; 1412 } 1413 } else { 1414 /* dst.z = exp(tmp.x) */ 1415 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1416 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1417 alu.src[0].sel = ctx->temp_reg; 1418 alu.src[0].chan = 0; 1419 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1420 alu.last = 1; 1421 r = r600_bc_add_alu(ctx->bc, &alu); 1422 if (r) 1423 return r; 1424 } 1425 } 1426 return 0; 1427} 1428 1429static int tgsi_rsq(struct r600_shader_ctx *ctx) 1430{ 1431 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1432 struct r600_bc_alu alu; 1433 int i, r; 1434 1435 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1436 1437 /* FIXME: 1438 * For state trackers other than OpenGL, we'll want to use 1439 * _RECIPSQRT_IEEE instead. 1440 */ 1441 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1442 1443 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1444 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1445 alu.src[i].abs = 1; 1446 } 1447 alu.dst.sel = ctx->temp_reg; 1448 alu.dst.write = 1; 1449 alu.last = 1; 1450 r = r600_bc_add_alu(ctx->bc, &alu); 1451 if (r) 1452 return r; 1453 /* replicate result */ 1454 return tgsi_helper_tempx_replicate(ctx); 1455} 1456 1457static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1458{ 1459 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1460 struct r600_bc_alu alu; 1461 int i, r; 1462 1463 for (i = 0; i < 4; i++) { 1464 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1465 alu.src[0].sel = ctx->temp_reg; 1466 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1467 alu.dst.chan = i; 1468 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1469 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1470 if (i == 3) 1471 alu.last = 1; 1472 r = r600_bc_add_alu(ctx->bc, &alu); 1473 if (r) 1474 return r; 1475 } 1476 return 0; 1477} 1478 1479static int 
tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1480{ 1481 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1482 struct r600_bc_alu alu; 1483 int i, r; 1484 1485 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1486 alu.inst = ctx->inst_info->r600_opcode; 1487 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1488 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1489 } 1490 alu.dst.sel = ctx->temp_reg; 1491 alu.dst.write = 1; 1492 alu.last = 1; 1493 r = r600_bc_add_alu(ctx->bc, &alu); 1494 if (r) 1495 return r; 1496 /* replicate result */ 1497 return tgsi_helper_tempx_replicate(ctx); 1498} 1499 1500static int cayman_pow(struct r600_shader_ctx *ctx) 1501{ 1502 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1503 int i, r; 1504 struct r600_bc_alu alu; 1505 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1506 1507 for (i = 0; i < 3; i++) { 1508 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1509 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1510 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1511 alu.dst.sel = ctx->temp_reg; 1512 alu.dst.chan = i; 1513 alu.dst.write = 1; 1514 if (i == 2) 1515 alu.last = 1; 1516 r = r600_bc_add_alu(ctx->bc, &alu); 1517 if (r) 1518 return r; 1519 } 1520 1521 /* b * LOG2(a) */ 1522 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1523 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1524 r600_bc_src(&alu.src[0], &ctx->src[1], 0); 1525 alu.src[1].sel = ctx->temp_reg; 1526 alu.dst.sel = ctx->temp_reg; 1527 alu.dst.write = 1; 1528 alu.last = 1; 1529 r = r600_bc_add_alu(ctx->bc, &alu); 1530 if (r) 1531 return r; 1532 1533 for (i = 0; i < last_slot; i++) { 1534 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1535 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1536 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1537 alu.src[0].sel = ctx->temp_reg; 1538 1539 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1540 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 
1; 1541 if (i == last_slot - 1) 1542 alu.last = 1; 1543 r = r600_bc_add_alu(ctx->bc, &alu); 1544 if (r) 1545 return r; 1546 } 1547 return 0; 1548} 1549 1550static int tgsi_pow(struct r600_shader_ctx *ctx) 1551{ 1552 struct r600_bc_alu alu; 1553 int r; 1554 1555 /* LOG2(a) */ 1556 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1557 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1558 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1559 alu.dst.sel = ctx->temp_reg; 1560 alu.dst.write = 1; 1561 alu.last = 1; 1562 r = r600_bc_add_alu(ctx->bc, &alu); 1563 if (r) 1564 return r; 1565 /* b * LOG2(a) */ 1566 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1567 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1568 r600_bc_src(&alu.src[0], &ctx->src[1], 0); 1569 alu.src[1].sel = ctx->temp_reg; 1570 alu.dst.sel = ctx->temp_reg; 1571 alu.dst.write = 1; 1572 alu.last = 1; 1573 r = r600_bc_add_alu(ctx->bc, &alu); 1574 if (r) 1575 return r; 1576 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1577 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1578 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1579 alu.src[0].sel = ctx->temp_reg; 1580 alu.dst.sel = ctx->temp_reg; 1581 alu.dst.write = 1; 1582 alu.last = 1; 1583 r = r600_bc_add_alu(ctx->bc, &alu); 1584 if (r) 1585 return r; 1586 return tgsi_helper_tempx_replicate(ctx); 1587} 1588 1589static int tgsi_ssg(struct r600_shader_ctx *ctx) 1590{ 1591 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1592 struct r600_bc_alu alu; 1593 int i, r; 1594 1595 /* tmp = (src > 0 ? 
1 : src) */ 1596 for (i = 0; i < 4; i++) { 1597 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1598 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1599 alu.is_op3 = 1; 1600 1601 alu.dst.sel = ctx->temp_reg; 1602 alu.dst.chan = i; 1603 1604 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1605 alu.src[1].sel = V_SQ_ALU_SRC_1; 1606 r600_bc_src(&alu.src[2], &ctx->src[0], i); 1607 1608 if (i == 3) 1609 alu.last = 1; 1610 r = r600_bc_add_alu(ctx->bc, &alu); 1611 if (r) 1612 return r; 1613 } 1614 1615 /* dst = (-tmp > 0 ? -1 : tmp) */ 1616 for (i = 0; i < 4; i++) { 1617 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1618 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1619 alu.is_op3 = 1; 1620 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1621 1622 alu.src[0].sel = ctx->temp_reg; 1623 alu.src[0].chan = i; 1624 alu.src[0].neg = 1; 1625 1626 alu.src[1].sel = V_SQ_ALU_SRC_1; 1627 alu.src[1].neg = 1; 1628 1629 alu.src[2].sel = ctx->temp_reg; 1630 alu.src[2].chan = i; 1631 1632 if (i == 3) 1633 alu.last = 1; 1634 r = r600_bc_add_alu(ctx->bc, &alu); 1635 if (r) 1636 return r; 1637 } 1638 return 0; 1639} 1640 1641static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1642{ 1643 struct r600_bc_alu alu; 1644 int i, r; 1645 1646 for (i = 0; i < 4; i++) { 1647 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1648 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1649 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1650 alu.dst.chan = i; 1651 } else { 1652 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1653 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1654 alu.src[0].sel = ctx->temp_reg; 1655 alu.src[0].chan = i; 1656 } 1657 if (i == 3) { 1658 alu.last = 1; 1659 } 1660 r = r600_bc_add_alu(ctx->bc, &alu); 1661 if (r) 1662 return r; 1663 } 1664 return 0; 1665} 1666 1667static int tgsi_op3(struct r600_shader_ctx *ctx) 1668{ 1669 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1670 struct 
r600_bc_alu alu; 1671 int i, j, r; 1672 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1673 1674 for (i = 0; i < lasti + 1; i++) { 1675 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1676 continue; 1677 1678 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1679 alu.inst = ctx->inst_info->r600_opcode; 1680 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1681 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1682 } 1683 1684 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1685 alu.dst.chan = i; 1686 alu.dst.write = 1; 1687 alu.is_op3 = 1; 1688 if (i == lasti) { 1689 alu.last = 1; 1690 } 1691 r = r600_bc_add_alu(ctx->bc, &alu); 1692 if (r) 1693 return r; 1694 } 1695 return 0; 1696} 1697 1698static int tgsi_dp(struct r600_shader_ctx *ctx) 1699{ 1700 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1701 struct r600_bc_alu alu; 1702 int i, j, r; 1703 1704 for (i = 0; i < 4; i++) { 1705 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1706 alu.inst = ctx->inst_info->r600_opcode; 1707 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1708 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1709 } 1710 1711 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1712 alu.dst.chan = i; 1713 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1714 /* handle some special cases */ 1715 switch (ctx->inst_info->tgsi_opcode) { 1716 case TGSI_OPCODE_DP2: 1717 if (i > 1) { 1718 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1719 alu.src[0].chan = alu.src[1].chan = 0; 1720 } 1721 break; 1722 case TGSI_OPCODE_DP3: 1723 if (i > 2) { 1724 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1725 alu.src[0].chan = alu.src[1].chan = 0; 1726 } 1727 break; 1728 case TGSI_OPCODE_DPH: 1729 if (i == 3) { 1730 alu.src[0].sel = V_SQ_ALU_SRC_1; 1731 alu.src[0].chan = 0; 1732 alu.src[0].neg = 0; 1733 } 1734 break; 1735 default: 1736 break; 1737 } 1738 if (i == 3) { 1739 alu.last = 1; 1740 } 1741 r = r600_bc_add_alu(ctx->bc, &alu); 1742 if (r) 1743 return r; 1744 } 1745 
return 0; 1746} 1747 1748static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 1749 unsigned index) 1750{ 1751 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1752 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 1753 inst->Src[index].Register.File != TGSI_FILE_INPUT) || 1754 ctx->src[index].neg || ctx->src[index].abs; 1755} 1756 1757static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 1758 unsigned index) 1759{ 1760 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1761 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 1762} 1763 1764static int tgsi_tex(struct r600_shader_ctx *ctx) 1765{ 1766 static float one_point_five = 1.5f; 1767 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1768 struct r600_bc_tex tex; 1769 struct r600_bc_alu alu; 1770 unsigned src_gpr; 1771 int r, i, j; 1772 int opcode; 1773 /* Texture fetch instructions can only use gprs as source. 1774 * Also they cannot negate the source or take the absolute value */ 1775 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); 1776 boolean src_loaded = FALSE; 1777 unsigned sampler_src_reg = 1; 1778 1779 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 1780 1781 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 1782 /* TGSI moves the sampler to src reg 3 for TXD */ 1783 sampler_src_reg = 3; 1784 1785 for (i = 1; i < 3; i++) { 1786 /* set gradients h/v */ 1787 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1788 tex.inst = (i == 1) ? 
SQ_TEX_INST_SET_GRADIENTS_H : 1789 SQ_TEX_INST_SET_GRADIENTS_V; 1790 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 1791 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 1792 1793 if (tgsi_tex_src_requires_loading(ctx, i)) { 1794 tex.src_gpr = r600_get_temp(ctx); 1795 tex.src_sel_x = 0; 1796 tex.src_sel_y = 1; 1797 tex.src_sel_z = 2; 1798 tex.src_sel_w = 3; 1799 1800 for (j = 0; j < 4; j++) { 1801 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1802 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1803 r600_bc_src(&alu.src[0], &ctx->src[i], j); 1804 alu.dst.sel = tex.src_gpr; 1805 alu.dst.chan = j; 1806 if (j == 3) 1807 alu.last = 1; 1808 alu.dst.write = 1; 1809 r = r600_bc_add_alu(ctx->bc, &alu); 1810 if (r) 1811 return r; 1812 } 1813 1814 } else { 1815 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 1816 tex.src_sel_x = ctx->src[i].swizzle[0]; 1817 tex.src_sel_y = ctx->src[i].swizzle[1]; 1818 tex.src_sel_z = ctx->src[i].swizzle[2]; 1819 tex.src_sel_w = ctx->src[i].swizzle[3]; 1820 tex.src_rel = ctx->src[i].rel; 1821 } 1822 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 1823 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 1824 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1825 tex.coord_type_x = 1; 1826 tex.coord_type_y = 1; 1827 tex.coord_type_z = 1; 1828 tex.coord_type_w = 1; 1829 } 1830 r = r600_bc_add_tex(ctx->bc, &tex); 1831 if (r) 1832 return r; 1833 } 1834 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1835 int out_chan; 1836 /* Add perspective divide */ 1837 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1838 out_chan = 2; 1839 for (i = 0; i < 3; i++) { 1840 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1841 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1842 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1843 1844 alu.dst.sel = ctx->temp_reg; 1845 alu.dst.chan = i; 1846 if (i == 2) 1847 alu.last = 1; 1848 if (out_chan == i) 1849 alu.dst.write = 1; 1850 r = 
r600_bc_add_alu(ctx->bc, &alu); 1851 if (r) 1852 return r; 1853 } 1854 1855 } else { 1856 out_chan = 3; 1857 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1858 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1859 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1860 1861 alu.dst.sel = ctx->temp_reg; 1862 alu.dst.chan = out_chan; 1863 alu.last = 1; 1864 alu.dst.write = 1; 1865 r = r600_bc_add_alu(ctx->bc, &alu); 1866 if (r) 1867 return r; 1868 } 1869 1870 for (i = 0; i < 3; i++) { 1871 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1872 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1873 alu.src[0].sel = ctx->temp_reg; 1874 alu.src[0].chan = out_chan; 1875 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1876 alu.dst.sel = ctx->temp_reg; 1877 alu.dst.chan = i; 1878 alu.dst.write = 1; 1879 r = r600_bc_add_alu(ctx->bc, &alu); 1880 if (r) 1881 return r; 1882 } 1883 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1884 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1885 alu.src[0].sel = V_SQ_ALU_SRC_1; 1886 alu.src[0].chan = 0; 1887 alu.dst.sel = ctx->temp_reg; 1888 alu.dst.chan = 3; 1889 alu.last = 1; 1890 alu.dst.write = 1; 1891 r = r600_bc_add_alu(ctx->bc, &alu); 1892 if (r) 1893 return r; 1894 src_loaded = TRUE; 1895 src_gpr = ctx->temp_reg; 1896 } 1897 1898 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1899 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 1900 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 1901 1902 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1903 for (i = 0; i < 4; i++) { 1904 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1905 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1906 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 1907 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 1908 alu.dst.sel = ctx->temp_reg; 1909 alu.dst.chan = i; 1910 if (i == 3) 1911 alu.last = 1; 1912 alu.dst.write = 1; 1913 r = r600_bc_add_alu(ctx->bc, &alu); 1914 if (r) 1915 return r; 1916 } 1917 1918 /* tmp1.z = 
RCP_e(|tmp1.z|) */ 1919 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 1920 for (i = 0; i < 3; i++) { 1921 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1922 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1923 alu.src[0].sel = ctx->temp_reg; 1924 alu.src[0].chan = 2; 1925 alu.src[0].abs = 1; 1926 alu.dst.sel = ctx->temp_reg; 1927 alu.dst.chan = i; 1928 if (i == 2) 1929 alu.dst.write = 1; 1930 if (i == 2) 1931 alu.last = 1; 1932 r = r600_bc_add_alu(ctx->bc, &alu); 1933 if (r) 1934 return r; 1935 } 1936 } else { 1937 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1938 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1939 alu.src[0].sel = ctx->temp_reg; 1940 alu.src[0].chan = 2; 1941 alu.src[0].abs = 1; 1942 alu.dst.sel = ctx->temp_reg; 1943 alu.dst.chan = 2; 1944 alu.dst.write = 1; 1945 alu.last = 1; 1946 r = r600_bc_add_alu(ctx->bc, &alu); 1947 if (r) 1948 return r; 1949 } 1950 1951 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1952 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1953 * muladd has no writemask, have to use another temp 1954 */ 1955 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1956 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1957 alu.is_op3 = 1; 1958 1959 alu.src[0].sel = ctx->temp_reg; 1960 alu.src[0].chan = 0; 1961 alu.src[1].sel = ctx->temp_reg; 1962 alu.src[1].chan = 2; 1963 1964 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1965 alu.src[2].chan = 0; 1966 alu.src[2].value = *(uint32_t *)&one_point_five; 1967 1968 alu.dst.sel = ctx->temp_reg; 1969 alu.dst.chan = 0; 1970 alu.dst.write = 1; 1971 1972 r = r600_bc_add_alu(ctx->bc, &alu); 1973 if (r) 1974 return r; 1975 1976 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1977 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1978 alu.is_op3 = 1; 1979 1980 alu.src[0].sel = ctx->temp_reg; 1981 alu.src[0].chan = 1; 1982 alu.src[1].sel = ctx->temp_reg; 1983 alu.src[1].chan = 2; 1984 1985 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1986 alu.src[2].chan = 0; 1987 
alu.src[2].value = *(uint32_t *)&one_point_five; 1988 1989 alu.dst.sel = ctx->temp_reg; 1990 alu.dst.chan = 1; 1991 alu.dst.write = 1; 1992 1993 alu.last = 1; 1994 r = r600_bc_add_alu(ctx->bc, &alu); 1995 if (r) 1996 return r; 1997 1998 src_loaded = TRUE; 1999 src_gpr = ctx->temp_reg; 2000 } 2001 2002 if (src_requires_loading && !src_loaded) { 2003 for (i = 0; i < 4; i++) { 2004 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2005 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2006 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2007 alu.dst.sel = ctx->temp_reg; 2008 alu.dst.chan = i; 2009 if (i == 3) 2010 alu.last = 1; 2011 alu.dst.write = 1; 2012 r = r600_bc_add_alu(ctx->bc, &alu); 2013 if (r) 2014 return r; 2015 } 2016 src_loaded = TRUE; 2017 src_gpr = ctx->temp_reg; 2018 } 2019 2020 opcode = ctx->inst_info->r600_opcode; 2021 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) { 2022 switch (opcode) { 2023 case SQ_TEX_INST_SAMPLE: 2024 opcode = SQ_TEX_INST_SAMPLE_C; 2025 break; 2026 case SQ_TEX_INST_SAMPLE_L: 2027 opcode = SQ_TEX_INST_SAMPLE_C_L; 2028 break; 2029 case SQ_TEX_INST_SAMPLE_G: 2030 opcode = SQ_TEX_INST_SAMPLE_C_G; 2031 break; 2032 } 2033 } 2034 2035 memset(&tex, 0, sizeof(struct r600_bc_tex)); 2036 tex.inst = opcode; 2037 2038 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 2039 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 2040 tex.src_gpr = src_gpr; 2041 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 2042 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 2043 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 2044 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 2045 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; 2046 if (src_loaded) { 2047 tex.src_sel_x = 0; 2048 tex.src_sel_y = 1; 2049 tex.src_sel_z = 2; 2050 tex.src_sel_w = 3; 2051 } else { 2052 tex.src_sel_x = ctx->src[0].swizzle[0]; 2053 tex.src_sel_y = ctx->src[0].swizzle[1]; 2054 tex.src_sel_z = ctx->src[0].swizzle[2]; 2055 tex.src_sel_w = ctx->src[0].swizzle[3]; 2056 tex.src_rel = ctx->src[0].rel; 2057 } 2058 2059 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2060 tex.src_sel_x = 1; 2061 tex.src_sel_y = 0; 2062 tex.src_sel_z = 3; 2063 tex.src_sel_w = 1; 2064 } 2065 2066 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 2067 tex.coord_type_x = 1; 2068 tex.coord_type_y = 1; 2069 tex.coord_type_z = 1; 2070 tex.coord_type_w = 1; 2071 } 2072 2073 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { 2074 tex.coord_type_z = 0; 2075 tex.src_sel_z = tex.src_sel_y; 2076 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) 2077 tex.coord_type_z = 0; 2078 2079 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 2080 tex.src_sel_w = tex.src_sel_z; 2081 2082 r = r600_bc_add_tex(ctx->bc, &tex); 2083 if (r) 2084 return r; 2085 2086 /* add shadow ambient support - gallium doesn't do it yet */ 2087 return 0; 2088} 2089 2090static int tgsi_lrp(struct r600_shader_ctx *ctx) 2091{ 2092 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2093 struct r600_bc_alu alu; 2094 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2095 unsigned i; 2096 int r; 2097 2098 /* optimize if it's just an equal balance */ 2099 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 2100 for (i = 0; i < lasti + 1; i++) { 2101 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2102 continue; 2103 2104 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2105 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2106 r600_bc_src(&alu.src[0], &ctx->src[1], i); 2107 r600_bc_src(&alu.src[1], &ctx->src[2], i); 2108 alu.omod = 3; 2109 tgsi_dst(ctx, &inst->Dst[0], i, 
&alu.dst); 2110 alu.dst.chan = i; 2111 if (i == lasti) { 2112 alu.last = 1; 2113 } 2114 r = r600_bc_add_alu(ctx->bc, &alu); 2115 if (r) 2116 return r; 2117 } 2118 return 0; 2119 } 2120 2121 /* 1 - src0 */ 2122 for (i = 0; i < lasti + 1; i++) { 2123 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2124 continue; 2125 2126 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2127 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2128 alu.src[0].sel = V_SQ_ALU_SRC_1; 2129 alu.src[0].chan = 0; 2130 r600_bc_src(&alu.src[1], &ctx->src[0], i); 2131 alu.src[1].neg = 1; 2132 alu.dst.sel = ctx->temp_reg; 2133 alu.dst.chan = i; 2134 if (i == lasti) { 2135 alu.last = 1; 2136 } 2137 alu.dst.write = 1; 2138 r = r600_bc_add_alu(ctx->bc, &alu); 2139 if (r) 2140 return r; 2141 } 2142 2143 /* (1 - src0) * src2 */ 2144 for (i = 0; i < lasti + 1; i++) { 2145 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2146 continue; 2147 2148 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2149 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2150 alu.src[0].sel = ctx->temp_reg; 2151 alu.src[0].chan = i; 2152 r600_bc_src(&alu.src[1], &ctx->src[2], i); 2153 alu.dst.sel = ctx->temp_reg; 2154 alu.dst.chan = i; 2155 if (i == lasti) { 2156 alu.last = 1; 2157 } 2158 alu.dst.write = 1; 2159 r = r600_bc_add_alu(ctx->bc, &alu); 2160 if (r) 2161 return r; 2162 } 2163 2164 /* src0 * src1 + (1 - src0) * src2 */ 2165 for (i = 0; i < lasti + 1; i++) { 2166 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2167 continue; 2168 2169 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2170 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2171 alu.is_op3 = 1; 2172 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2173 r600_bc_src(&alu.src[1], &ctx->src[1], i); 2174 alu.src[2].sel = ctx->temp_reg; 2175 alu.src[2].chan = i; 2176 2177 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2178 alu.dst.chan = i; 2179 if (i == lasti) { 2180 alu.last = 1; 2181 } 2182 r = r600_bc_add_alu(ctx->bc, &alu); 2183 if (r) 2184 return 
r; 2185 } 2186 return 0; 2187} 2188 2189static int tgsi_cmp(struct r600_shader_ctx *ctx) 2190{ 2191 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2192 struct r600_bc_alu alu; 2193 int i, r; 2194 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2195 2196 for (i = 0; i < lasti + 1; i++) { 2197 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2198 continue; 2199 2200 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2201 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2202 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2203 r600_bc_src(&alu.src[1], &ctx->src[2], i); 2204 r600_bc_src(&alu.src[2], &ctx->src[1], i); 2205 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2206 alu.dst.chan = i; 2207 alu.dst.write = 1; 2208 alu.is_op3 = 1; 2209 if (i == lasti) 2210 alu.last = 1; 2211 r = r600_bc_add_alu(ctx->bc, &alu); 2212 if (r) 2213 return r; 2214 } 2215 return 0; 2216} 2217 2218static int tgsi_xpd(struct r600_shader_ctx *ctx) 2219{ 2220 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2221 static const unsigned int src0_swizzle[] = {2, 0, 1}; 2222 static const unsigned int src1_swizzle[] = {1, 2, 0}; 2223 struct r600_bc_alu alu; 2224 uint32_t use_temp = 0; 2225 int i, r; 2226 2227 if (inst->Dst[0].Register.WriteMask != 0xf) 2228 use_temp = 1; 2229 2230 for (i = 0; i < 4; i++) { 2231 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2232 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2233 if (i < 3) { 2234 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2235 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 2236 } else { 2237 alu.src[0].sel = V_SQ_ALU_SRC_0; 2238 alu.src[0].chan = i; 2239 alu.src[1].sel = V_SQ_ALU_SRC_0; 2240 alu.src[1].chan = i; 2241 } 2242 2243 alu.dst.sel = ctx->temp_reg; 2244 alu.dst.chan = i; 2245 alu.dst.write = 1; 2246 2247 if (i == 3) 2248 alu.last = 1; 2249 r = r600_bc_add_alu(ctx->bc, &alu); 2250 if (r) 2251 return r; 2252 } 2253 2254 for (i = 0; i < 
4; i++) { 2255 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2256 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2257 2258 if (i < 3) { 2259 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 2260 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 2261 } else { 2262 alu.src[0].sel = V_SQ_ALU_SRC_0; 2263 alu.src[0].chan = i; 2264 alu.src[1].sel = V_SQ_ALU_SRC_0; 2265 alu.src[1].chan = i; 2266 } 2267 2268 alu.src[2].sel = ctx->temp_reg; 2269 alu.src[2].neg = 1; 2270 alu.src[2].chan = i; 2271 2272 if (use_temp) 2273 alu.dst.sel = ctx->temp_reg; 2274 else 2275 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2276 alu.dst.chan = i; 2277 alu.dst.write = 1; 2278 alu.is_op3 = 1; 2279 if (i == 3) 2280 alu.last = 1; 2281 r = r600_bc_add_alu(ctx->bc, &alu); 2282 if (r) 2283 return r; 2284 } 2285 if (use_temp) 2286 return tgsi_helper_copy(ctx, inst); 2287 return 0; 2288} 2289 2290static int tgsi_exp(struct r600_shader_ctx *ctx) 2291{ 2292 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2293 struct r600_bc_alu alu; 2294 int r; 2295 int i; 2296 2297 /* result.x = 2^floor(src); */ 2298 if (inst->Dst[0].Register.WriteMask & 1) { 2299 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2300 2301 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2302 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2303 2304 alu.dst.sel = ctx->temp_reg; 2305 alu.dst.chan = 0; 2306 alu.dst.write = 1; 2307 alu.last = 1; 2308 r = r600_bc_add_alu(ctx->bc, &alu); 2309 if (r) 2310 return r; 2311 2312 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2313 for (i = 0; i < 3; i++) { 2314 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2315 alu.src[0].sel = ctx->temp_reg; 2316 alu.src[0].chan = 0; 2317 2318 alu.dst.sel = ctx->temp_reg; 2319 alu.dst.chan = i; 2320 if (i == 0) 2321 alu.dst.write = 1; 2322 if (i == 2) 2323 alu.last = 1; 2324 r = r600_bc_add_alu(ctx->bc, &alu); 2325 if (r) 2326 return r; 2327 } 2328 } else { 2329 alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2330 alu.src[0].sel = ctx->temp_reg; 2331 alu.src[0].chan = 0; 2332 2333 alu.dst.sel = ctx->temp_reg; 2334 alu.dst.chan = 0; 2335 alu.dst.write = 1; 2336 alu.last = 1; 2337 r = r600_bc_add_alu(ctx->bc, &alu); 2338 if (r) 2339 return r; 2340 } 2341 } 2342 2343 /* result.y = tmp - floor(tmp); */ 2344 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2345 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2346 2347 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2348 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2349 2350 alu.dst.sel = ctx->temp_reg; 2351#if 0 2352 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2353 if (r) 2354 return r; 2355#endif 2356 alu.dst.write = 1; 2357 alu.dst.chan = 1; 2358 2359 alu.last = 1; 2360 2361 r = r600_bc_add_alu(ctx->bc, &alu); 2362 if (r) 2363 return r; 2364 } 2365 2366 /* result.z = RoughApprox2ToX(tmp);*/ 2367 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2368 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2369 for (i = 0; i < 3; i++) { 2370 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2371 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2372 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2373 2374 alu.dst.sel = ctx->temp_reg; 2375 alu.dst.chan = i; 2376 if (i == 2) { 2377 alu.dst.write = 1; 2378 alu.last = 1; 2379 } 2380 2381 r = r600_bc_add_alu(ctx->bc, &alu); 2382 if (r) 2383 return r; 2384 } 2385 } else { 2386 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2387 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2388 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2389 2390 alu.dst.sel = ctx->temp_reg; 2391 alu.dst.write = 1; 2392 alu.dst.chan = 2; 2393 2394 alu.last = 1; 2395 2396 r = r600_bc_add_alu(ctx->bc, &alu); 2397 if (r) 2398 return r; 2399 } 2400 } 2401 2402 /* result.w = 1.0;*/ 2403 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2404 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2405 2406 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 
2407 alu.src[0].sel = V_SQ_ALU_SRC_1; 2408 alu.src[0].chan = 0; 2409 2410 alu.dst.sel = ctx->temp_reg; 2411 alu.dst.chan = 3; 2412 alu.dst.write = 1; 2413 alu.last = 1; 2414 r = r600_bc_add_alu(ctx->bc, &alu); 2415 if (r) 2416 return r; 2417 } 2418 return tgsi_helper_copy(ctx, inst); 2419} 2420 2421static int tgsi_log(struct r600_shader_ctx *ctx) 2422{ 2423 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2424 struct r600_bc_alu alu; 2425 int r; 2426 int i; 2427 2428 /* result.x = floor(log2(src)); */ 2429 if (inst->Dst[0].Register.WriteMask & 1) { 2430 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2431 for (i = 0; i < 3; i++) { 2432 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2433 2434 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2435 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2436 2437 alu.dst.sel = ctx->temp_reg; 2438 alu.dst.chan = i; 2439 if (i == 0) 2440 alu.dst.write = 1; 2441 if (i == 2) 2442 alu.last = 1; 2443 r = r600_bc_add_alu(ctx->bc, &alu); 2444 if (r) 2445 return r; 2446 } 2447 2448 } else { 2449 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2450 2451 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2452 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2453 2454 alu.dst.sel = ctx->temp_reg; 2455 alu.dst.chan = 0; 2456 alu.dst.write = 1; 2457 alu.last = 1; 2458 r = r600_bc_add_alu(ctx->bc, &alu); 2459 if (r) 2460 return r; 2461 } 2462 2463 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2464 alu.src[0].sel = ctx->temp_reg; 2465 alu.src[0].chan = 0; 2466 2467 alu.dst.sel = ctx->temp_reg; 2468 alu.dst.chan = 0; 2469 alu.dst.write = 1; 2470 alu.last = 1; 2471 2472 r = r600_bc_add_alu(ctx->bc, &alu); 2473 if (r) 2474 return r; 2475 } 2476 2477 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2478 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2479 2480 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2481 for (i = 0; i < 3; i++) { 2482 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2483 2484 alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2485 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2486 2487 alu.dst.sel = ctx->temp_reg; 2488 alu.dst.chan = i; 2489 if (i == 1) 2490 alu.dst.write = 1; 2491 if (i == 2) 2492 alu.last = 1; 2493 2494 r = r600_bc_add_alu(ctx->bc, &alu); 2495 if (r) 2496 return r; 2497 } 2498 } else { 2499 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2500 2501 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2502 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2503 2504 alu.dst.sel = ctx->temp_reg; 2505 alu.dst.chan = 1; 2506 alu.dst.write = 1; 2507 alu.last = 1; 2508 2509 r = r600_bc_add_alu(ctx->bc, &alu); 2510 if (r) 2511 return r; 2512 } 2513 2514 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2515 2516 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2517 alu.src[0].sel = ctx->temp_reg; 2518 alu.src[0].chan = 1; 2519 2520 alu.dst.sel = ctx->temp_reg; 2521 alu.dst.chan = 1; 2522 alu.dst.write = 1; 2523 alu.last = 1; 2524 2525 r = r600_bc_add_alu(ctx->bc, &alu); 2526 if (r) 2527 return r; 2528 2529 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2530 for (i = 0; i < 3; i++) { 2531 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2532 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2533 alu.src[0].sel = ctx->temp_reg; 2534 alu.src[0].chan = 1; 2535 2536 alu.dst.sel = ctx->temp_reg; 2537 alu.dst.chan = i; 2538 if (i == 1) 2539 alu.dst.write = 1; 2540 if (i == 2) 2541 alu.last = 1; 2542 2543 r = r600_bc_add_alu(ctx->bc, &alu); 2544 if (r) 2545 return r; 2546 } 2547 } else { 2548 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2549 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2550 alu.src[0].sel = ctx->temp_reg; 2551 alu.src[0].chan = 1; 2552 2553 alu.dst.sel = ctx->temp_reg; 2554 alu.dst.chan = 1; 2555 alu.dst.write = 1; 2556 alu.last = 1; 2557 2558 r = r600_bc_add_alu(ctx->bc, &alu); 2559 if (r) 2560 return r; 2561 } 2562 2563 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2564 for (i = 0; i < 3; i++) { 
2565 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2566 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2567 alu.src[0].sel = ctx->temp_reg; 2568 alu.src[0].chan = 1; 2569 2570 alu.dst.sel = ctx->temp_reg; 2571 alu.dst.chan = i; 2572 if (i == 1) 2573 alu.dst.write = 1; 2574 if (i == 2) 2575 alu.last = 1; 2576 2577 r = r600_bc_add_alu(ctx->bc, &alu); 2578 if (r) 2579 return r; 2580 } 2581 } else { 2582 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2583 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2584 alu.src[0].sel = ctx->temp_reg; 2585 alu.src[0].chan = 1; 2586 2587 alu.dst.sel = ctx->temp_reg; 2588 alu.dst.chan = 1; 2589 alu.dst.write = 1; 2590 alu.last = 1; 2591 2592 r = r600_bc_add_alu(ctx->bc, &alu); 2593 if (r) 2594 return r; 2595 } 2596 2597 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2598 2599 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2600 2601 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2602 2603 alu.src[1].sel = ctx->temp_reg; 2604 alu.src[1].chan = 1; 2605 2606 alu.dst.sel = ctx->temp_reg; 2607 alu.dst.chan = 1; 2608 alu.dst.write = 1; 2609 alu.last = 1; 2610 2611 r = r600_bc_add_alu(ctx->bc, &alu); 2612 if (r) 2613 return r; 2614 } 2615 2616 /* result.z = log2(src);*/ 2617 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2618 if (ctx->bc->chiprev == CHIPREV_CAYMAN) { 2619 for (i = 0; i < 3; i++) { 2620 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2621 2622 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2623 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2624 2625 alu.dst.sel = ctx->temp_reg; 2626 if (i == 2) 2627 alu.dst.write = 1; 2628 alu.dst.chan = i; 2629 if (i == 2) 2630 alu.last = 1; 2631 2632 r = r600_bc_add_alu(ctx->bc, &alu); 2633 if (r) 2634 return r; 2635 } 2636 } else { 2637 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2638 2639 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2640 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2641 2642 alu.dst.sel = ctx->temp_reg; 2643 
alu.dst.write = 1; 2644 alu.dst.chan = 2; 2645 alu.last = 1; 2646 2647 r = r600_bc_add_alu(ctx->bc, &alu); 2648 if (r) 2649 return r; 2650 } 2651 } 2652 2653 /* result.w = 1.0; */ 2654 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2655 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2656 2657 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2658 alu.src[0].sel = V_SQ_ALU_SRC_1; 2659 alu.src[0].chan = 0; 2660 2661 alu.dst.sel = ctx->temp_reg; 2662 alu.dst.chan = 3; 2663 alu.dst.write = 1; 2664 alu.last = 1; 2665 2666 r = r600_bc_add_alu(ctx->bc, &alu); 2667 if (r) 2668 return r; 2669 } 2670 2671 return tgsi_helper_copy(ctx, inst); 2672} 2673 2674static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2675{ 2676 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2677 struct r600_bc_alu alu; 2678 int r; 2679 2680 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2681 2682 switch (inst->Instruction.Opcode) { 2683 case TGSI_OPCODE_ARL: 2684 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2685 break; 2686 case TGSI_OPCODE_ARR: 2687 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2688 break; 2689 default: 2690 assert(0); 2691 return -1; 2692 } 2693 2694 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2695 alu.last = 1; 2696 alu.dst.sel = ctx->ar_reg; 2697 alu.dst.write = 1; 2698 r = r600_bc_add_alu(ctx->bc, &alu); 2699 if (r) 2700 return r; 2701 2702 /* TODO: Note that the MOVA can be avoided if we never use AR for 2703 * indexing non-CB registers in the current ALU clause. Similarly, we 2704 * need to load AR from ar_reg again if we started a new clause 2705 * between ARL and AR usage. The easy way to do that is to remove 2706 * the MOVA here, and load it for the first AR access after ar_reg 2707 * has been modified in each clause. 
*/ 2708 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2709 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2710 alu.src[0].sel = ctx->ar_reg; 2711 alu.src[0].chan = 0; 2712 alu.last = 1; 2713 r = r600_bc_add_alu(ctx->bc, &alu); 2714 if (r) 2715 return r; 2716 return 0; 2717} 2718static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2719{ 2720 /* TODO from r600c, ar values don't persist between clauses */ 2721 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2722 struct r600_bc_alu alu; 2723 int r; 2724 2725 switch (inst->Instruction.Opcode) { 2726 case TGSI_OPCODE_ARL: 2727 memset(&alu, 0, sizeof(alu)); 2728 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 2729 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2730 alu.dst.sel = ctx->ar_reg; 2731 alu.dst.write = 1; 2732 alu.last = 1; 2733 2734 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2735 return r; 2736 2737 memset(&alu, 0, sizeof(alu)); 2738 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2739 alu.src[0].sel = ctx->ar_reg; 2740 alu.dst.sel = ctx->ar_reg; 2741 alu.dst.write = 1; 2742 alu.last = 1; 2743 2744 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2745 return r; 2746 break; 2747 case TGSI_OPCODE_ARR: 2748 memset(&alu, 0, sizeof(alu)); 2749 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2750 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2751 alu.dst.sel = ctx->ar_reg; 2752 alu.dst.write = 1; 2753 alu.last = 1; 2754 2755 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2756 return r; 2757 break; 2758 default: 2759 assert(0); 2760 return -1; 2761 } 2762 2763 memset(&alu, 0, sizeof(alu)); 2764 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2765 alu.src[0].sel = ctx->ar_reg; 2766 alu.last = 1; 2767 2768 r = r600_bc_add_alu(ctx->bc, &alu); 2769 if (r) 2770 return r; 2771 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2772 return 0; 2773} 2774 2775static int tgsi_opdst(struct r600_shader_ctx *ctx) 2776{ 2777 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2778 
struct r600_bc_alu alu; 2779 int i, r = 0; 2780 2781 for (i = 0; i < 4; i++) { 2782 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2783 2784 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2785 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2786 2787 if (i == 0 || i == 3) { 2788 alu.src[0].sel = V_SQ_ALU_SRC_1; 2789 } else { 2790 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2791 } 2792 2793 if (i == 0 || i == 2) { 2794 alu.src[1].sel = V_SQ_ALU_SRC_1; 2795 } else { 2796 r600_bc_src(&alu.src[1], &ctx->src[1], i); 2797 } 2798 if (i == 3) 2799 alu.last = 1; 2800 r = r600_bc_add_alu(ctx->bc, &alu); 2801 if (r) 2802 return r; 2803 } 2804 return 0; 2805} 2806 2807static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2808{ 2809 struct r600_bc_alu alu; 2810 int r; 2811 2812 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2813 alu.inst = opcode; 2814 alu.predicate = 1; 2815 2816 alu.dst.sel = ctx->temp_reg; 2817 alu.dst.write = 1; 2818 alu.dst.chan = 0; 2819 2820 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2821 alu.src[1].sel = V_SQ_ALU_SRC_0; 2822 alu.src[1].chan = 0; 2823 2824 alu.last = 1; 2825 2826 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2827 if (r) 2828 return r; 2829 return 0; 2830} 2831 2832static int pops(struct r600_shader_ctx *ctx, int pops) 2833{ 2834 int alu_pop = 3; 2835 if (ctx->bc->cf_last) { 2836 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) 2837 alu_pop = 0; 2838 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) 2839 alu_pop = 1; 2840 } 2841 alu_pop += pops; 2842 if (alu_pop == 1) { 2843 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; 2844 ctx->bc->force_add_cf = 1; 2845 } else if (alu_pop == 2) { 2846 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; 2847 ctx->bc->force_add_cf = 1; 2848 } else { 2849 r600_bc_add_cfinst(ctx->bc, 
CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2850 ctx->bc->cf_last->pop_count = pops; 2851 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 2852 } 2853 return 0; 2854} 2855 2856static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2857{ 2858 switch(reason) { 2859 case FC_PUSH_VPM: 2860 ctx->bc->callstack[ctx->bc->call_sp].current--; 2861 break; 2862 case FC_PUSH_WQM: 2863 case FC_LOOP: 2864 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2865 break; 2866 case FC_REP: 2867 /* TOODO : for 16 vp asic should -= 2; */ 2868 ctx->bc->callstack[ctx->bc->call_sp].current --; 2869 break; 2870 } 2871} 2872 2873static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2874{ 2875 if (check_max_only) { 2876 int diff; 2877 switch (reason) { 2878 case FC_PUSH_VPM: 2879 diff = 1; 2880 break; 2881 case FC_PUSH_WQM: 2882 diff = 4; 2883 break; 2884 default: 2885 assert(0); 2886 diff = 0; 2887 } 2888 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2889 ctx->bc->callstack[ctx->bc->call_sp].max) { 2890 ctx->bc->callstack[ctx->bc->call_sp].max = 2891 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2892 } 2893 return; 2894 } 2895 switch (reason) { 2896 case FC_PUSH_VPM: 2897 ctx->bc->callstack[ctx->bc->call_sp].current++; 2898 break; 2899 case FC_PUSH_WQM: 2900 case FC_LOOP: 2901 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2902 break; 2903 case FC_REP: 2904 ctx->bc->callstack[ctx->bc->call_sp].current++; 2905 break; 2906 } 2907 2908 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2909 ctx->bc->callstack[ctx->bc->call_sp].max) { 2910 ctx->bc->callstack[ctx->bc->call_sp].max = 2911 ctx->bc->callstack[ctx->bc->call_sp].current; 2912 } 2913} 2914 2915static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2916{ 2917 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2918 2919 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2920 sizeof(struct 
r600_bc_cf *) * (sp->num_mid + 1)); 2921 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2922 sp->num_mid++; 2923} 2924 2925static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2926{ 2927 ctx->bc->fc_sp++; 2928 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2929 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2930} 2931 2932static void fc_poplevel(struct r600_shader_ctx *ctx) 2933{ 2934 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2935 if (sp->mid) { 2936 free(sp->mid); 2937 sp->mid = NULL; 2938 } 2939 sp->num_mid = 0; 2940 sp->start = NULL; 2941 sp->type = 0; 2942 ctx->bc->fc_sp--; 2943} 2944 2945#if 0 2946static int emit_return(struct r600_shader_ctx *ctx) 2947{ 2948 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2949 return 0; 2950} 2951 2952static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2953{ 2954 2955 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2956 ctx->bc->cf_last->pop_count = pops; 2957 /* TODO work out offset */ 2958 return 0; 2959} 2960 2961static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2962{ 2963 return 0; 2964} 2965 2966static void emit_testflag(struct r600_shader_ctx *ctx) 2967{ 2968 2969} 2970 2971static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2972{ 2973 emit_testflag(ctx); 2974 emit_jump_to_offset(ctx, 1, 4); 2975 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2976 pops(ctx, ifidx + 1); 2977 emit_return(ctx); 2978} 2979 2980static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2981{ 2982 emit_testflag(ctx); 2983 2984 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2985 ctx->bc->cf_last->pop_count = 1; 2986 2987 fc_set_mid(ctx, fc_sp); 2988 2989 pops(ctx, 1); 2990} 2991#endif 2992 2993static int tgsi_if(struct r600_shader_ctx *ctx) 2994{ 2995 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2996 2997 
r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2998 2999 fc_pushlevel(ctx, FC_IF); 3000 3001 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 3002 return 0; 3003} 3004 3005static int tgsi_else(struct r600_shader_ctx *ctx) 3006{ 3007 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 3008 ctx->bc->cf_last->pop_count = 1; 3009 3010 fc_set_mid(ctx, ctx->bc->fc_sp); 3011 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 3012 return 0; 3013} 3014 3015static int tgsi_endif(struct r600_shader_ctx *ctx) 3016{ 3017 pops(ctx, 1); 3018 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 3019 R600_ERR("if/endif unbalanced in shader\n"); 3020 return -1; 3021 } 3022 3023 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 3024 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3025 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 3026 } else { 3027 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 3028 } 3029 fc_poplevel(ctx); 3030 3031 callstack_decrease_current(ctx, FC_PUSH_VPM); 3032 return 0; 3033} 3034 3035static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 3036{ 3037 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 3038 3039 fc_pushlevel(ctx, FC_LOOP); 3040 3041 /* check stack depth */ 3042 callstack_check_depth(ctx, FC_LOOP, 0); 3043 return 0; 3044} 3045 3046static int tgsi_endloop(struct r600_shader_ctx *ctx) 3047{ 3048 int i; 3049 3050 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 3051 3052 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 3053 R600_ERR("loop/endloop in shader code are not paired.\n"); 3054 return -EINVAL; 3055 } 3056 3057 /* fixup loop pointers - from r600isa 3058 LOOP END points to CF after LOOP START, 3059 LOOP START point to CF after LOOP END 3060 BRK/CONT point to LOOP END CF 3061 */ 3062 ctx->bc->cf_last->cf_addr = 
ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 3063 3064 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3065 3066 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 3067 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 3068 } 3069 /* TODO add LOOPRET support */ 3070 fc_poplevel(ctx); 3071 callstack_decrease_current(ctx, FC_LOOP); 3072 return 0; 3073} 3074 3075static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 3076{ 3077 unsigned int fscp; 3078 3079 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 3080 { 3081 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 3082 break; 3083 } 3084 3085 if (fscp == 0) { 3086 R600_ERR("Break not inside loop/endloop pair\n"); 3087 return -EINVAL; 3088 } 3089 3090 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3091 ctx->bc->cf_last->pop_count = 1; 3092 3093 fc_set_mid(ctx, fscp); 3094 3095 pops(ctx, 1); 3096 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 3097 return 0; 3098} 3099 3100static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 3101 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3102 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3103 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3104 3105 /* FIXME: 3106 * For state trackers other than OpenGL, we'll want to use 3107 * _RECIP_IEEE instead. 
3108 */ 3109 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 3110 3111 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 3112 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3113 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3114 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3115 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3116 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3117 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3118 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3119 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3120 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3121 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3122 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3123 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3124 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3125 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3126 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3127 /* gap */ 3128 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3129 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3130 /* gap */ 3131 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3132 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3133 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3134 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3135 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3136 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3137 {TGSI_OPCODE_EX2, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3138 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3139 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3140 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3141 /* gap */ 3142 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3143 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3144 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3145 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3146 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3147 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3148 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3149 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3150 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3151 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3152 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3153 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3154 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3155 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3156 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3157 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3158 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3159 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3160 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3161 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3162 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3163 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3164 
{TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3165 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3166 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3167 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3168 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3169 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3170 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3171 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3172 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3173 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3174 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3175 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3176 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3177 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3178 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3179 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3180 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3181 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3182 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3183 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3184 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3185 /* gap */ 3186 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3187 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3188 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3189 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3190 /* gap */ 3191 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3192 {80, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3193 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3194 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3195 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3196 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3197 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3198 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3199 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3200 /* gap */ 3201 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3202 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3203 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3204 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3205 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3206 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3207 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3208 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3209 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3210 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3211 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3212 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3213 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3214 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3215 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3216 /* gap */ 3217 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3218 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 3219 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3220 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3221 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3222 /* gap */ 3223 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3224 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3225 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3226 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3227 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3228 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3229 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3230 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3231 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3232 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3233 /* gap */ 3234 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3235 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3236 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3237 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3238 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3239 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3240 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3241 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3242 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3243 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3244 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3245 {TGSI_OPCODE_UADD, 0, 
V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3246 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3247 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3248 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3249 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3250 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3251 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3252 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3253 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3254 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3255 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3256 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3257 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3258 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3259 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3260 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3261 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3262}; 3263 3264static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3265 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3266 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3267 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3268 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3269 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, 3270 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3271 {TGSI_OPCODE_LOG, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3272 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3273 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3274 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3275 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3276 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3277 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3278 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3279 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3280 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3281 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3282 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3283 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3284 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3285 /* gap */ 3286 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3287 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3288 /* gap */ 3289 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3290 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3291 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3292 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3293 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3294 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3295 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3296 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3297 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3298 
{TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3299 /* gap */ 3300 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3301 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3302 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3303 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3304 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3305 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3306 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3307 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3308 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3309 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3310 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3311 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3312 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3313 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3314 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3315 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3316 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3317 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3318 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3319 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3320 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3321 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3322 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3323 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3324 {TGSI_OPCODE_UP2US, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3325 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3326 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3327 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3328 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3329 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3330 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3331 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3332 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3333 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3334 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3335 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3336 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3337 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3338 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3339 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3340 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3341 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3342 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3343 /* gap */ 3344 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3345 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3346 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3347 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3348 /* gap */ 3349 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3350 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3351 {TGSI_OPCODE_PUSHA, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3352 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3353 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3354 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3355 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3356 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3357 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3358 /* gap */ 3359 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3360 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3361 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3362 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3363 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3364 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3365 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3366 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3367 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3368 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3369 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3370 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3371 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3372 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3373 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3374 /* gap */ 3375 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3376 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 
3377 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3378 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3379 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3380 /* gap */ 3381 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3382 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3383 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3384 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3385 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3386 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3387 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3388 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3389 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3390 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3391 /* gap */ 3392 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3393 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3394 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3395 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3396 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3397 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3398 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3399 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3400 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3401 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3402 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 3403 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3404 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3405 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3406 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3407 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3408 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3409 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3410 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3411 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3412 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3413 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3414 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3415 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3416 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3417 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3418 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3419 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3420}; 3421 3422static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 3423 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3424 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3425 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3426 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, 3427 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, 
3428 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3429 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3430 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3431 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3432 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3433 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3434 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3435 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3436 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3437 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3438 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3439 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3440 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3441 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3442 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3443 /* gap */ 3444 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3445 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3446 /* gap */ 3447 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3448 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3449 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3450 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3451 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3452 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3453 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, 3454 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, 
cayman_emit_float_instr}, 3455 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, 3456 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3457 /* gap */ 3458 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3459 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3460 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3461 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3462 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, 3463 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3464 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3465 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3466 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3467 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3468 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3469 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3470 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3471 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3472 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3473 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3474 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, 3475 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3476 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3477 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3478 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3479 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3480 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3481 
{TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3482 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3483 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3484 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3485 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3486 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3487 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3488 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3489 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3490 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3491 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3492 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3493 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3494 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3495 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3496 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3497 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3498 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3499 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3500 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3501 /* gap */ 3502 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3503 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3504 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3505 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3506 /* gap */ 3507 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3508 {80, 
0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3509 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3510 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3511 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3512 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3513 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3514 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3515 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3516 /* gap */ 3517 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3518 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3519 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3520 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3521 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3522 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3523 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3524 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3525 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3526 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3527 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3528 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3529 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3530 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3531 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3532 /* gap */ 3533 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 3534 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3535 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3536 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3537 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3538 /* gap */ 3539 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3540 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3541 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3542 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3543 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3544 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3545 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3546 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3547 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3548 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3549 /* gap */ 3550 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3551 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3552 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3553 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3554 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3555 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3556 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3557 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3558 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3559 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, 3560 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3561 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3562 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3563 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3564 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3565 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3566 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3567 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3568 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3569 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3570 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3571 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3572 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3573 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3574 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3575 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3576 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3577 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3578}; 3579