r600_shader.c revision f8279fb9d82cbbbbaf8a5cc26486142c21d4d2d2
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_info.h" 25#include "tgsi/tgsi_parse.h" 26#include "tgsi/tgsi_scan.h" 27#include "tgsi/tgsi_dump.h" 28#include "util/u_format.h" 29#include "r600_pipe.h" 30#include "r600_asm.h" 31#include "r600_sq.h" 32#include "r600_formats.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37#include <byteswap.h> 38 39int r600_find_vs_semantic_index(struct r600_shader *vs, 40 struct r600_shader *ps, int id) 41{ 42 struct r600_shader_io *input = &ps->input[id]; 43 44 for (int i = 0; i < vs->noutput; i++) { 45 if (input->name == vs->output[i].name && 46 input->sid == vs->output[i].sid) { 47 return i - 1; 48 } 49 } 50 return 0; 51} 52 53static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 54{ 55 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 56 struct r600_shader *rshader = &shader->shader; 57 uint32_t *ptr; 58 int i; 59 60 /* copy new shader */ 61 if (shader->bo == NULL) { 62 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); 63 if (shader->bo == NULL) { 64 return -ENOMEM; 65 } 66 ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 67 if (R600_BIG_ENDIAN) { 68 for (i = 0; i < rshader->bc.ndw; ++i) { 69 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 70 } 71 } else { 72 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 73 } 74 r600_bo_unmap(rctx->radeon, shader->bo); 75 } 76 /* build state */ 77 switch (rshader->processor_type) { 78 case TGSI_PROCESSOR_VERTEX: 79 if (rshader->family >= CHIP_CEDAR) { 80 evergreen_pipe_shader_vs(ctx, shader); 81 } else { 82 r600_pipe_shader_vs(ctx, shader); 83 } 84 break; 85 case TGSI_PROCESSOR_FRAGMENT: 86 if (rshader->family >= CHIP_CEDAR) { 87 evergreen_pipe_shader_ps(ctx, shader); 88 } else { 89 r600_pipe_shader_ps(ctx, shader); 90 } 91 break; 92 default: 93 return -EINVAL; 94 } 95 return 0; 96} 97 98static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 99 100int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 101{ 102 static int dump_shaders = -1; 103 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 104 int r; 105 106 /* Would like some magic "get_bool_option_once" routine. 107 */ 108 if (dump_shaders == -1) 109 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 110 111 if (dump_shaders) { 112 fprintf(stderr, "--------------------------------------------------------------\n"); 113 tgsi_dump(tokens, 0); 114 } 115 shader->shader.family = r600_get_family(rctx->radeon); 116 r = r600_shader_from_tgsi(tokens, &shader->shader); 117 if (r) { 118 R600_ERR("translation from TGSI failed !\n"); 119 return r; 120 } 121 r = r600_bc_build(&shader->shader.bc); 122 if (r) { 123 R600_ERR("building bytecode failed !\n"); 124 return r; 125 } 126 if (dump_shaders) { 127 r600_bc_dump(&shader->shader.bc); 128 fprintf(stderr, "______________________________________________________________\n"); 129 } 130 return r600_pipe_shader(ctx, shader); 131} 132 133void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 134{ 135 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 136 137 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 138 r600_bc_clear(&shader->shader.bc); 139} 140 141/* 142 * tgsi -> r600 shader 143 */ 144struct r600_shader_tgsi_instruction; 145 146struct r600_shader_src { 147 unsigned sel; 148 unsigned swizzle[4]; 149 unsigned neg; 150 unsigned abs; 151 unsigned rel; 152 uint32_t value[4]; 153}; 154 155struct r600_shader_ctx { 156 struct tgsi_shader_info info; 157 struct tgsi_parse_context parse; 158 const struct tgsi_token *tokens; 159 unsigned type; 160 unsigned file_offset[TGSI_FILE_COUNT]; 161 unsigned temp_reg; 162 unsigned ar_reg; 163 struct r600_shader_tgsi_instruction *inst_info; 164 struct r600_bc *bc; 165 struct r600_shader *shader; 166 struct r600_shader_src src[3]; 167 u32 *literals; 168 u32 nliterals; 169 u32 max_driver_temp_used; 170 /* needed for evergreen interpolation */ 171 boolean input_centroid; 172 boolean input_linear; 173 boolean input_perspective; 174 int num_interp_gpr; 175}; 176 177struct r600_shader_tgsi_instruction { 178 unsigned tgsi_opcode; 179 unsigned is_op3; 180 unsigned r600_opcode; 181 int (*process)(struct r600_shader_ctx *ctx); 182}; 183 184static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 185static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 186 187static int tgsi_is_supported(struct r600_shader_ctx *ctx) 188{ 189 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 190 int j; 191 192 if (i->Instruction.NumDstRegs > 1) { 193 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 194 return -EINVAL; 195 } 196 if (i->Instruction.Predicate) { 197 R600_ERR("predicate unsupported\n"); 198 return -EINVAL; 199 } 200#if 0 201 if (i->Instruction.Label) { 202 R600_ERR("label unsupported\n"); 203 return -EINVAL; 204 } 205#endif 206 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 207 if (i->Src[j].Register.Dimension) { 208 R600_ERR("unsupported src %d (dimension %d)\n", j, 209 i->Src[j].Register.Dimension); 210 return -EINVAL; 211 } 212 } 213 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 214 if (i->Dst[j].Register.Dimension) { 215 R600_ERR("unsupported dst (dimension)\n"); 216 return -EINVAL; 217 } 218 } 219 return 0; 220} 221 222static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 223{ 224 int i, r; 225 struct r600_bc_alu alu; 226 int gpr = 0, base_chan = 0; 227 int ij_index = 0; 228 229 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 230 ij_index = 0; 231 if (ctx->shader->input[input].centroid) 232 ij_index++; 233 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 234 ij_index = 0; 235 /* if we have perspective add one */ 236 if (ctx->input_perspective) { 237 ij_index++; 238 /* if we have perspective centroid */ 239 if (ctx->input_centroid) 240 ij_index++; 241 } 242 if (ctx->shader->input[input].centroid) 243 ij_index++; 244 } 245 246 /* work out gpr and base_chan from index */ 247 gpr = ij_index / 2; 248 base_chan = (2 * (ij_index % 2)) + 1; 249 250 for (i = 0; i < 8; i++) { 251 memset(&alu, 0, sizeof(struct r600_bc_alu)); 252 253 if (i < 4) 254 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 255 else 256 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 257 258 if ((i > 1) && (i < 6)) { 259 alu.dst.sel = ctx->shader->input[input].gpr; 260 alu.dst.write = 1; 261 } 262 263 alu.dst.chan = i % 4; 264 265 alu.src[0].sel = gpr; 266 alu.src[0].chan = (base_chan - (i % 2)); 267 268 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 269 270 alu.bank_swizzle_force = SQ_ALU_VEC_210; 271 if ((i % 4) == 3) 272 alu.last = 1; 273 r = r600_bc_add_alu(ctx->bc, &alu); 274 if (r) 275 return r; 276 } 277 return 0; 278} 279 280 281static int tgsi_declaration(struct r600_shader_ctx *ctx) 282{ 283 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 284 unsigned i; 285 int r; 286 287 switch (d->Declaration.File) { 288 case TGSI_FILE_INPUT: 289 i = ctx->shader->ninput++; 290 ctx->shader->input[i].name = d->Semantic.Name; 291 ctx->shader->input[i].sid = d->Semantic.Index; 292 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 293 ctx->shader->input[i].centroid = d->Declaration.Centroid; 294 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 295 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { 296 /* turn input into interpolate on EG */ 297 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 298 if (ctx->shader->input[i].interpolate > 0) { 299 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 300 evergreen_interp_alu(ctx, i); 301 } 302 } 303 } 304 break; 305 case TGSI_FILE_OUTPUT: 306 i = ctx->shader->noutput++; 307 ctx->shader->output[i].name = d->Semantic.Name; 308 ctx->shader->output[i].sid = d->Semantic.Index; 309 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 310 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 311 break; 312 case TGSI_FILE_CONSTANT: 313 case TGSI_FILE_TEMPORARY: 314 case TGSI_FILE_SAMPLER: 315 case TGSI_FILE_ADDRESS: 316 break; 317 318 case TGSI_FILE_SYSTEM_VALUE: 319 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 320 struct r600_bc_alu alu; 321 memset(&alu, 0, sizeof(struct r600_bc_alu)); 322 323 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 324 alu.src[0].sel = 0; 325 alu.src[0].chan = 3; 326 327 alu.dst.sel = 0; 328 alu.dst.chan = 3; 329 alu.dst.write = 1; 330 alu.last = 1; 331 332 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 333 return r; 334 break; 335 } 336 337 default: 338 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 339 return -EINVAL; 340 } 341 return 0; 342} 343 344static int r600_get_temp(struct r600_shader_ctx *ctx) 345{ 346 return ctx->temp_reg + ctx->max_driver_temp_used++; 347} 348 349/* 350 * for evergreen we need to scan the shader to find the number of GPRs we need to 351 * reserve for interpolation. 352 * 353 * we need to know if we are going to emit 354 * any centroid inputs 355 * if perspective and linear are required 356*/ 357static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 358{ 359 int i; 360 int num_baryc; 361 362 ctx->input_linear = FALSE; 363 ctx->input_perspective = FALSE; 364 ctx->input_centroid = FALSE; 365 ctx->num_interp_gpr = 1; 366 367 /* any centroid inputs */ 368 for (i = 0; i < ctx->info.num_inputs; i++) { 369 /* skip position/face */ 370 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 371 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 372 continue; 373 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 374 ctx->input_linear = TRUE; 375 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 376 ctx->input_perspective = TRUE; 377 if (ctx->info.input_centroid[i]) 378 ctx->input_centroid = TRUE; 379 } 380 381 num_baryc = 0; 382 /* ignoring sample for now */ 383 if (ctx->input_perspective) 384 num_baryc++; 385 if (ctx->input_linear) 386 num_baryc++; 387 if (ctx->input_centroid) 388 num_baryc *= 2; 389 390 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 391 392 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 393 return ctx->num_interp_gpr; 394} 395 396static void tgsi_src(struct r600_shader_ctx *ctx, 397 const struct tgsi_full_src_register *tgsi_src, 398 struct r600_shader_src *r600_src) 399{ 400 memset(r600_src, 0, sizeof(*r600_src)); 401 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 402 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 403 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 404 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 405 r600_src->neg = tgsi_src->Register.Negate; 406 r600_src->abs = tgsi_src->Register.Absolute; 407 408 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 409 int index; 410 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 411 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 412 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 413 414 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 415 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 416 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 417 return; 418 } 419 index = tgsi_src->Register.Index; 420 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 421 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 422 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 423 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ 424 r600_src->swizzle[0] = 3; 425 r600_src->swizzle[1] = 3; 426 r600_src->swizzle[2] = 3; 427 r600_src->swizzle[3] = 3; 428 r600_src->sel = 0; 429 } else { 430 if (tgsi_src->Register.Indirect) 431 r600_src->rel = V_SQ_REL_RELATIVE; 432 r600_src->sel = tgsi_src->Register.Index; 433 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 434 } 435} 436 437static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 438{ 439 struct r600_bc_vtx vtx; 440 unsigned int ar_reg; 441 int r; 442 443 if (offset) { 444 struct r600_bc_alu alu; 445 446 memset(&alu, 0, sizeof(alu)); 447 448 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 449 alu.src[0].sel = ctx->ar_reg; 450 451 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 452 alu.src[1].value = offset; 453 454 alu.dst.sel = dst_reg; 455 alu.dst.write = 1; 456 alu.last = 1; 457 458 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 459 return r; 460 461 ar_reg = dst_reg; 462 } else { 463 ar_reg = ctx->ar_reg; 464 } 465 466 memset(&vtx, 0, sizeof(vtx)); 467 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 468 vtx.src_gpr = ar_reg; 469 vtx.mega_fetch_count = 16; 470 vtx.dst_gpr = dst_reg; 471 vtx.dst_sel_x = 0; /* SEL_X */ 472 vtx.dst_sel_y = 1; /* SEL_Y */ 473 vtx.dst_sel_z = 2; /* SEL_Z */ 474 vtx.dst_sel_w = 3; /* SEL_W */ 475 vtx.data_format = FMT_32_32_32_32_FLOAT; 476 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 477 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 478 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 479 vtx.endian = r600_endian_swap(32); 480 481 if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) 482 return r; 483 484 return 0; 485} 486 487static int tgsi_split_constant(struct r600_shader_ctx *ctx) 488{ 489 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 490 struct r600_bc_alu alu; 491 int i, j, k, nconst, r; 492 493 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 494 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 495 nconst++; 496 } 497 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 498 } 499 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 500 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 501 continue; 502 } 503 504 if (ctx->src[i].rel) { 505 int treg = r600_get_temp(ctx); 506 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 507 return r; 508 509 ctx->src[i].sel = treg; 510 ctx->src[i].rel = 0; 511 j--; 512 } else if (j > 0) { 513 int treg = r600_get_temp(ctx); 514 for (k = 0; k < 4; k++) { 515 memset(&alu, 0, sizeof(struct r600_bc_alu)); 516 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 517 alu.src[0].sel = ctx->src[i].sel; 518 alu.src[0].chan = k; 519 alu.src[0].rel = ctx->src[i].rel; 520 alu.dst.sel = treg; 521 alu.dst.chan = k; 522 alu.dst.write = 1; 523 if (k == 3) 524 alu.last = 1; 525 r = r600_bc_add_alu(ctx->bc, &alu); 526 if (r) 527 return r; 528 } 529 ctx->src[i].sel = treg; 530 ctx->src[i].rel =0; 531 j--; 532 } 533 } 534 return 0; 535} 536 537/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 538static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 539{ 540 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 541 struct r600_bc_alu alu; 542 int i, j, k, nliteral, r; 543 544 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 545 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 546 nliteral++; 547 } 548 } 549 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 550 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 551 int treg = r600_get_temp(ctx); 552 for (k = 0; k < 4; k++) { 553 memset(&alu, 0, sizeof(struct r600_bc_alu)); 554 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 555 alu.src[0].sel = ctx->src[i].sel; 556 alu.src[0].chan = k; 557 alu.src[0].value = ctx->src[i].value[k]; 558 alu.dst.sel = treg; 559 alu.dst.chan = k; 560 alu.dst.write = 1; 561 if (k == 3) 562 alu.last = 1; 563 r = r600_bc_add_alu(ctx->bc, &alu); 564 if (r) 565 return r; 566 } 567 ctx->src[i].sel = treg; 568 j--; 569 } 570 } 571 return 0; 572} 573 574static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 575{ 576 struct tgsi_full_immediate *immediate; 577 struct tgsi_full_property *property; 578 struct r600_shader_ctx ctx; 579 struct r600_bc_output output[32]; 580 unsigned output_done, noutput; 581 unsigned opcode; 582 int i, r = 0, pos0; 583 584 ctx.bc = &shader->bc; 585 ctx.shader = shader; 586 r = r600_bc_init(ctx.bc, shader->family); 587 if (r) 588 return r; 589 ctx.tokens = tokens; 590 tgsi_scan_shader(tokens, &ctx.info); 591 tgsi_parse_init(&ctx.parse, tokens); 592 ctx.type = ctx.parse.FullHeader.Processor.Processor; 593 shader->processor_type = ctx.type; 594 ctx.bc->type = shader->processor_type; 595 596 /* register allocations */ 597 /* Values [0,127] correspond to GPR[0..127]. 598 * Values [128,159] correspond to constant buffer bank 0 599 * Values [160,191] correspond to constant buffer bank 1 600 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 601 * Values [256,287] correspond to constant buffer bank 2 (EG) 602 * Values [288,319] correspond to constant buffer bank 3 (EG) 603 * Other special values are shown in the list below. 604 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 605 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 606 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 607 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 608 * 248 SQ_ALU_SRC_0: special constant 0.0. 609 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 610 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 611 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 612 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 613 * 253 SQ_ALU_SRC_LITERAL: literal constant. 614 * 254 SQ_ALU_SRC_PV: previous vector result. 615 * 255 SQ_ALU_SRC_PS: previous scalar result. 616 */ 617 for (i = 0; i < TGSI_FILE_COUNT; i++) { 618 ctx.file_offset[i] = 0; 619 } 620 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 621 ctx.file_offset[TGSI_FILE_INPUT] = 1; 622 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { 623 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 624 } else { 625 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 626 } 627 } 628 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { 629 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 630 } 631 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 632 ctx.info.file_count[TGSI_FILE_INPUT]; 633 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 634 ctx.info.file_count[TGSI_FILE_OUTPUT]; 635 636 /* Outside the GPR range. This will be translated to one of the 637 * kcache banks later. */ 638 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 639 640 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 641 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 642 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 643 ctx.temp_reg = ctx.ar_reg + 1; 644 645 ctx.nliterals = 0; 646 ctx.literals = NULL; 647 shader->fs_write_all = FALSE; 648 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 649 tgsi_parse_token(&ctx.parse); 650 switch (ctx.parse.FullToken.Token.Type) { 651 case TGSI_TOKEN_TYPE_IMMEDIATE: 652 immediate = &ctx.parse.FullToken.FullImmediate; 653 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 654 if(ctx.literals == NULL) { 655 r = -ENOMEM; 656 goto out_err; 657 } 658 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 659 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 660 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 661 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 662 ctx.nliterals++; 663 break; 664 case TGSI_TOKEN_TYPE_DECLARATION: 665 r = tgsi_declaration(&ctx); 666 if (r) 667 goto out_err; 668 break; 669 case TGSI_TOKEN_TYPE_INSTRUCTION: 670 r = tgsi_is_supported(&ctx); 671 if (r) 672 goto out_err; 673 ctx.max_driver_temp_used = 0; 674 /* reserve first tmp for everyone */ 675 r600_get_temp(&ctx); 676 677 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 678 if ((r = tgsi_split_constant(&ctx))) 679 goto out_err; 680 if ((r = tgsi_split_literal_constant(&ctx))) 681 goto out_err; 682 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) 683 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 684 else 685 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 686 r = ctx.inst_info->process(&ctx); 687 if (r) 688 goto out_err; 689 break; 690 case TGSI_TOKEN_TYPE_PROPERTY: 691 property = &ctx.parse.FullToken.FullProperty; 692 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { 693 if (property->u[0].Data == 1) 694 shader->fs_write_all = TRUE; 695 } 696 break; 697 default: 698 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 699 r = -EINVAL; 700 goto out_err; 701 } 702 } 703 /* export output */ 704 noutput = shader->noutput; 705 for (i = 0, pos0 = 0; i < noutput; i++) { 706 memset(&output[i], 0, sizeof(struct r600_bc_output)); 707 output[i].gpr = shader->output[i].gpr; 708 output[i].elem_size = 3; 709 output[i].swizzle_x = 0; 710 output[i].swizzle_y = 1; 711 output[i].swizzle_z = 2; 712 output[i].swizzle_w = 3; 713 output[i].burst_count = 1; 714 output[i].barrier = 1; 715 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 716 output[i].array_base = i - pos0; 717 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 718 switch (ctx.type) { 719 case TGSI_PROCESSOR_VERTEX: 720 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 721 output[i].array_base = 60; 722 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 723 /* position doesn't count in array_base */ 724 pos0++; 725 } 726 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 727 output[i].array_base = 61; 728 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 729 /* position doesn't count in array_base */ 730 pos0++; 731 } 732 break; 733 case TGSI_PROCESSOR_FRAGMENT: 734 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 735 output[i].array_base = shader->output[i].sid; 736 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 737 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 738 output[i].array_base = 61; 739 output[i].swizzle_x = 2; 740 output[i].swizzle_y = 7; 741 output[i].swizzle_z = output[i].swizzle_w = 7; 742 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 743 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 744 output[i].array_base = 61; 745 output[i].swizzle_x = 7; 746 output[i].swizzle_y = 1; 747 output[i].swizzle_z = output[i].swizzle_w = 7; 748 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 749 } else { 750 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 751 r = -EINVAL; 752 goto out_err; 753 } 754 break; 755 default: 756 R600_ERR("unsupported processor type %d\n", ctx.type); 757 r = -EINVAL; 758 goto out_err; 759 } 760 } 761 /* add fake param output for vertex shader if no param is exported */ 762 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 763 for (i = 0, pos0 = 0; i < noutput; i++) { 764 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 765 pos0 = 1; 766 break; 767 } 768 } 769 if (!pos0) { 770 memset(&output[i], 0, sizeof(struct r600_bc_output)); 771 output[i].gpr = 0; 772 output[i].elem_size = 3; 773 output[i].swizzle_x = 0; 774 output[i].swizzle_y = 1; 775 output[i].swizzle_z = 2; 776 output[i].swizzle_w = 3; 777 output[i].burst_count = 1; 778 output[i].barrier = 1; 779 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 780 output[i].array_base = 0; 781 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 782 noutput++; 783 } 784 } 785 /* add fake pixel export */ 786 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 787 memset(&output[0], 0, sizeof(struct r600_bc_output)); 788 output[0].gpr = 0; 789 output[0].elem_size = 3; 790 output[0].swizzle_x = 7; 791 output[0].swizzle_y = 7; 792 output[0].swizzle_z = 7; 793 output[0].swizzle_w = 7; 794 output[0].burst_count = 1; 795 output[0].barrier = 1; 796 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 797 output[0].array_base = 0; 798 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 799 noutput++; 800 } 801 /* set export done on last export of each type */ 802 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 803 if (i == (noutput - 1)) { 804 output[i].end_of_program = 1; 805 } 806 if (!(output_done & (1 << output[i].type))) { 807 output_done |= (1 << output[i].type); 808 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 809 } 810 } 811 /* add output to bytecode */ 812 for (i = 0; i < noutput; i++) { 813 r = r600_bc_add_output(ctx.bc, &output[i]); 814 if (r) 815 goto out_err; 816 } 817 free(ctx.literals); 818 tgsi_parse_free(&ctx.parse); 819 return 0; 820out_err: 821 free(ctx.literals); 822 tgsi_parse_free(&ctx.parse); 823 return r; 824} 825 826static int tgsi_unsupported(struct r600_shader_ctx *ctx) 827{ 828 R600_ERR("%s tgsi opcode unsupported\n", 829 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 830 return -EINVAL; 831} 832 833static int tgsi_end(struct r600_shader_ctx *ctx) 834{ 835 return 0; 836} 837 838static void r600_bc_src(struct r600_bc_alu_src *bc_src, 839 const struct r600_shader_src *shader_src, 840 unsigned chan) 841{ 842 bc_src->sel = shader_src->sel; 843 bc_src->chan = shader_src->swizzle[chan]; 844 bc_src->neg = shader_src->neg; 845 bc_src->abs = shader_src->abs; 846 bc_src->rel = shader_src->rel; 847 bc_src->value = shader_src->value[bc_src->chan]; 848} 849 850static void tgsi_dst(struct r600_shader_ctx *ctx, 851 const struct tgsi_full_dst_register *tgsi_dst, 852 unsigned swizzle, 853 struct r600_bc_alu_dst *r600_dst) 854{ 855 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 856 857 r600_dst->sel = tgsi_dst->Register.Index; 858 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 859 r600_dst->chan = swizzle; 860 r600_dst->write = 1; 861 if (tgsi_dst->Register.Indirect) 862 r600_dst->rel = V_SQ_REL_RELATIVE; 863 if (inst->Instruction.Saturate) { 864 r600_dst->clamp = 1; 865 } 866} 867 868static int tgsi_last_instruction(unsigned writemask) 869{ 870 int i, lasti = 0; 871 872 for (i = 0; i < 4; i++) { 873 if (writemask & (1 << i)) { 874 lasti = i; 875 } 876 } 877 return lasti; 878} 879 880static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 881{ 882 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 883 struct r600_bc_alu alu; 884 int i, j, r; 885 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 886 887 for (i = 0; i < lasti + 1; i++) { 888 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 889 continue; 890 891 memset(&alu, 0, sizeof(struct r600_bc_alu)); 892 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 893 894 alu.inst = ctx->inst_info->r600_opcode; 895 if (!swap) { 896 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 897 r600_bc_src(&alu.src[j], &ctx->src[j], i); 898 } 899 } else { 900 r600_bc_src(&alu.src[0], &ctx->src[1], i); 901 r600_bc_src(&alu.src[1], &ctx->src[0], i); 902 } 903 /* handle some special cases */ 904 switch (ctx->inst_info->tgsi_opcode) { 905 case TGSI_OPCODE_SUB: 906 alu.src[1].neg = 1; 907 break; 908 case TGSI_OPCODE_ABS: 909 alu.src[0].abs = 1; 910 if (alu.src[0].neg) 911 alu.src[0].neg = 0; 912 break; 913 default: 914 break; 915 } 916 if (i == lasti) { 917 alu.last = 1; 918 } 919 r = r600_bc_add_alu(ctx->bc, &alu); 920 if (r) 921 return r; 922 } 923 return 0; 924} 925 926static int tgsi_op2(struct r600_shader_ctx *ctx) 927{ 928 return tgsi_op2_s(ctx, 0); 929} 930 931static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 932{ 933 return tgsi_op2_s(ctx, 1); 934} 935 936/* 937 * r600 - trunc to -PI..PI range 938 * r700 - normalize by dividing by 2PI 939 * see fdo bug 27901 940 */ 941static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 942{ 943 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 944 static float double_pi = 3.1415926535 * 2; 945 static float neg_pi = -3.1415926535; 946 947 int r; 948 struct r600_bc_alu alu; 949 950 memset(&alu, 0, sizeof(struct r600_bc_alu)); 951 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 952 alu.is_op3 = 1; 953 954 alu.dst.chan = 0; 955 alu.dst.sel = ctx->temp_reg; 956 alu.dst.write = 1; 957 958 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 959 960 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 961 alu.src[1].chan = 0; 962 alu.src[1].value = *(uint32_t *)&half_inv_pi; 963 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 964 alu.src[2].chan = 0; 965 alu.last = 1; 966 r = r600_bc_add_alu(ctx->bc, &alu); 967 if (r) 968 return r; 969 970 memset(&alu, 0, sizeof(struct r600_bc_alu)); 971 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 972 973 alu.dst.chan = 0; 974 alu.dst.sel = ctx->temp_reg; 975 alu.dst.write = 1; 976 977 alu.src[0].sel = ctx->temp_reg; 978 alu.src[0].chan = 0; 979 alu.last = 1; 980 r = r600_bc_add_alu(ctx->bc, &alu); 981 if (r) 982 return r; 983 984 memset(&alu, 0, sizeof(struct r600_bc_alu)); 985 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 986 alu.is_op3 = 1; 987 988 alu.dst.chan = 0; 989 alu.dst.sel = ctx->temp_reg; 990 alu.dst.write = 1; 991 992 alu.src[0].sel = ctx->temp_reg; 993 alu.src[0].chan = 0; 994 995 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 996 alu.src[1].chan = 0; 997 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 998 alu.src[2].chan = 0; 999 1000 if (ctx->bc->chiprev == CHIPREV_R600) { 1001 alu.src[1].value = *(uint32_t *)&double_pi; 1002 alu.src[2].value = *(uint32_t *)&neg_pi; 1003 } else { 1004 alu.src[1].sel = V_SQ_ALU_SRC_1; 1005 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1006 alu.src[2].neg = 1; 1007 } 1008 1009 alu.last = 1; 1010 r = r600_bc_add_alu(ctx->bc, &alu); 1011 if (r) 1012 return r; 1013 return 0; 1014} 1015 1016static int tgsi_trig(struct r600_shader_ctx *ctx) 1017{ 1018 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1019 struct r600_bc_alu alu; 1020 int i, r; 1021 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1022 1023 r = tgsi_setup_trig(ctx); 1024 if (r) 1025 return r; 1026 1027 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1028 alu.inst = ctx->inst_info->r600_opcode; 1029 alu.dst.chan = 0; 1030 alu.dst.sel = ctx->temp_reg; 1031 alu.dst.write = 1; 1032 1033 alu.src[0].sel = ctx->temp_reg; 1034 alu.src[0].chan = 0; 1035 alu.last = 1; 1036 r = r600_bc_add_alu(ctx->bc, &alu); 1037 if (r) 1038 return r; 1039 1040 /* replicate result */ 1041 for (i = 0; i < lasti + 1; i++) { 1042 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1043 continue; 1044 1045 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1046 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1047 1048 alu.src[0].sel = ctx->temp_reg; 1049 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1050 if (i == lasti) 1051 alu.last = 1; 1052 r = r600_bc_add_alu(ctx->bc, &alu); 1053 if (r) 1054 return r; 1055 } 1056 return 0; 1057} 1058 1059static int tgsi_scs(struct r600_shader_ctx *ctx) 1060{ 1061 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1062 struct r600_bc_alu alu; 1063 int r; 1064 1065 /* We'll only need the trig stuff if we are going to write to the 1066 * X or Y components of the destination vector. 1067 */ 1068 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1069 r = tgsi_setup_trig(ctx); 1070 if (r) 1071 return r; 1072 } 1073 1074 /* dst.x = COS */ 1075 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1076 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1077 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1078 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1079 1080 alu.src[0].sel = ctx->temp_reg; 1081 alu.src[0].chan = 0; 1082 alu.last = 1; 1083 r = r600_bc_add_alu(ctx->bc, &alu); 1084 if (r) 1085 return r; 1086 } 1087 1088 /* dst.y = SIN */ 1089 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1090 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1091 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1092 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1093 1094 alu.src[0].sel = ctx->temp_reg; 1095 alu.src[0].chan = 0; 1096 alu.last = 1; 1097 r = r600_bc_add_alu(ctx->bc, &alu); 1098 if (r) 1099 return r; 1100 } 1101 1102 /* dst.z = 0.0; */ 1103 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1104 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1105 1106 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1107 1108 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1109 1110 alu.src[0].sel = V_SQ_ALU_SRC_0; 1111 alu.src[0].chan = 0; 1112 1113 alu.last = 1; 1114 1115 r = r600_bc_add_alu(ctx->bc, &alu); 1116 if (r) 1117 return r; 1118 } 1119 1120 /* dst.w = 1.0; */ 1121 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1122 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1123 1124 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1125 1126 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1127 1128 alu.src[0].sel = V_SQ_ALU_SRC_1; 1129 alu.src[0].chan = 0; 1130 1131 alu.last = 1; 1132 1133 r = r600_bc_add_alu(ctx->bc, &alu); 1134 if (r) 1135 return r; 1136 } 1137 1138 return 0; 1139} 1140 1141static int tgsi_kill(struct r600_shader_ctx *ctx) 1142{ 1143 struct r600_bc_alu alu; 1144 int i, r; 1145 1146 for (i = 0; i < 4; i++) { 1147 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1148 alu.inst = ctx->inst_info->r600_opcode; 1149 1150 alu.dst.chan = i; 1151 1152 alu.src[0].sel = V_SQ_ALU_SRC_0; 1153 1154 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1155 alu.src[1].sel = V_SQ_ALU_SRC_1; 1156 alu.src[1].neg = 1; 1157 } else { 1158 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1159 } 1160 if (i == 3) { 1161 alu.last = 1; 1162 } 1163 r = r600_bc_add_alu(ctx->bc, &alu); 1164 if (r) 1165 return r; 1166 } 1167 1168 /* kill must be last in ALU */ 1169 ctx->bc->force_add_cf = 1; 1170 ctx->shader->uses_kill = TRUE; 1171 return 0; 1172} 1173 1174static int tgsi_lit(struct r600_shader_ctx *ctx) 1175{ 1176 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1177 struct r600_bc_alu alu; 1178 int r; 1179 1180 /* dst.x, <- 1.0 */ 1181 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1182 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1183 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1184 alu.src[0].chan = 0; 1185 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1186 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1187 r = r600_bc_add_alu(ctx->bc, &alu); 1188 if (r) 1189 return r; 1190 1191 /* dst.y = max(src.x, 0.0) */ 1192 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1193 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1194 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1195 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1196 alu.src[1].chan = 0; 1197 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1198 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1199 r = r600_bc_add_alu(ctx->bc, &alu); 1200 if (r) 1201 return r; 1202 1203 /* dst.w, <- 1.0 */ 1204 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1205 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1206 alu.src[0].sel = V_SQ_ALU_SRC_1; 1207 alu.src[0].chan = 0; 1208 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1209 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1210 alu.last = 1; 1211 r = r600_bc_add_alu(ctx->bc, &alu); 1212 if (r) 1213 return r; 1214 1215 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1216 { 1217 int chan; 1218 int sel; 1219 1220 /* dst.z = log(src.y) */ 1221 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1222 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1223 r600_bc_src(&alu.src[0], &ctx->src[0], 1); 1224 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1225 alu.last = 1; 1226 r = r600_bc_add_alu(ctx->bc, &alu); 1227 if (r) 1228 return r; 1229 1230 chan = alu.dst.chan; 1231 sel = alu.dst.sel; 1232 1233 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1234 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1235 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1236 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1237 alu.src[1].sel = sel; 1238 alu.src[1].chan = chan; 1239 1240 r600_bc_src(&alu.src[2], &ctx->src[0], 0); 1241 alu.dst.sel = ctx->temp_reg; 1242 alu.dst.chan = 0; 1243 alu.dst.write = 1; 1244 alu.is_op3 = 1; 1245 alu.last = 1; 1246 r = r600_bc_add_alu(ctx->bc, &alu); 1247 if (r) 1248 return r; 1249 1250 /* dst.z = exp(tmp.x) */ 1251 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1252 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1253 alu.src[0].sel = ctx->temp_reg; 1254 alu.src[0].chan = 0; 1255 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1256 alu.last = 1; 1257 r = r600_bc_add_alu(ctx->bc, &alu); 1258 if (r) 1259 return r; 1260 } 1261 return 0; 1262} 1263 1264static int tgsi_rsq(struct r600_shader_ctx *ctx) 1265{ 1266 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1267 struct r600_bc_alu alu; 1268 int i, r; 1269 1270 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1271 1272 /* FIXME: 1273 * For state trackers other than OpenGL, we'll want to use 1274 * _RECIPSQRT_IEEE instead. 1275 */ 1276 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1277 1278 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1279 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1280 alu.src[i].abs = 1; 1281 } 1282 alu.dst.sel = ctx->temp_reg; 1283 alu.dst.write = 1; 1284 alu.last = 1; 1285 r = r600_bc_add_alu(ctx->bc, &alu); 1286 if (r) 1287 return r; 1288 /* replicate result */ 1289 return tgsi_helper_tempx_replicate(ctx); 1290} 1291 1292static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1293{ 1294 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1295 struct r600_bc_alu alu; 1296 int i, r; 1297 1298 for (i = 0; i < 4; i++) { 1299 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1300 alu.src[0].sel = ctx->temp_reg; 1301 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1302 alu.dst.chan = i; 1303 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1304 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1305 if (i == 3) 1306 alu.last = 1; 1307 r = r600_bc_add_alu(ctx->bc, &alu); 1308 if (r) 1309 return r; 1310 } 1311 return 0; 1312} 1313 1314static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1315{ 1316 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1317 struct r600_bc_alu alu; 1318 int i, r; 1319 1320 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1321 alu.inst = ctx->inst_info->r600_opcode; 1322 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1323 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1324 } 1325 alu.dst.sel = ctx->temp_reg; 1326 alu.dst.write = 1; 1327 alu.last = 1; 1328 r = r600_bc_add_alu(ctx->bc, &alu); 1329 if (r) 1330 return r; 1331 /* replicate result */ 1332 return tgsi_helper_tempx_replicate(ctx); 1333} 1334 1335static int tgsi_pow(struct r600_shader_ctx *ctx) 1336{ 1337 struct r600_bc_alu alu; 1338 int r; 1339 1340 /* LOG2(a) */ 1341 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1342 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1343 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1344 alu.dst.sel = ctx->temp_reg; 1345 alu.dst.write = 1; 1346 alu.last = 1; 1347 r = r600_bc_add_alu(ctx->bc, &alu); 1348 if (r) 1349 return r; 1350 /* b * LOG2(a) */ 1351 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1352 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1353 r600_bc_src(&alu.src[0], &ctx->src[1], 0); 1354 alu.src[1].sel = ctx->temp_reg; 1355 alu.dst.sel = ctx->temp_reg; 1356 alu.dst.write = 1; 1357 alu.last = 1; 1358 r = r600_bc_add_alu(ctx->bc, &alu); 1359 if (r) 1360 return r; 1361 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1362 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1363 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1364 alu.src[0].sel = ctx->temp_reg; 1365 alu.dst.sel = ctx->temp_reg; 1366 alu.dst.write = 1; 1367 alu.last = 1; 1368 r = r600_bc_add_alu(ctx->bc, &alu); 1369 if (r) 1370 return r; 1371 return tgsi_helper_tempx_replicate(ctx); 1372} 1373 1374static int tgsi_ssg(struct r600_shader_ctx *ctx) 1375{ 1376 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1377 struct r600_bc_alu alu; 1378 int i, r; 1379 1380 /* tmp = (src > 0 ? 1 : src) */ 1381 for (i = 0; i < 4; i++) { 1382 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1383 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1384 alu.is_op3 = 1; 1385 1386 alu.dst.sel = ctx->temp_reg; 1387 alu.dst.chan = i; 1388 1389 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1390 alu.src[1].sel = V_SQ_ALU_SRC_1; 1391 r600_bc_src(&alu.src[2], &ctx->src[0], i); 1392 1393 if (i == 3) 1394 alu.last = 1; 1395 r = r600_bc_add_alu(ctx->bc, &alu); 1396 if (r) 1397 return r; 1398 } 1399 1400 /* dst = (-tmp > 0 ? -1 : tmp) */ 1401 for (i = 0; i < 4; i++) { 1402 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1403 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1404 alu.is_op3 = 1; 1405 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1406 1407 alu.src[0].sel = ctx->temp_reg; 1408 alu.src[0].chan = i; 1409 alu.src[0].neg = 1; 1410 1411 alu.src[1].sel = V_SQ_ALU_SRC_1; 1412 alu.src[1].neg = 1; 1413 1414 alu.src[2].sel = ctx->temp_reg; 1415 alu.src[2].chan = i; 1416 1417 if (i == 3) 1418 alu.last = 1; 1419 r = r600_bc_add_alu(ctx->bc, &alu); 1420 if (r) 1421 return r; 1422 } 1423 return 0; 1424} 1425 1426static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1427{ 1428 struct r600_bc_alu alu; 1429 int i, r; 1430 1431 for (i = 0; i < 4; i++) { 1432 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1433 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1434 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1435 alu.dst.chan = i; 1436 } else { 1437 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1438 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1439 alu.src[0].sel = ctx->temp_reg; 1440 alu.src[0].chan = i; 1441 } 1442 if (i == 3) { 1443 alu.last = 1; 1444 } 1445 r = r600_bc_add_alu(ctx->bc, &alu); 1446 if (r) 1447 return r; 1448 } 1449 return 0; 1450} 1451 1452static int tgsi_op3(struct r600_shader_ctx *ctx) 1453{ 1454 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1455 struct r600_bc_alu alu; 1456 int i, j, r; 1457 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1458 1459 for (i = 0; i < lasti + 1; i++) { 1460 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1461 continue; 1462 1463 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1464 alu.inst = ctx->inst_info->r600_opcode; 1465 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1466 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1467 } 1468 1469 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1470 alu.dst.chan = i; 1471 alu.dst.write = 1; 1472 alu.is_op3 = 1; 1473 if (i == lasti) { 1474 alu.last = 1; 1475 } 1476 r = r600_bc_add_alu(ctx->bc, &alu); 1477 if (r) 1478 return r; 1479 } 1480 return 0; 1481} 1482 1483static int tgsi_dp(struct r600_shader_ctx *ctx) 1484{ 1485 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1486 struct r600_bc_alu alu; 1487 int i, j, r; 1488 1489 for (i = 0; i < 4; i++) { 1490 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1491 alu.inst = ctx->inst_info->r600_opcode; 1492 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1493 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1494 } 1495 1496 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1497 alu.dst.chan = i; 1498 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1499 /* handle some special cases */ 1500 switch (ctx->inst_info->tgsi_opcode) { 1501 case TGSI_OPCODE_DP2: 1502 if (i > 1) { 1503 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1504 alu.src[0].chan = alu.src[1].chan = 0; 1505 } 1506 break; 1507 case TGSI_OPCODE_DP3: 1508 if (i > 2) { 1509 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1510 alu.src[0].chan = alu.src[1].chan = 0; 1511 } 1512 break; 1513 case TGSI_OPCODE_DPH: 1514 if (i == 3) { 1515 alu.src[0].sel = V_SQ_ALU_SRC_1; 1516 alu.src[0].chan = 0; 1517 alu.src[0].neg = 0; 1518 } 1519 break; 1520 default: 1521 break; 1522 } 1523 if (i == 3) { 1524 alu.last = 1; 1525 } 1526 r = r600_bc_add_alu(ctx->bc, &alu); 1527 if (r) 1528 return r; 1529 } 1530 return 0; 1531} 1532 1533static int tgsi_tex(struct r600_shader_ctx *ctx) 1534{ 1535 static float one_point_five = 1.5f; 1536 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1537 struct r600_bc_tex tex; 1538 struct r600_bc_alu alu; 1539 unsigned src_gpr; 1540 int r, i; 1541 int opcode; 1542 /* Texture fetch instructions can only use gprs as source. 1543 * Also they cannot negate the source or take the absolute value */ 1544 const boolean src_requires_loading = 1545 (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && 1546 inst->Src[0].Register.File != TGSI_FILE_INPUT) || 1547 ctx->src[0].neg || ctx->src[0].abs; 1548 boolean src_loaded = FALSE; 1549 1550 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1551 1552 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1553 /* Add perspective divide */ 1554 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1555 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1556 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1557 1558 alu.dst.sel = ctx->temp_reg; 1559 alu.dst.chan = 3; 1560 alu.last = 1; 1561 alu.dst.write = 1; 1562 r = r600_bc_add_alu(ctx->bc, &alu); 1563 if (r) 1564 return r; 1565 1566 for (i = 0; i < 3; i++) { 1567 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1568 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1569 alu.src[0].sel = ctx->temp_reg; 1570 alu.src[0].chan = 3; 1571 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1572 alu.dst.sel = ctx->temp_reg; 1573 alu.dst.chan = i; 1574 alu.dst.write = 1; 1575 r = r600_bc_add_alu(ctx->bc, &alu); 1576 if (r) 1577 return r; 1578 } 1579 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1580 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1581 alu.src[0].sel = V_SQ_ALU_SRC_1; 1582 alu.src[0].chan = 0; 1583 alu.dst.sel = ctx->temp_reg; 1584 alu.dst.chan = 3; 1585 alu.last = 1; 1586 alu.dst.write = 1; 1587 r = r600_bc_add_alu(ctx->bc, &alu); 1588 if (r) 1589 return r; 1590 src_loaded = TRUE; 1591 src_gpr = ctx->temp_reg; 1592 } 1593 1594 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1595 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 1596 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 1597 1598 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1599 for (i = 0; i < 4; i++) { 1600 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1601 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1602 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 1603 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 1604 alu.dst.sel = ctx->temp_reg; 1605 alu.dst.chan = i; 1606 if (i == 3) 1607 alu.last = 1; 1608 alu.dst.write = 1; 1609 r = r600_bc_add_alu(ctx->bc, &alu); 1610 if (r) 1611 return r; 1612 } 1613 1614 /* tmp1.z = RCP_e(|tmp1.z|) */ 1615 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1616 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1617 alu.src[0].sel = ctx->temp_reg; 1618 alu.src[0].chan = 2; 1619 alu.src[0].abs = 1; 1620 alu.dst.sel = ctx->temp_reg; 1621 alu.dst.chan = 2; 1622 alu.dst.write = 1; 1623 alu.last = 1; 1624 r = r600_bc_add_alu(ctx->bc, &alu); 1625 if (r) 1626 return r; 1627 1628 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1629 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1630 * muladd has no writemask, have to use another temp 1631 */ 1632 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1633 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1634 alu.is_op3 = 1; 1635 1636 alu.src[0].sel = ctx->temp_reg; 1637 alu.src[0].chan = 0; 1638 alu.src[1].sel = ctx->temp_reg; 1639 alu.src[1].chan = 2; 1640 1641 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1642 alu.src[2].chan = 0; 1643 alu.src[2].value = *(uint32_t *)&one_point_five; 1644 1645 alu.dst.sel = ctx->temp_reg; 1646 alu.dst.chan = 0; 1647 alu.dst.write = 1; 1648 1649 r = r600_bc_add_alu(ctx->bc, &alu); 1650 if (r) 1651 return r; 1652 1653 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1654 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1655 alu.is_op3 = 1; 1656 1657 alu.src[0].sel = ctx->temp_reg; 1658 alu.src[0].chan = 1; 1659 alu.src[1].sel = ctx->temp_reg; 1660 alu.src[1].chan = 2; 1661 1662 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1663 alu.src[2].chan = 0; 1664 alu.src[2].value = *(uint32_t *)&one_point_five; 1665 1666 alu.dst.sel = ctx->temp_reg; 1667 alu.dst.chan = 1; 1668 alu.dst.write = 1; 1669 1670 alu.last = 1; 1671 r = r600_bc_add_alu(ctx->bc, &alu); 1672 if (r) 1673 return r; 1674 1675 src_loaded = TRUE; 1676 src_gpr = ctx->temp_reg; 1677 } 1678 1679 if (src_requires_loading && !src_loaded) { 1680 for (i = 0; i < 4; i++) { 1681 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1682 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1683 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1684 alu.dst.sel = ctx->temp_reg; 1685 alu.dst.chan = i; 1686 if (i == 3) 1687 alu.last = 1; 1688 alu.dst.write = 1; 1689 r = r600_bc_add_alu(ctx->bc, &alu); 1690 if (r) 1691 return r; 1692 } 1693 src_loaded = TRUE; 1694 src_gpr = ctx->temp_reg; 1695 } 1696 1697 opcode = ctx->inst_info->r600_opcode; 1698 if (opcode == SQ_TEX_INST_SAMPLE && 1699 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1700 opcode = SQ_TEX_INST_SAMPLE_C; 1701 1702 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1703 tex.inst = opcode; 1704 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1705 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 1706 tex.src_gpr = src_gpr; 1707 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1708 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 1709 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 1710 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 1711 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 1712 if (src_loaded) { 1713 tex.src_sel_x = 0; 1714 tex.src_sel_y = 1; 1715 tex.src_sel_z = 2; 1716 tex.src_sel_w = 3; 1717 } else { 1718 tex.src_sel_x = ctx->src[0].swizzle[0]; 1719 tex.src_sel_y = ctx->src[0].swizzle[1]; 1720 tex.src_sel_z = ctx->src[0].swizzle[2]; 1721 tex.src_sel_w = ctx->src[0].swizzle[3]; 1722 tex.src_rel = ctx->src[0].rel; 1723 } 1724 1725 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1726 tex.src_sel_x = 1; 1727 tex.src_sel_y = 0; 1728 tex.src_sel_z = 3; 1729 tex.src_sel_w = 1; 1730 } 1731 1732 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1733 tex.coord_type_x = 1; 1734 tex.coord_type_y = 1; 1735 tex.coord_type_z = 1; 1736 tex.coord_type_w = 1; 1737 } 1738 1739 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { 1740 tex.coord_type_z = 0; 1741 tex.src_sel_z = tex.src_sel_y; 1742 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) 1743 tex.coord_type_z = 0; 1744 1745 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1746 tex.src_sel_w = tex.src_sel_z; 1747 1748 r = r600_bc_add_tex(ctx->bc, &tex); 1749 if (r) 1750 return r; 1751 1752 /* add shadow ambient support - gallium doesn't do it yet */ 1753 return 0; 1754} 1755 1756static int tgsi_lrp(struct r600_shader_ctx *ctx) 1757{ 1758 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1759 struct r600_bc_alu alu; 1760 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1761 unsigned i; 1762 int r; 1763 1764 /* optimize if it's just an equal balance */ 1765 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 1766 for (i = 0; i < lasti + 1; i++) { 1767 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1768 continue; 1769 1770 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1771 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1772 r600_bc_src(&alu.src[0], &ctx->src[1], i); 1773 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1774 alu.omod = 3; 1775 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1776 alu.dst.chan = i; 1777 if (i == lasti) { 1778 alu.last = 1; 1779 } 1780 r = r600_bc_add_alu(ctx->bc, &alu); 1781 if (r) 1782 return r; 1783 } 1784 return 0; 1785 } 1786 1787 /* 1 - src0 */ 1788 for (i = 0; i < lasti + 1; i++) { 1789 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1790 continue; 1791 1792 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1793 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1794 alu.src[0].sel = V_SQ_ALU_SRC_1; 1795 alu.src[0].chan = 0; 1796 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1797 alu.src[1].neg = 1; 1798 alu.dst.sel = ctx->temp_reg; 1799 alu.dst.chan = i; 1800 if (i == lasti) { 1801 alu.last = 1; 1802 } 1803 alu.dst.write = 1; 1804 r = r600_bc_add_alu(ctx->bc, &alu); 1805 if (r) 1806 return r; 1807 } 1808 1809 /* (1 - src0) * src2 */ 1810 for (i = 0; i < lasti + 1; i++) { 1811 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1812 continue; 1813 1814 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1815 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1816 alu.src[0].sel = ctx->temp_reg; 1817 alu.src[0].chan = i; 1818 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1819 alu.dst.sel = ctx->temp_reg; 1820 alu.dst.chan = i; 1821 if (i == lasti) { 1822 alu.last = 1; 1823 } 1824 alu.dst.write = 1; 1825 r = r600_bc_add_alu(ctx->bc, &alu); 1826 if (r) 1827 return r; 1828 } 1829 1830 /* src0 * src1 + (1 - src0) * src2 */ 1831 for (i = 0; i < lasti + 1; i++) { 1832 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1833 continue; 1834 1835 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1836 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1837 alu.is_op3 = 1; 1838 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1839 r600_bc_src(&alu.src[1], &ctx->src[1], i); 1840 alu.src[2].sel = ctx->temp_reg; 1841 alu.src[2].chan = i; 1842 1843 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1844 alu.dst.chan = i; 1845 if (i == lasti) { 1846 alu.last = 1; 1847 } 1848 r = r600_bc_add_alu(ctx->bc, &alu); 1849 if (r) 1850 return r; 1851 } 1852 return 0; 1853} 1854 1855static int tgsi_cmp(struct r600_shader_ctx *ctx) 1856{ 1857 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1858 struct r600_bc_alu alu; 1859 int i, r; 1860 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1861 1862 for (i = 0; i < lasti + 1; i++) { 1863 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1864 continue; 1865 1866 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1867 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 1868 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1869 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1870 r600_bc_src(&alu.src[2], &ctx->src[1], i); 1871 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1872 alu.dst.chan = i; 1873 alu.dst.write = 1; 1874 alu.is_op3 = 1; 1875 if (i == lasti) 1876 alu.last = 1; 1877 r = r600_bc_add_alu(ctx->bc, &alu); 1878 if (r) 1879 return r; 1880 } 1881 return 0; 1882} 1883 1884static int tgsi_xpd(struct r600_shader_ctx *ctx) 1885{ 1886 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1887 static const unsigned int src0_swizzle[] = {2, 0, 1}; 1888 static const unsigned int src1_swizzle[] = {1, 2, 0}; 1889 struct r600_bc_alu alu; 1890 uint32_t use_temp = 0; 1891 int i, r; 1892 1893 if (inst->Dst[0].Register.WriteMask != 0xf) 1894 use_temp = 1; 1895 1896 for (i = 0; i < 4; i++) { 1897 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1898 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1899 if (i < 3) { 1900 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 1901 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 1902 } else { 1903 alu.src[0].sel = V_SQ_ALU_SRC_0; 1904 alu.src[0].chan = i; 1905 alu.src[1].sel = V_SQ_ALU_SRC_0; 1906 alu.src[1].chan = i; 1907 } 1908 1909 alu.dst.sel = ctx->temp_reg; 1910 alu.dst.chan = i; 1911 alu.dst.write = 1; 1912 1913 if (i == 3) 1914 alu.last = 1; 1915 r = r600_bc_add_alu(ctx->bc, &alu); 1916 if (r) 1917 return r; 1918 } 1919 1920 for (i = 0; i < 4; i++) { 1921 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1922 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1923 1924 if (i < 3) { 1925 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 1926 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 1927 } else { 1928 alu.src[0].sel = V_SQ_ALU_SRC_0; 1929 alu.src[0].chan = i; 1930 alu.src[1].sel = V_SQ_ALU_SRC_0; 1931 alu.src[1].chan = i; 1932 } 1933 1934 alu.src[2].sel = ctx->temp_reg; 1935 alu.src[2].neg = 1; 1936 alu.src[2].chan = i; 1937 1938 if (use_temp) 1939 alu.dst.sel = ctx->temp_reg; 1940 else 1941 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1942 alu.dst.chan = i; 1943 alu.dst.write = 1; 1944 alu.is_op3 = 1; 1945 if (i == 3) 1946 alu.last = 1; 1947 r = r600_bc_add_alu(ctx->bc, &alu); 1948 if (r) 1949 return r; 1950 } 1951 if (use_temp) 1952 return tgsi_helper_copy(ctx, inst); 1953 return 0; 1954} 1955 1956static int tgsi_exp(struct r600_shader_ctx *ctx) 1957{ 1958 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1959 struct r600_bc_alu alu; 1960 int r; 1961 1962 /* result.x = 2^floor(src); */ 1963 if (inst->Dst[0].Register.WriteMask & 1) { 1964 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1965 1966 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 1967 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1968 1969 alu.dst.sel = ctx->temp_reg; 1970 alu.dst.chan = 0; 1971 alu.dst.write = 1; 1972 alu.last = 1; 1973 r = r600_bc_add_alu(ctx->bc, &alu); 1974 if (r) 1975 return r; 1976 1977 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1978 alu.src[0].sel = ctx->temp_reg; 1979 alu.src[0].chan = 0; 1980 1981 alu.dst.sel = ctx->temp_reg; 1982 alu.dst.chan = 0; 1983 alu.dst.write = 1; 1984 alu.last = 1; 1985 r = r600_bc_add_alu(ctx->bc, &alu); 1986 if (r) 1987 return r; 1988 } 1989 1990 /* result.y = tmp - floor(tmp); */ 1991 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 1992 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1993 1994 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1995 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1996 1997 alu.dst.sel = ctx->temp_reg; 1998#if 0 1999 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2000 if (r) 2001 return r; 2002#endif 2003 alu.dst.write = 1; 2004 alu.dst.chan = 1; 2005 2006 alu.last = 1; 2007 2008 r = r600_bc_add_alu(ctx->bc, &alu); 2009 if (r) 2010 return r; 2011 } 2012 2013 /* result.z = RoughApprox2ToX(tmp);*/ 2014 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2015 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2016 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2017 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2018 2019 alu.dst.sel = ctx->temp_reg; 2020 alu.dst.write = 1; 2021 alu.dst.chan = 2; 2022 2023 alu.last = 1; 2024 2025 r = r600_bc_add_alu(ctx->bc, &alu); 2026 if (r) 2027 return r; 2028 } 2029 2030 /* result.w = 1.0;*/ 2031 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2032 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2033 2034 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2035 alu.src[0].sel = V_SQ_ALU_SRC_1; 2036 alu.src[0].chan = 0; 2037 2038 alu.dst.sel = ctx->temp_reg; 2039 alu.dst.chan = 3; 2040 alu.dst.write = 1; 2041 alu.last = 1; 2042 r = r600_bc_add_alu(ctx->bc, &alu); 2043 if (r) 2044 return r; 2045 } 2046 return tgsi_helper_copy(ctx, inst); 2047} 2048 2049static int tgsi_log(struct r600_shader_ctx *ctx) 2050{ 2051 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2052 struct r600_bc_alu alu; 2053 int r; 2054 2055 /* result.x = floor(log2(src)); */ 2056 if (inst->Dst[0].Register.WriteMask & 1) { 2057 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2058 2059 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2060 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2061 2062 alu.dst.sel = ctx->temp_reg; 2063 alu.dst.chan = 0; 2064 alu.dst.write = 1; 2065 alu.last = 1; 2066 r = r600_bc_add_alu(ctx->bc, &alu); 2067 if (r) 2068 return r; 2069 2070 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2071 alu.src[0].sel = ctx->temp_reg; 2072 alu.src[0].chan = 0; 2073 2074 alu.dst.sel = ctx->temp_reg; 2075 alu.dst.chan = 0; 2076 alu.dst.write = 1; 2077 alu.last = 1; 2078 2079 r = r600_bc_add_alu(ctx->bc, &alu); 2080 if (r) 2081 return r; 2082 } 2083 2084 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2085 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2086 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2087 2088 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2089 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2090 2091 alu.dst.sel = ctx->temp_reg; 2092 alu.dst.chan = 1; 2093 alu.dst.write = 1; 2094 alu.last = 1; 2095 2096 r = r600_bc_add_alu(ctx->bc, &alu); 2097 if (r) 2098 return r; 2099 2100 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2101 2102 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2103 alu.src[0].sel = ctx->temp_reg; 2104 alu.src[0].chan = 1; 2105 2106 alu.dst.sel = ctx->temp_reg; 2107 alu.dst.chan = 1; 2108 alu.dst.write = 1; 2109 alu.last = 1; 2110 2111 r = r600_bc_add_alu(ctx->bc, &alu); 2112 if (r) 2113 return r; 2114 2115 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2116 2117 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2118 alu.src[0].sel = ctx->temp_reg; 2119 alu.src[0].chan = 1; 2120 2121 alu.dst.sel = ctx->temp_reg; 2122 alu.dst.chan = 1; 2123 alu.dst.write = 1; 2124 alu.last = 1; 2125 2126 r = r600_bc_add_alu(ctx->bc, &alu); 2127 if (r) 2128 return r; 2129 2130 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2131 2132 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2133 alu.src[0].sel = ctx->temp_reg; 2134 alu.src[0].chan = 1; 2135 2136 alu.dst.sel = ctx->temp_reg; 2137 alu.dst.chan = 1; 2138 alu.dst.write = 1; 2139 alu.last = 1; 2140 2141 r = r600_bc_add_alu(ctx->bc, &alu); 2142 if (r) 2143 return r; 2144 2145 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2146 2147 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2148 2149 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2150 2151 alu.src[1].sel = ctx->temp_reg; 2152 alu.src[1].chan = 1; 2153 2154 alu.dst.sel = ctx->temp_reg; 2155 alu.dst.chan = 1; 2156 alu.dst.write = 1; 2157 alu.last = 1; 2158 2159 r = r600_bc_add_alu(ctx->bc, &alu); 2160 if (r) 2161 return r; 2162 } 2163 2164 /* result.z = log2(src);*/ 2165 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2166 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2167 2168 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2169 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2170 2171 alu.dst.sel = ctx->temp_reg; 2172 alu.dst.write = 1; 2173 alu.dst.chan = 2; 2174 alu.last = 1; 2175 2176 r = r600_bc_add_alu(ctx->bc, &alu); 2177 if (r) 2178 return r; 2179 } 2180 2181 /* result.w = 1.0; */ 2182 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2183 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2184 2185 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2186 alu.src[0].sel = V_SQ_ALU_SRC_1; 2187 alu.src[0].chan = 0; 2188 2189 alu.dst.sel = ctx->temp_reg; 2190 alu.dst.chan = 3; 2191 alu.dst.write = 1; 2192 alu.last = 1; 2193 2194 r = r600_bc_add_alu(ctx->bc, &alu); 2195 if (r) 2196 return r; 2197 } 2198 2199 return tgsi_helper_copy(ctx, inst); 2200} 2201 2202static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2203{ 2204 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2205 struct r600_bc_alu alu; 2206 int r; 2207 2208 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2209 2210 switch (inst->Instruction.Opcode) { 2211 case TGSI_OPCODE_ARL: 2212 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2213 break; 2214 case TGSI_OPCODE_ARR: 2215 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2216 break; 2217 default: 2218 assert(0); 2219 return -1; 2220 } 2221 2222 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2223 alu.last = 1; 2224 alu.dst.sel = ctx->ar_reg; 2225 alu.dst.write = 1; 2226 r = r600_bc_add_alu(ctx->bc, &alu); 2227 if (r) 2228 return r; 2229 2230 /* TODO: Note that the MOVA can be avoided if we never use AR for 2231 * indexing non-CB registers in the current ALU clause. Similarly, we 2232 * need to load AR from ar_reg again if we started a new clause 2233 * between ARL and AR usage. The easy way to do that is to remove 2234 * the MOVA here, and load it for the first AR access after ar_reg 2235 * has been modified in each clause. */ 2236 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2237 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2238 alu.src[0].sel = ctx->ar_reg; 2239 alu.src[0].chan = 0; 2240 alu.last = 1; 2241 r = r600_bc_add_alu(ctx->bc, &alu); 2242 if (r) 2243 return r; 2244 return 0; 2245} 2246static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2247{ 2248 /* TODO from r600c, ar values don't persist between clauses */ 2249 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2250 struct r600_bc_alu alu; 2251 int r; 2252 2253 switch (inst->Instruction.Opcode) { 2254 case TGSI_OPCODE_ARL: 2255 memset(&alu, 0, sizeof(alu)); 2256 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 2257 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2258 alu.dst.sel = ctx->ar_reg; 2259 alu.dst.write = 1; 2260 alu.last = 1; 2261 2262 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2263 return r; 2264 2265 memset(&alu, 0, sizeof(alu)); 2266 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2267 alu.src[0].sel = ctx->ar_reg; 2268 alu.dst.sel = ctx->ar_reg; 2269 alu.dst.write = 1; 2270 alu.last = 1; 2271 2272 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2273 return r; 2274 break; 2275 case TGSI_OPCODE_ARR: 2276 memset(&alu, 0, sizeof(alu)); 2277 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2278 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2279 alu.dst.sel = ctx->ar_reg; 2280 alu.dst.write = 1; 2281 alu.last = 1; 2282 2283 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2284 return r; 2285 break; 2286 default: 2287 assert(0); 2288 return -1; 2289 } 2290 2291 memset(&alu, 0, sizeof(alu)); 2292 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2293 alu.src[0].sel = ctx->ar_reg; 2294 alu.last = 1; 2295 2296 r = r600_bc_add_alu(ctx->bc, &alu); 2297 if (r) 2298 return r; 2299 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2300 return 0; 2301} 2302 2303static int tgsi_opdst(struct r600_shader_ctx *ctx) 2304{ 2305 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2306 struct r600_bc_alu alu; 2307 int i, r = 0; 2308 2309 for (i = 0; i < 4; i++) { 2310 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2311 2312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2313 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2314 2315 if (i == 0 || i == 3) { 2316 alu.src[0].sel = V_SQ_ALU_SRC_1; 2317 } else { 2318 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2319 } 2320 2321 if (i == 0 || i == 2) { 2322 alu.src[1].sel = V_SQ_ALU_SRC_1; 2323 } else { 2324 r600_bc_src(&alu.src[1], &ctx->src[1], i); 2325 } 2326 if (i == 3) 2327 alu.last = 1; 2328 r = r600_bc_add_alu(ctx->bc, &alu); 2329 if (r) 2330 return r; 2331 } 2332 return 0; 2333} 2334 2335static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2336{ 2337 struct r600_bc_alu alu; 2338 int r; 2339 2340 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2341 alu.inst = opcode; 2342 alu.predicate = 1; 2343 2344 alu.dst.sel = ctx->temp_reg; 2345 alu.dst.write = 1; 2346 alu.dst.chan = 0; 2347 2348 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2349 alu.src[1].sel = V_SQ_ALU_SRC_0; 2350 alu.src[1].chan = 0; 2351 2352 alu.last = 1; 2353 2354 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2355 if (r) 2356 return r; 2357 return 0; 2358} 2359 2360static int pops(struct r600_shader_ctx *ctx, int pops) 2361{ 2362 int alu_pop = 3; 2363 if (ctx->bc->cf_last) { 2364 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) 2365 alu_pop = 0; 2366 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) 2367 alu_pop = 1; 2368 } 2369 alu_pop += pops; 2370 if (alu_pop == 1) { 2371 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; 2372 ctx->bc->force_add_cf = 1; 2373 } else if (alu_pop == 2) { 2374 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; 2375 ctx->bc->force_add_cf = 1; 2376 } else { 2377 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2378 ctx->bc->cf_last->pop_count = pops; 2379 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 2380 } 2381 return 0; 2382} 2383 2384static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2385{ 2386 switch(reason) { 2387 case FC_PUSH_VPM: 2388 ctx->bc->callstack[ctx->bc->call_sp].current--; 2389 break; 2390 case FC_PUSH_WQM: 2391 case FC_LOOP: 2392 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2393 break; 2394 case FC_REP: 2395 /* TOODO : for 16 vp asic should -= 2; */ 2396 ctx->bc->callstack[ctx->bc->call_sp].current --; 2397 break; 2398 } 2399} 2400 2401static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2402{ 2403 if (check_max_only) { 2404 int diff; 2405 switch (reason) { 2406 case FC_PUSH_VPM: 2407 diff = 1; 2408 break; 2409 case FC_PUSH_WQM: 2410 diff = 4; 2411 break; 2412 default: 2413 assert(0); 2414 diff = 0; 2415 } 2416 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2417 ctx->bc->callstack[ctx->bc->call_sp].max) { 2418 ctx->bc->callstack[ctx->bc->call_sp].max = 2419 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2420 } 2421 return; 2422 } 2423 switch (reason) { 2424 case FC_PUSH_VPM: 2425 ctx->bc->callstack[ctx->bc->call_sp].current++; 2426 break; 2427 case FC_PUSH_WQM: 2428 case FC_LOOP: 2429 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2430 break; 2431 case FC_REP: 2432 ctx->bc->callstack[ctx->bc->call_sp].current++; 2433 break; 2434 } 2435 2436 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2437 ctx->bc->callstack[ctx->bc->call_sp].max) { 2438 ctx->bc->callstack[ctx->bc->call_sp].max = 2439 ctx->bc->callstack[ctx->bc->call_sp].current; 2440 } 2441} 2442 2443static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2444{ 2445 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2446 2447 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2448 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2449 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2450 sp->num_mid++; 2451} 2452 2453static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2454{ 2455 ctx->bc->fc_sp++; 2456 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2457 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2458} 2459 2460static void fc_poplevel(struct r600_shader_ctx *ctx) 2461{ 2462 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2463 if (sp->mid) { 2464 free(sp->mid); 2465 sp->mid = NULL; 2466 } 2467 sp->num_mid = 0; 2468 sp->start = NULL; 2469 sp->type = 0; 2470 ctx->bc->fc_sp--; 2471} 2472 2473#if 0 2474static int emit_return(struct r600_shader_ctx *ctx) 2475{ 2476 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2477 return 0; 2478} 2479 2480static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2481{ 2482 2483 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2484 ctx->bc->cf_last->pop_count = pops; 2485 /* TODO work out offset */ 2486 return 0; 2487} 2488 2489static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2490{ 2491 return 0; 2492} 2493 2494static void emit_testflag(struct r600_shader_ctx *ctx) 2495{ 2496 2497} 2498 2499static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2500{ 2501 emit_testflag(ctx); 2502 emit_jump_to_offset(ctx, 1, 4); 2503 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2504 pops(ctx, ifidx + 1); 2505 emit_return(ctx); 2506} 2507 2508static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2509{ 2510 emit_testflag(ctx); 2511 2512 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2513 ctx->bc->cf_last->pop_count = 1; 2514 2515 fc_set_mid(ctx, fc_sp); 2516 2517 pops(ctx, 1); 2518} 2519#endif 2520 2521static int tgsi_if(struct r600_shader_ctx *ctx) 2522{ 2523 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2524 2525 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2526 2527 fc_pushlevel(ctx, FC_IF); 2528 2529 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2530 return 0; 2531} 2532 2533static int tgsi_else(struct r600_shader_ctx *ctx) 2534{ 2535 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2536 ctx->bc->cf_last->pop_count = 1; 2537 2538 fc_set_mid(ctx, ctx->bc->fc_sp); 2539 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2540 return 0; 2541} 2542 2543static int tgsi_endif(struct r600_shader_ctx *ctx) 2544{ 2545 pops(ctx, 1); 2546 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2547 R600_ERR("if/endif unbalanced in shader\n"); 2548 return -1; 2549 } 2550 2551 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2552 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2553 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2554 } else { 2555 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2556 } 2557 fc_poplevel(ctx); 2558 2559 callstack_decrease_current(ctx, FC_PUSH_VPM); 2560 return 0; 2561} 2562 2563static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2564{ 2565 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2566 2567 fc_pushlevel(ctx, FC_LOOP); 2568 2569 /* check stack depth */ 2570 callstack_check_depth(ctx, FC_LOOP, 0); 2571 return 0; 2572} 2573 2574static int tgsi_endloop(struct r600_shader_ctx *ctx) 2575{ 2576 int i; 2577 2578 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2579 2580 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2581 R600_ERR("loop/endloop in shader code are not paired.\n"); 2582 return -EINVAL; 2583 } 2584 2585 /* fixup loop pointers - from r600isa 2586 LOOP END points to CF after LOOP START, 2587 LOOP START point to CF after LOOP END 2588 BRK/CONT point to LOOP END CF 2589 */ 2590 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2591 2592 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2593 2594 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2595 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2596 } 2597 /* TODO add LOOPRET support */ 2598 fc_poplevel(ctx); 2599 callstack_decrease_current(ctx, FC_LOOP); 2600 return 0; 2601} 2602 2603static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2604{ 2605 unsigned int fscp; 2606 2607 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2608 { 2609 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2610 break; 2611 } 2612 2613 if (fscp == 0) { 2614 R600_ERR("Break not inside loop/endloop pair\n"); 2615 return -EINVAL; 2616 } 2617 2618 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2619 ctx->bc->cf_last->pop_count = 1; 2620 2621 fc_set_mid(ctx, fscp); 2622 2623 pops(ctx, 1); 2624 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2625 return 0; 2626} 2627 2628static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2629 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 2630 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2631 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2632 2633 /* FIXME: 2634 * For state trackers other than OpenGL, we'll want to use 2635 * _RECIP_IEEE instead. 2636 */ 2637 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 2638 2639 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 2640 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2641 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 2642 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2643 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2644 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2645 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2646 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2647 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2648 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2649 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2650 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2651 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2652 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2653 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2654 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2655 /* gap */ 2656 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2657 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2658 /* gap */ 2659 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2660 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2661 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2662 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2663 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2664 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2665 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2666 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2667 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2668 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2669 /* gap */ 2670 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2671 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2672 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2673 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2674 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2675 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2676 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2677 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2678 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2679 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2680 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2681 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2682 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2683 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2684 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2685 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2686 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2687 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2688 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2689 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2690 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2691 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2692 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2693 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2694 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2695 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2696 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2697 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2698 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2699 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 2700 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2701 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2702 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2703 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2704 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2705 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2706 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2707 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2708 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2709 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2710 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2711 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2712 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2713 /* gap */ 2714 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2715 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2716 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2717 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2718 /* gap */ 2719 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2720 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2721 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2722 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2723 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2724 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2725 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2726 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 2727 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2728 /* gap */ 2729 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2730 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2731 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2732 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2733 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2734 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2735 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2736 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2737 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2738 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2739 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2740 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2741 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2742 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2743 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2744 /* gap */ 2745 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2746 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2747 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2748 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2749 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2750 /* gap */ 2751 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2752 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2753 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2754 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2755 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2756 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2757 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2758 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2759 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2760 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2761 /* gap */ 2762 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2763 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2764 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2765 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2766 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2767 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2768 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2769 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2770 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2771 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2772 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2773 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2774 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2775 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2776 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2777 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2778 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2779 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2780 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2781 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2782 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2783 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2784 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2785 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2786 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2787 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2788 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2789 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2790}; 2791 2792static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 2793 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 2794 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2795 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2796 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 2797 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 2798 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2799 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 2800 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2801 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2802 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2803 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2804 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2805 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2806 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2807 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2808 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2809 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2810 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2811 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2812 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2813 /* gap */ 2814 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2815 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2816 /* gap */ 2817 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2818 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2819 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2820 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2821 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2822 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2823 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2824 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2825 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2826 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2827 /* gap */ 2828 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2829 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2830 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2831 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2832 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2833 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2834 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2835 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2836 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2837 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2838 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2839 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2840 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2841 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2842 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2843 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2844 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2845 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2846 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2847 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2848 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2849 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2850 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2851 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2852 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2853 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2854 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2855 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2856 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2857 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 2858 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2859 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2860 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2861 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2862 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2863 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2864 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2865 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2866 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2867 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2868 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2869 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2870 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2871 /* gap */ 2872 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2873 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2874 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2875 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2876 /* gap */ 2877 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2878 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2879 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2880 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2881 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2882 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2883 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2884 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 2885 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2886 /* gap */ 2887 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2888 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2889 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2890 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2891 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2892 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2893 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2894 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2895 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2896 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2897 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2898 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2899 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2900 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2901 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2902 /* gap */ 2903 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2904 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2905 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2906 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2907 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2908 /* gap */ 2909 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2910 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2911 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2912 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2913 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2914 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2915 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2916 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2917 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2918 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2919 /* gap */ 2920 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2921 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2922 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2923 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2924 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2925 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2926 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2927 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2928 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2929 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2930 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2931 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2932 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2933 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2934 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2935 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2936 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2937 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2938 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2939 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2940 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2941 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2942 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2943 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2944 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2945 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2946 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2947 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2948}; 2949