r600_shader.c revision 843dfe3206c4f397c7911b748373dde5540392a4
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_formats.h" 32#include "r600_opcodes.h" 33#include "r600d.h" 34#include <stdio.h> 35#include <errno.h> 36#include <byteswap.h> 37 38#ifdef PIPE_ARCH_BIG_ENDIAN 39#define CPU_TO_LE32(x) bswap_32(x) 40#else 41#define CPU_TO_LE32(x) (x) 42#endif 43 44int r600_find_vs_semantic_index(struct r600_shader *vs, 45 struct r600_shader *ps, int id) 46{ 47 struct r600_shader_io *input = &ps->input[id]; 48 49 for (int i = 0; i < vs->noutput; i++) { 50 if (input->name == vs->output[i].name && 51 input->sid == vs->output[i].sid) { 52 return i - 1; 53 } 54 } 55 return 0; 56} 57 58static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 59{ 60 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 61 struct r600_shader *rshader = &shader->shader; 62 uint32_t *ptr; 63 int i; 64 65 /* copy new shader */ 66 if (shader->bo == NULL) { 67 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); 68 if (shader->bo == NULL) { 69 return -ENOMEM; 70 } 71 ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 72 for(i = 0; i < rshader->bc.ndw; i++) { 73 *(ptr + i) = CPU_TO_LE32(*(rshader->bc.bytecode + i)); 74 } 75 r600_bo_unmap(rctx->radeon, shader->bo); 76 } 77 /* build state */ 78 switch (rshader->processor_type) { 79 case TGSI_PROCESSOR_VERTEX: 80 if (rshader->family >= CHIP_CEDAR) { 81 evergreen_pipe_shader_vs(ctx, shader); 82 } else { 83 r600_pipe_shader_vs(ctx, shader); 84 } 85 break; 86 case TGSI_PROCESSOR_FRAGMENT: 87 if (rshader->family >= CHIP_CEDAR) { 88 evergreen_pipe_shader_ps(ctx, shader); 89 } else { 90 r600_pipe_shader_ps(ctx, shader); 91 } 92 break; 93 default: 94 return -EINVAL; 95 } 96 return 0; 97} 98 99static int r600_shader_from_tgsi(const struct tgsi_token 
*tokens, struct r600_shader *shader); 100 101int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 102{ 103 static int dump_shaders = -1; 104 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 105 int r; 106 107 /* Would like some magic "get_bool_option_once" routine. 108 */ 109 if (dump_shaders == -1) 110 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 111 112 if (dump_shaders) { 113 fprintf(stderr, "--------------------------------------------------------------\n"); 114 tgsi_dump(tokens, 0); 115 } 116 shader->shader.family = r600_get_family(rctx->radeon); 117 r = r600_shader_from_tgsi(tokens, &shader->shader); 118 if (r) { 119 R600_ERR("translation from TGSI failed !\n"); 120 return r; 121 } 122 r = r600_bc_build(&shader->shader.bc); 123 if (r) { 124 R600_ERR("building bytecode failed !\n"); 125 return r; 126 } 127 if (dump_shaders) { 128 r600_bc_dump(&shader->shader.bc); 129 fprintf(stderr, "______________________________________________________________\n"); 130 } 131 return r600_pipe_shader(ctx, shader); 132} 133 134void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 135{ 136 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 137 138 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 139 r600_bc_clear(&shader->shader.bc); 140} 141 142/* 143 * tgsi -> r600 shader 144 */ 145struct r600_shader_tgsi_instruction; 146 147struct r600_shader_src { 148 unsigned sel; 149 unsigned swizzle[4]; 150 unsigned neg; 151 unsigned abs; 152 unsigned rel; 153 uint32_t value[4]; 154}; 155 156struct r600_shader_ctx { 157 struct tgsi_shader_info info; 158 struct tgsi_parse_context parse; 159 const struct tgsi_token *tokens; 160 unsigned type; 161 unsigned file_offset[TGSI_FILE_COUNT]; 162 unsigned temp_reg; 163 unsigned ar_reg; 164 struct r600_shader_tgsi_instruction *inst_info; 165 struct r600_bc *bc; 166 struct r600_shader *shader; 
167 struct r600_shader_src src[3]; 168 u32 *literals; 169 u32 nliterals; 170 u32 max_driver_temp_used; 171 /* needed for evergreen interpolation */ 172 boolean input_centroid; 173 boolean input_linear; 174 boolean input_perspective; 175 int num_interp_gpr; 176}; 177 178struct r600_shader_tgsi_instruction { 179 unsigned tgsi_opcode; 180 unsigned is_op3; 181 unsigned r600_opcode; 182 int (*process)(struct r600_shader_ctx *ctx); 183}; 184 185static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 186static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 187 188static int tgsi_is_supported(struct r600_shader_ctx *ctx) 189{ 190 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 191 int j; 192 193 if (i->Instruction.NumDstRegs > 1) { 194 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 195 return -EINVAL; 196 } 197 if (i->Instruction.Predicate) { 198 R600_ERR("predicate unsupported\n"); 199 return -EINVAL; 200 } 201#if 0 202 if (i->Instruction.Label) { 203 R600_ERR("label unsupported\n"); 204 return -EINVAL; 205 } 206#endif 207 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 208 if (i->Src[j].Register.Dimension) { 209 R600_ERR("unsupported src %d (dimension %d)\n", j, 210 i->Src[j].Register.Dimension); 211 return -EINVAL; 212 } 213 } 214 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 215 if (i->Dst[j].Register.Dimension) { 216 R600_ERR("unsupported dst (dimension)\n"); 217 return -EINVAL; 218 } 219 } 220 return 0; 221} 222 223static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 224{ 225 int i, r; 226 struct r600_bc_alu alu; 227 int gpr = 0, base_chan = 0; 228 int ij_index = 0; 229 230 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 231 ij_index = 0; 232 if (ctx->shader->input[input].centroid) 233 ij_index++; 234 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 235 ij_index = 0; 236 /* if we have 
perspective add one */ 237 if (ctx->input_perspective) { 238 ij_index++; 239 /* if we have perspective centroid */ 240 if (ctx->input_centroid) 241 ij_index++; 242 } 243 if (ctx->shader->input[input].centroid) 244 ij_index++; 245 } 246 247 /* work out gpr and base_chan from index */ 248 gpr = ij_index / 2; 249 base_chan = (2 * (ij_index % 2)) + 1; 250 251 for (i = 0; i < 8; i++) { 252 memset(&alu, 0, sizeof(struct r600_bc_alu)); 253 254 if (i < 4) 255 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 256 else 257 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 258 259 if ((i > 1) && (i < 6)) { 260 alu.dst.sel = ctx->shader->input[input].gpr; 261 alu.dst.write = 1; 262 } 263 264 alu.dst.chan = i % 4; 265 266 alu.src[0].sel = gpr; 267 alu.src[0].chan = (base_chan - (i % 2)); 268 269 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 270 271 alu.bank_swizzle_force = SQ_ALU_VEC_210; 272 if ((i % 4) == 3) 273 alu.last = 1; 274 r = r600_bc_add_alu(ctx->bc, &alu); 275 if (r) 276 return r; 277 } 278 return 0; 279} 280 281 282static int tgsi_declaration(struct r600_shader_ctx *ctx) 283{ 284 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 285 unsigned i; 286 int r; 287 288 switch (d->Declaration.File) { 289 case TGSI_FILE_INPUT: 290 i = ctx->shader->ninput++; 291 ctx->shader->input[i].name = d->Semantic.Name; 292 ctx->shader->input[i].sid = d->Semantic.Index; 293 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 294 ctx->shader->input[i].centroid = d->Declaration.Centroid; 295 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 296 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { 297 /* turn input into interpolate on EG */ 298 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 299 if (ctx->shader->input[i].interpolate > 0) { 300 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 301 evergreen_interp_alu(ctx, i); 302 } 303 } 304 } 305 break; 306 
case TGSI_FILE_OUTPUT: 307 i = ctx->shader->noutput++; 308 ctx->shader->output[i].name = d->Semantic.Name; 309 ctx->shader->output[i].sid = d->Semantic.Index; 310 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 311 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 312 break; 313 case TGSI_FILE_CONSTANT: 314 case TGSI_FILE_TEMPORARY: 315 case TGSI_FILE_SAMPLER: 316 case TGSI_FILE_ADDRESS: 317 break; 318 319 case TGSI_FILE_SYSTEM_VALUE: 320 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 321 struct r600_bc_alu alu; 322 memset(&alu, 0, sizeof(struct r600_bc_alu)); 323 324 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 325 alu.src[0].sel = 0; 326 alu.src[0].chan = 3; 327 328 alu.dst.sel = 0; 329 alu.dst.chan = 3; 330 alu.dst.write = 1; 331 alu.last = 1; 332 333 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 334 return r; 335 break; 336 } 337 338 default: 339 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 340 return -EINVAL; 341 } 342 return 0; 343} 344 345static int r600_get_temp(struct r600_shader_ctx *ctx) 346{ 347 return ctx->temp_reg + ctx->max_driver_temp_used++; 348} 349 350/* 351 * for evergreen we need to scan the shader to find the number of GPRs we need to 352 * reserve for interpolation. 
353 * 354 * we need to know if we are going to emit 355 * any centroid inputs 356 * if perspective and linear are required 357*/ 358static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 359{ 360 int i; 361 int num_baryc; 362 363 ctx->input_linear = FALSE; 364 ctx->input_perspective = FALSE; 365 ctx->input_centroid = FALSE; 366 ctx->num_interp_gpr = 1; 367 368 /* any centroid inputs */ 369 for (i = 0; i < ctx->info.num_inputs; i++) { 370 /* skip position/face */ 371 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 372 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 373 continue; 374 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 375 ctx->input_linear = TRUE; 376 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 377 ctx->input_perspective = TRUE; 378 if (ctx->info.input_centroid[i]) 379 ctx->input_centroid = TRUE; 380 } 381 382 num_baryc = 0; 383 /* ignoring sample for now */ 384 if (ctx->input_perspective) 385 num_baryc++; 386 if (ctx->input_linear) 387 num_baryc++; 388 if (ctx->input_centroid) 389 num_baryc *= 2; 390 391 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 392 393 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 394 return ctx->num_interp_gpr; 395} 396 397static void tgsi_src(struct r600_shader_ctx *ctx, 398 const struct tgsi_full_src_register *tgsi_src, 399 struct r600_shader_src *r600_src) 400{ 401 memset(r600_src, 0, sizeof(*r600_src)); 402 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 403 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 404 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 405 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 406 r600_src->neg = tgsi_src->Register.Negate; 407 r600_src->abs = tgsi_src->Register.Absolute; 408 409 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 410 int index; 411 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 412 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 413 
(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 414 415 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 416 r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 417 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 418 return; 419 } 420 index = tgsi_src->Register.Index; 421 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 422 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 423 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 424 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ 425 r600_src->swizzle[0] = 3; 426 r600_src->swizzle[1] = 3; 427 r600_src->swizzle[2] = 3; 428 r600_src->swizzle[3] = 3; 429 r600_src->sel = 0; 430 } else { 431 if (tgsi_src->Register.Indirect) 432 r600_src->rel = V_SQ_REL_RELATIVE; 433 r600_src->sel = tgsi_src->Register.Index; 434 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 435 } 436} 437 438static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 439{ 440 struct r600_bc_vtx vtx; 441 unsigned int ar_reg; 442 int r; 443 444 if (offset) { 445 struct r600_bc_alu alu; 446 447 memset(&alu, 0, sizeof(alu)); 448 449 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 450 alu.src[0].sel = ctx->ar_reg; 451 452 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 453 alu.src[1].value = offset; 454 455 alu.dst.sel = dst_reg; 456 alu.dst.write = 1; 457 alu.last = 1; 458 459 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 460 return r; 461 462 ar_reg = dst_reg; 463 } else { 464 ar_reg = ctx->ar_reg; 465 } 466 467 memset(&vtx, 0, sizeof(vtx)); 468 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 469 vtx.src_gpr = ar_reg; 470 vtx.mega_fetch_count = 16; 471 vtx.dst_gpr = dst_reg; 472 vtx.dst_sel_x = 0; /* SEL_X */ 473 vtx.dst_sel_y = 1; /* SEL_Y */ 474 vtx.dst_sel_z = 2; /* SEL_Z */ 475 vtx.dst_sel_w = 3; /* SEL_W */ 476 vtx.data_format = FMT_32_32_32_32_FLOAT; 477 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 
478 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 479 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 480#ifdef PIPE_ARCH_BIG_ENDIAN 481 vtx.endian = ENDIAN_8IN32; 482#else 483 vtx.endian = ENDIAN_NONE; 484#endif 485 486 if ((r = r600_bc_add_vtx(ctx->bc, &vtx))) 487 return r; 488 489 return 0; 490} 491 492static int tgsi_split_constant(struct r600_shader_ctx *ctx) 493{ 494 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 495 struct r600_bc_alu alu; 496 int i, j, k, nconst, r; 497 498 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 499 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 500 nconst++; 501 } 502 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 503 } 504 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 505 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 506 continue; 507 } 508 509 if (ctx->src[i].rel) { 510 int treg = r600_get_temp(ctx); 511 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 512 return r; 513 514 ctx->src[i].sel = treg; 515 ctx->src[i].rel = 0; 516 j--; 517 } else if (j > 0) { 518 int treg = r600_get_temp(ctx); 519 for (k = 0; k < 4; k++) { 520 memset(&alu, 0, sizeof(struct r600_bc_alu)); 521 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 522 alu.src[0].sel = ctx->src[i].sel; 523 alu.src[0].chan = k; 524 alu.src[0].rel = ctx->src[i].rel; 525 alu.dst.sel = treg; 526 alu.dst.chan = k; 527 alu.dst.write = 1; 528 if (k == 3) 529 alu.last = 1; 530 r = r600_bc_add_alu(ctx->bc, &alu); 531 if (r) 532 return r; 533 } 534 ctx->src[i].sel = treg; 535 ctx->src[i].rel =0; 536 j--; 537 } 538 } 539 return 0; 540} 541 542/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 543static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 544{ 545 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 546 struct r600_bc_alu alu; 547 int i, j, k, nliteral, r; 548 549 for (i = 0, nliteral = 
0; i < inst->Instruction.NumSrcRegs; i++) { 550 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 551 nliteral++; 552 } 553 } 554 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 555 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 556 int treg = r600_get_temp(ctx); 557 for (k = 0; k < 4; k++) { 558 memset(&alu, 0, sizeof(struct r600_bc_alu)); 559 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 560 alu.src[0].sel = ctx->src[i].sel; 561 alu.src[0].chan = k; 562 alu.src[0].value = ctx->src[i].value[k]; 563 alu.dst.sel = treg; 564 alu.dst.chan = k; 565 alu.dst.write = 1; 566 if (k == 3) 567 alu.last = 1; 568 r = r600_bc_add_alu(ctx->bc, &alu); 569 if (r) 570 return r; 571 } 572 ctx->src[i].sel = treg; 573 j--; 574 } 575 } 576 return 0; 577} 578 579static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 580{ 581 struct tgsi_full_immediate *immediate; 582 struct tgsi_full_property *property; 583 struct r600_shader_ctx ctx; 584 struct r600_bc_output output[32]; 585 unsigned output_done, noutput; 586 unsigned opcode; 587 int i, r = 0, pos0; 588 589 ctx.bc = &shader->bc; 590 ctx.shader = shader; 591 r = r600_bc_init(ctx.bc, shader->family); 592 if (r) 593 return r; 594 ctx.tokens = tokens; 595 tgsi_scan_shader(tokens, &ctx.info); 596 tgsi_parse_init(&ctx.parse, tokens); 597 ctx.type = ctx.parse.FullHeader.Processor.Processor; 598 shader->processor_type = ctx.type; 599 ctx.bc->type = shader->processor_type; 600 601 /* register allocations */ 602 /* Values [0,127] correspond to GPR[0..127]. 603 * Values [128,159] correspond to constant buffer bank 0 604 * Values [160,191] correspond to constant buffer bank 1 605 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 606 * Values [256,287] correspond to constant buffer bank 2 (EG) 607 * Values [288,319] correspond to constant buffer bank 3 (EG) 608 * Other special values are shown in the list below. 
609 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 610 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 611 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 612 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 613 * 248 SQ_ALU_SRC_0: special constant 0.0. 614 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 615 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 616 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 617 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 618 * 253 SQ_ALU_SRC_LITERAL: literal constant. 619 * 254 SQ_ALU_SRC_PV: previous vector result. 620 * 255 SQ_ALU_SRC_PS: previous scalar result. 621 */ 622 for (i = 0; i < TGSI_FILE_COUNT; i++) { 623 ctx.file_offset[i] = 0; 624 } 625 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 626 ctx.file_offset[TGSI_FILE_INPUT] = 1; 627 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { 628 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 629 } else { 630 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 631 } 632 } 633 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { 634 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 635 } 636 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 637 ctx.info.file_count[TGSI_FILE_INPUT]; 638 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 639 ctx.info.file_count[TGSI_FILE_OUTPUT]; 640 641 /* Outside the GPR range. This will be translated to one of the 642 * kcache banks later. 
*/ 643 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 644 645 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 646 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 647 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 648 ctx.temp_reg = ctx.ar_reg + 1; 649 650 ctx.nliterals = 0; 651 ctx.literals = NULL; 652 shader->fs_write_all = FALSE; 653 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 654 tgsi_parse_token(&ctx.parse); 655 switch (ctx.parse.FullToken.Token.Type) { 656 case TGSI_TOKEN_TYPE_IMMEDIATE: 657 immediate = &ctx.parse.FullToken.FullImmediate; 658 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 659 if(ctx.literals == NULL) { 660 r = -ENOMEM; 661 goto out_err; 662 } 663 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 664 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 665 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 666 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 667 ctx.nliterals++; 668 break; 669 case TGSI_TOKEN_TYPE_DECLARATION: 670 r = tgsi_declaration(&ctx); 671 if (r) 672 goto out_err; 673 break; 674 case TGSI_TOKEN_TYPE_INSTRUCTION: 675 r = tgsi_is_supported(&ctx); 676 if (r) 677 goto out_err; 678 ctx.max_driver_temp_used = 0; 679 /* reserve first tmp for everyone */ 680 r600_get_temp(&ctx); 681 682 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 683 if ((r = tgsi_split_constant(&ctx))) 684 goto out_err; 685 if ((r = tgsi_split_literal_constant(&ctx))) 686 goto out_err; 687 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) 688 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 689 else 690 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 691 r = ctx.inst_info->process(&ctx); 692 if (r) 693 goto out_err; 694 break; 695 case TGSI_TOKEN_TYPE_PROPERTY: 696 property = &ctx.parse.FullToken.FullProperty; 697 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { 698 if (property->u[0].Data == 1) 699 shader->fs_write_all = TRUE; 700 } 701 
break; 702 default: 703 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 704 r = -EINVAL; 705 goto out_err; 706 } 707 } 708 /* export output */ 709 noutput = shader->noutput; 710 for (i = 0, pos0 = 0; i < noutput; i++) { 711 memset(&output[i], 0, sizeof(struct r600_bc_output)); 712 output[i].gpr = shader->output[i].gpr; 713 output[i].elem_size = 3; 714 output[i].swizzle_x = 0; 715 output[i].swizzle_y = 1; 716 output[i].swizzle_z = 2; 717 output[i].swizzle_w = 3; 718 output[i].burst_count = 1; 719 output[i].barrier = 1; 720 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 721 output[i].array_base = i - pos0; 722 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 723 switch (ctx.type) { 724 case TGSI_PROCESSOR_VERTEX: 725 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 726 output[i].array_base = 60; 727 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 728 /* position doesn't count in array_base */ 729 pos0++; 730 } 731 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 732 output[i].array_base = 61; 733 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 734 /* position doesn't count in array_base */ 735 pos0++; 736 } 737 break; 738 case TGSI_PROCESSOR_FRAGMENT: 739 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 740 output[i].array_base = shader->output[i].sid; 741 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 742 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 743 output[i].array_base = 61; 744 output[i].swizzle_x = 2; 745 output[i].swizzle_y = 7; 746 output[i].swizzle_z = output[i].swizzle_w = 7; 747 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 748 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 749 output[i].array_base = 61; 750 output[i].swizzle_x = 7; 751 output[i].swizzle_y = 1; 752 output[i].swizzle_z = output[i].swizzle_w = 7; 753 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 754 } else { 
755 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 756 r = -EINVAL; 757 goto out_err; 758 } 759 break; 760 default: 761 R600_ERR("unsupported processor type %d\n", ctx.type); 762 r = -EINVAL; 763 goto out_err; 764 } 765 } 766 /* add fake param output for vertex shader if no param is exported */ 767 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 768 for (i = 0, pos0 = 0; i < noutput; i++) { 769 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 770 pos0 = 1; 771 break; 772 } 773 } 774 if (!pos0) { 775 memset(&output[i], 0, sizeof(struct r600_bc_output)); 776 output[i].gpr = 0; 777 output[i].elem_size = 3; 778 output[i].swizzle_x = 0; 779 output[i].swizzle_y = 1; 780 output[i].swizzle_z = 2; 781 output[i].swizzle_w = 3; 782 output[i].burst_count = 1; 783 output[i].barrier = 1; 784 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 785 output[i].array_base = 0; 786 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 787 noutput++; 788 } 789 } 790 /* add fake pixel export */ 791 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 792 memset(&output[0], 0, sizeof(struct r600_bc_output)); 793 output[0].gpr = 0; 794 output[0].elem_size = 3; 795 output[0].swizzle_x = 7; 796 output[0].swizzle_y = 7; 797 output[0].swizzle_z = 7; 798 output[0].swizzle_w = 7; 799 output[0].burst_count = 1; 800 output[0].barrier = 1; 801 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 802 output[0].array_base = 0; 803 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 804 noutput++; 805 } 806 /* set export done on last export of each type */ 807 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 808 if (i == (noutput - 1)) { 809 output[i].end_of_program = 1; 810 } 811 if (!(output_done & (1 << output[i].type))) { 812 output_done |= (1 << output[i].type); 813 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 814 } 815 } 816 /* add output 
to bytecode */ 817 for (i = 0; i < noutput; i++) { 818 r = r600_bc_add_output(ctx.bc, &output[i]); 819 if (r) 820 goto out_err; 821 } 822 free(ctx.literals); 823 tgsi_parse_free(&ctx.parse); 824 return 0; 825out_err: 826 free(ctx.literals); 827 tgsi_parse_free(&ctx.parse); 828 return r; 829} 830 831static int tgsi_unsupported(struct r600_shader_ctx *ctx) 832{ 833 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 834 return -EINVAL; 835} 836 837static int tgsi_end(struct r600_shader_ctx *ctx) 838{ 839 return 0; 840} 841 842static void r600_bc_src(struct r600_bc_alu_src *bc_src, 843 const struct r600_shader_src *shader_src, 844 unsigned chan) 845{ 846 bc_src->sel = shader_src->sel; 847 bc_src->chan = shader_src->swizzle[chan]; 848 bc_src->neg = shader_src->neg; 849 bc_src->abs = shader_src->abs; 850 bc_src->rel = shader_src->rel; 851 bc_src->value = shader_src->value[bc_src->chan]; 852} 853 854static void tgsi_dst(struct r600_shader_ctx *ctx, 855 const struct tgsi_full_dst_register *tgsi_dst, 856 unsigned swizzle, 857 struct r600_bc_alu_dst *r600_dst) 858{ 859 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 860 861 r600_dst->sel = tgsi_dst->Register.Index; 862 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 863 r600_dst->chan = swizzle; 864 r600_dst->write = 1; 865 if (tgsi_dst->Register.Indirect) 866 r600_dst->rel = V_SQ_REL_RELATIVE; 867 if (inst->Instruction.Saturate) { 868 r600_dst->clamp = 1; 869 } 870} 871 872static int tgsi_last_instruction(unsigned writemask) 873{ 874 int i, lasti = 0; 875 876 for (i = 0; i < 4; i++) { 877 if (writemask & (1 << i)) { 878 lasti = i; 879 } 880 } 881 return lasti; 882} 883 884static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 885{ 886 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 887 struct r600_bc_alu alu; 888 int i, j, r; 889 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 890 891 for (i = 0; i < lasti + 1; 
i++) { 892 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 893 continue; 894 895 memset(&alu, 0, sizeof(struct r600_bc_alu)); 896 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 897 898 alu.inst = ctx->inst_info->r600_opcode; 899 if (!swap) { 900 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 901 r600_bc_src(&alu.src[j], &ctx->src[j], i); 902 } 903 } else { 904 r600_bc_src(&alu.src[0], &ctx->src[1], i); 905 r600_bc_src(&alu.src[1], &ctx->src[0], i); 906 } 907 /* handle some special cases */ 908 switch (ctx->inst_info->tgsi_opcode) { 909 case TGSI_OPCODE_SUB: 910 alu.src[1].neg = 1; 911 break; 912 case TGSI_OPCODE_ABS: 913 alu.src[0].abs = 1; 914 break; 915 default: 916 break; 917 } 918 if (i == lasti) { 919 alu.last = 1; 920 } 921 r = r600_bc_add_alu(ctx->bc, &alu); 922 if (r) 923 return r; 924 } 925 return 0; 926} 927 928static int tgsi_op2(struct r600_shader_ctx *ctx) 929{ 930 return tgsi_op2_s(ctx, 0); 931} 932 933static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 934{ 935 return tgsi_op2_s(ctx, 1); 936} 937 938/* 939 * r600 - trunc to -PI..PI range 940 * r700 - normalize by dividing by 2PI 941 * see fdo bug 27901 942 */ 943static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 944{ 945 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 946 static float double_pi = 3.1415926535 * 2; 947 static float neg_pi = -3.1415926535; 948 949 int r; 950 struct r600_bc_alu alu; 951 952 memset(&alu, 0, sizeof(struct r600_bc_alu)); 953 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 954 alu.is_op3 = 1; 955 956 alu.dst.chan = 0; 957 alu.dst.sel = ctx->temp_reg; 958 alu.dst.write = 1; 959 960 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 961 962 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 963 alu.src[1].chan = 0; 964 alu.src[1].value = *(uint32_t *)&half_inv_pi; 965 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 966 alu.src[2].chan = 0; 967 alu.last = 1; 968 r = r600_bc_add_alu(ctx->bc, &alu); 969 if (r) 970 return r; 971 972 memset(&alu, 0, sizeof(struct r600_bc_alu)); 973 
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 974 975 alu.dst.chan = 0; 976 alu.dst.sel = ctx->temp_reg; 977 alu.dst.write = 1; 978 979 alu.src[0].sel = ctx->temp_reg; 980 alu.src[0].chan = 0; 981 alu.last = 1; 982 r = r600_bc_add_alu(ctx->bc, &alu); 983 if (r) 984 return r; 985 986 memset(&alu, 0, sizeof(struct r600_bc_alu)); 987 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 988 alu.is_op3 = 1; 989 990 alu.dst.chan = 0; 991 alu.dst.sel = ctx->temp_reg; 992 alu.dst.write = 1; 993 994 alu.src[0].sel = ctx->temp_reg; 995 alu.src[0].chan = 0; 996 997 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 998 alu.src[1].chan = 0; 999 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1000 alu.src[2].chan = 0; 1001 1002 if (ctx->bc->chiprev == CHIPREV_R600) { 1003 alu.src[1].value = *(uint32_t *)&double_pi; 1004 alu.src[2].value = *(uint32_t *)&neg_pi; 1005 } else { 1006 alu.src[1].sel = V_SQ_ALU_SRC_1; 1007 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1008 alu.src[2].neg = 1; 1009 } 1010 1011 alu.last = 1; 1012 r = r600_bc_add_alu(ctx->bc, &alu); 1013 if (r) 1014 return r; 1015 return 0; 1016} 1017 1018static int tgsi_trig(struct r600_shader_ctx *ctx) 1019{ 1020 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1021 struct r600_bc_alu alu; 1022 int i, r; 1023 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1024 1025 r = tgsi_setup_trig(ctx); 1026 if (r) 1027 return r; 1028 1029 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1030 alu.inst = ctx->inst_info->r600_opcode; 1031 alu.dst.chan = 0; 1032 alu.dst.sel = ctx->temp_reg; 1033 alu.dst.write = 1; 1034 1035 alu.src[0].sel = ctx->temp_reg; 1036 alu.src[0].chan = 0; 1037 alu.last = 1; 1038 r = r600_bc_add_alu(ctx->bc, &alu); 1039 if (r) 1040 return r; 1041 1042 /* replicate result */ 1043 for (i = 0; i < lasti + 1; i++) { 1044 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1045 continue; 1046 1047 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1048 alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1049 1050 alu.src[0].sel = ctx->temp_reg; 1051 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1052 if (i == lasti) 1053 alu.last = 1; 1054 r = r600_bc_add_alu(ctx->bc, &alu); 1055 if (r) 1056 return r; 1057 } 1058 return 0; 1059} 1060 1061static int tgsi_scs(struct r600_shader_ctx *ctx) 1062{ 1063 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1064 struct r600_bc_alu alu; 1065 int r; 1066 1067 /* We'll only need the trig stuff if we are going to write to the 1068 * X or Y components of the destination vector. 1069 */ 1070 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1071 r = tgsi_setup_trig(ctx); 1072 if (r) 1073 return r; 1074 } 1075 1076 /* dst.x = COS */ 1077 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1078 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1079 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1080 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1081 1082 alu.src[0].sel = ctx->temp_reg; 1083 alu.src[0].chan = 0; 1084 alu.last = 1; 1085 r = r600_bc_add_alu(ctx->bc, &alu); 1086 if (r) 1087 return r; 1088 } 1089 1090 /* dst.y = SIN */ 1091 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1092 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1093 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1094 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1095 1096 alu.src[0].sel = ctx->temp_reg; 1097 alu.src[0].chan = 0; 1098 alu.last = 1; 1099 r = r600_bc_add_alu(ctx->bc, &alu); 1100 if (r) 1101 return r; 1102 } 1103 1104 /* dst.z = 0.0; */ 1105 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1106 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1107 1108 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1109 1110 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1111 1112 alu.src[0].sel = V_SQ_ALU_SRC_0; 1113 alu.src[0].chan = 0; 1114 1115 alu.last = 1; 1116 1117 r = r600_bc_add_alu(ctx->bc, &alu); 1118 if (r) 1119 return r; 1120 } 1121 
1122 /* dst.w = 1.0; */ 1123 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1124 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1125 1126 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1127 1128 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1129 1130 alu.src[0].sel = V_SQ_ALU_SRC_1; 1131 alu.src[0].chan = 0; 1132 1133 alu.last = 1; 1134 1135 r = r600_bc_add_alu(ctx->bc, &alu); 1136 if (r) 1137 return r; 1138 } 1139 1140 return 0; 1141} 1142 1143static int tgsi_kill(struct r600_shader_ctx *ctx) 1144{ 1145 struct r600_bc_alu alu; 1146 int i, r; 1147 1148 for (i = 0; i < 4; i++) { 1149 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1150 alu.inst = ctx->inst_info->r600_opcode; 1151 1152 alu.dst.chan = i; 1153 1154 alu.src[0].sel = V_SQ_ALU_SRC_0; 1155 1156 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1157 alu.src[1].sel = V_SQ_ALU_SRC_1; 1158 alu.src[1].neg = 1; 1159 } else { 1160 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1161 } 1162 if (i == 3) { 1163 alu.last = 1; 1164 } 1165 r = r600_bc_add_alu(ctx->bc, &alu); 1166 if (r) 1167 return r; 1168 } 1169 1170 /* kill must be last in ALU */ 1171 ctx->bc->force_add_cf = 1; 1172 ctx->shader->uses_kill = TRUE; 1173 return 0; 1174} 1175 1176static int tgsi_lit(struct r600_shader_ctx *ctx) 1177{ 1178 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1179 struct r600_bc_alu alu; 1180 int r; 1181 1182 /* dst.x, <- 1.0 */ 1183 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1185 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1186 alu.src[0].chan = 0; 1187 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1188 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1189 r = r600_bc_add_alu(ctx->bc, &alu); 1190 if (r) 1191 return r; 1192 1193 /* dst.y = max(src.x, 0.0) */ 1194 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1195 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1196 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1197 
alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1198 alu.src[1].chan = 0; 1199 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1200 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1201 r = r600_bc_add_alu(ctx->bc, &alu); 1202 if (r) 1203 return r; 1204 1205 /* dst.w, <- 1.0 */ 1206 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1207 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1208 alu.src[0].sel = V_SQ_ALU_SRC_1; 1209 alu.src[0].chan = 0; 1210 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1211 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1212 alu.last = 1; 1213 r = r600_bc_add_alu(ctx->bc, &alu); 1214 if (r) 1215 return r; 1216 1217 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1218 { 1219 int chan; 1220 int sel; 1221 1222 /* dst.z = log(src.y) */ 1223 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1224 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1225 r600_bc_src(&alu.src[0], &ctx->src[0], 1); 1226 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1227 alu.last = 1; 1228 r = r600_bc_add_alu(ctx->bc, &alu); 1229 if (r) 1230 return r; 1231 1232 chan = alu.dst.chan; 1233 sel = alu.dst.sel; 1234 1235 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1236 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1237 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1238 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1239 alu.src[1].sel = sel; 1240 alu.src[1].chan = chan; 1241 1242 r600_bc_src(&alu.src[2], &ctx->src[0], 0); 1243 alu.dst.sel = ctx->temp_reg; 1244 alu.dst.chan = 0; 1245 alu.dst.write = 1; 1246 alu.is_op3 = 1; 1247 alu.last = 1; 1248 r = r600_bc_add_alu(ctx->bc, &alu); 1249 if (r) 1250 return r; 1251 1252 /* dst.z = exp(tmp.x) */ 1253 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1254 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1255 alu.src[0].sel = ctx->temp_reg; 1256 alu.src[0].chan = 0; 1257 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1258 alu.last = 1; 1259 r = r600_bc_add_alu(ctx->bc, &alu); 1260 if (r) 
1261 return r; 1262 } 1263 return 0; 1264} 1265 1266static int tgsi_rsq(struct r600_shader_ctx *ctx) 1267{ 1268 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1269 struct r600_bc_alu alu; 1270 int i, r; 1271 1272 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1273 1274 /* FIXME: 1275 * For state trackers other than OpenGL, we'll want to use 1276 * _RECIPSQRT_IEEE instead. 1277 */ 1278 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1279 1280 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1281 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1282 alu.src[i].abs = 1; 1283 } 1284 alu.dst.sel = ctx->temp_reg; 1285 alu.dst.write = 1; 1286 alu.last = 1; 1287 r = r600_bc_add_alu(ctx->bc, &alu); 1288 if (r) 1289 return r; 1290 /* replicate result */ 1291 return tgsi_helper_tempx_replicate(ctx); 1292} 1293 1294static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1295{ 1296 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1297 struct r600_bc_alu alu; 1298 int i, r; 1299 1300 for (i = 0; i < 4; i++) { 1301 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1302 alu.src[0].sel = ctx->temp_reg; 1303 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1304 alu.dst.chan = i; 1305 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1306 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1307 if (i == 3) 1308 alu.last = 1; 1309 r = r600_bc_add_alu(ctx->bc, &alu); 1310 if (r) 1311 return r; 1312 } 1313 return 0; 1314} 1315 1316static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1317{ 1318 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1319 struct r600_bc_alu alu; 1320 int i, r; 1321 1322 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1323 alu.inst = ctx->inst_info->r600_opcode; 1324 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1325 r600_bc_src(&alu.src[i], &ctx->src[i], 0); 1326 } 1327 alu.dst.sel = ctx->temp_reg; 1328 alu.dst.write = 1; 1329 
alu.last = 1; 1330 r = r600_bc_add_alu(ctx->bc, &alu); 1331 if (r) 1332 return r; 1333 /* replicate result */ 1334 return tgsi_helper_tempx_replicate(ctx); 1335} 1336 1337static int tgsi_pow(struct r600_shader_ctx *ctx) 1338{ 1339 struct r600_bc_alu alu; 1340 int r; 1341 1342 /* LOG2(a) */ 1343 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1344 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1345 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1346 alu.dst.sel = ctx->temp_reg; 1347 alu.dst.write = 1; 1348 alu.last = 1; 1349 r = r600_bc_add_alu(ctx->bc, &alu); 1350 if (r) 1351 return r; 1352 /* b * LOG2(a) */ 1353 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1354 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1355 r600_bc_src(&alu.src[0], &ctx->src[1], 0); 1356 alu.src[1].sel = ctx->temp_reg; 1357 alu.dst.sel = ctx->temp_reg; 1358 alu.dst.write = 1; 1359 alu.last = 1; 1360 r = r600_bc_add_alu(ctx->bc, &alu); 1361 if (r) 1362 return r; 1363 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1364 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1365 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1366 alu.src[0].sel = ctx->temp_reg; 1367 alu.dst.sel = ctx->temp_reg; 1368 alu.dst.write = 1; 1369 alu.last = 1; 1370 r = r600_bc_add_alu(ctx->bc, &alu); 1371 if (r) 1372 return r; 1373 return tgsi_helper_tempx_replicate(ctx); 1374} 1375 1376static int tgsi_ssg(struct r600_shader_ctx *ctx) 1377{ 1378 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1379 struct r600_bc_alu alu; 1380 int i, r; 1381 1382 /* tmp = (src > 0 ? 
1 : src) */ 1383 for (i = 0; i < 4; i++) { 1384 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1385 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1386 alu.is_op3 = 1; 1387 1388 alu.dst.sel = ctx->temp_reg; 1389 alu.dst.chan = i; 1390 1391 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1392 alu.src[1].sel = V_SQ_ALU_SRC_1; 1393 r600_bc_src(&alu.src[2], &ctx->src[0], i); 1394 1395 if (i == 3) 1396 alu.last = 1; 1397 r = r600_bc_add_alu(ctx->bc, &alu); 1398 if (r) 1399 return r; 1400 } 1401 1402 /* dst = (-tmp > 0 ? -1 : tmp) */ 1403 for (i = 0; i < 4; i++) { 1404 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1405 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1406 alu.is_op3 = 1; 1407 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1408 1409 alu.src[0].sel = ctx->temp_reg; 1410 alu.src[0].chan = i; 1411 alu.src[0].neg = 1; 1412 1413 alu.src[1].sel = V_SQ_ALU_SRC_1; 1414 alu.src[1].neg = 1; 1415 1416 alu.src[2].sel = ctx->temp_reg; 1417 alu.src[2].chan = i; 1418 1419 if (i == 3) 1420 alu.last = 1; 1421 r = r600_bc_add_alu(ctx->bc, &alu); 1422 if (r) 1423 return r; 1424 } 1425 return 0; 1426} 1427 1428static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1429{ 1430 struct r600_bc_alu alu; 1431 int i, r; 1432 1433 for (i = 0; i < 4; i++) { 1434 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1435 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1436 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1437 alu.dst.chan = i; 1438 } else { 1439 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1440 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1441 alu.src[0].sel = ctx->temp_reg; 1442 alu.src[0].chan = i; 1443 } 1444 if (i == 3) { 1445 alu.last = 1; 1446 } 1447 r = r600_bc_add_alu(ctx->bc, &alu); 1448 if (r) 1449 return r; 1450 } 1451 return 0; 1452} 1453 1454static int tgsi_op3(struct r600_shader_ctx *ctx) 1455{ 1456 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1457 struct 
r600_bc_alu alu; 1458 int i, j, r; 1459 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1460 1461 for (i = 0; i < lasti + 1; i++) { 1462 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1463 continue; 1464 1465 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1466 alu.inst = ctx->inst_info->r600_opcode; 1467 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1468 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1469 } 1470 1471 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1472 alu.dst.chan = i; 1473 alu.dst.write = 1; 1474 alu.is_op3 = 1; 1475 if (i == lasti) { 1476 alu.last = 1; 1477 } 1478 r = r600_bc_add_alu(ctx->bc, &alu); 1479 if (r) 1480 return r; 1481 } 1482 return 0; 1483} 1484 1485static int tgsi_dp(struct r600_shader_ctx *ctx) 1486{ 1487 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1488 struct r600_bc_alu alu; 1489 int i, j, r; 1490 1491 for (i = 0; i < 4; i++) { 1492 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1493 alu.inst = ctx->inst_info->r600_opcode; 1494 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1495 r600_bc_src(&alu.src[j], &ctx->src[j], i); 1496 } 1497 1498 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1499 alu.dst.chan = i; 1500 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1501 /* handle some special cases */ 1502 switch (ctx->inst_info->tgsi_opcode) { 1503 case TGSI_OPCODE_DP2: 1504 if (i > 1) { 1505 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1506 alu.src[0].chan = alu.src[1].chan = 0; 1507 } 1508 break; 1509 case TGSI_OPCODE_DP3: 1510 if (i > 2) { 1511 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1512 alu.src[0].chan = alu.src[1].chan = 0; 1513 } 1514 break; 1515 case TGSI_OPCODE_DPH: 1516 if (i == 3) { 1517 alu.src[0].sel = V_SQ_ALU_SRC_1; 1518 alu.src[0].chan = 0; 1519 alu.src[0].neg = 0; 1520 } 1521 break; 1522 default: 1523 break; 1524 } 1525 if (i == 3) { 1526 alu.last = 1; 1527 } 1528 r = r600_bc_add_alu(ctx->bc, &alu); 1529 if (r) 1530 return r; 1531 } 1532 
return 0; 1533} 1534 1535static int tgsi_tex(struct r600_shader_ctx *ctx) 1536{ 1537 static float one_point_five = 1.5f; 1538 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1539 struct r600_bc_tex tex; 1540 struct r600_bc_alu alu; 1541 unsigned src_gpr; 1542 int r, i; 1543 int opcode; 1544 /* Texture fetch instructions can only use gprs as source. 1545 * Also they cannot negate the source or take the absolute value */ 1546 const boolean src_requires_loading = 1547 (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && 1548 inst->Src[0].Register.File != TGSI_FILE_INPUT) || 1549 ctx->src[0].neg || ctx->src[0].abs; 1550 boolean src_loaded = FALSE; 1551 1552 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1553 1554 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1555 /* Add perspective divide */ 1556 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1557 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1558 r600_bc_src(&alu.src[0], &ctx->src[0], 3); 1559 1560 alu.dst.sel = ctx->temp_reg; 1561 alu.dst.chan = 3; 1562 alu.last = 1; 1563 alu.dst.write = 1; 1564 r = r600_bc_add_alu(ctx->bc, &alu); 1565 if (r) 1566 return r; 1567 1568 for (i = 0; i < 3; i++) { 1569 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1570 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1571 alu.src[0].sel = ctx->temp_reg; 1572 alu.src[0].chan = 3; 1573 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1574 alu.dst.sel = ctx->temp_reg; 1575 alu.dst.chan = i; 1576 alu.dst.write = 1; 1577 r = r600_bc_add_alu(ctx->bc, &alu); 1578 if (r) 1579 return r; 1580 } 1581 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1582 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1583 alu.src[0].sel = V_SQ_ALU_SRC_1; 1584 alu.src[0].chan = 0; 1585 alu.dst.sel = ctx->temp_reg; 1586 alu.dst.chan = 3; 1587 alu.last = 1; 1588 alu.dst.write = 1; 1589 r = r600_bc_add_alu(ctx->bc, &alu); 1590 if (r) 1591 return r; 1592 src_loaded = TRUE; 1593 
src_gpr = ctx->temp_reg; 1594 } 1595 1596 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1597 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 1598 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 1599 1600 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1601 for (i = 0; i < 4; i++) { 1602 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1603 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1604 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 1605 r600_bc_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 1606 alu.dst.sel = ctx->temp_reg; 1607 alu.dst.chan = i; 1608 if (i == 3) 1609 alu.last = 1; 1610 alu.dst.write = 1; 1611 r = r600_bc_add_alu(ctx->bc, &alu); 1612 if (r) 1613 return r; 1614 } 1615 1616 /* tmp1.z = RCP_e(|tmp1.z|) */ 1617 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1618 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1619 alu.src[0].sel = ctx->temp_reg; 1620 alu.src[0].chan = 2; 1621 alu.src[0].abs = 1; 1622 alu.dst.sel = ctx->temp_reg; 1623 alu.dst.chan = 2; 1624 alu.dst.write = 1; 1625 alu.last = 1; 1626 r = r600_bc_add_alu(ctx->bc, &alu); 1627 if (r) 1628 return r; 1629 1630 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1631 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1632 * muladd has no writemask, have to use another temp 1633 */ 1634 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1635 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1636 alu.is_op3 = 1; 1637 1638 alu.src[0].sel = ctx->temp_reg; 1639 alu.src[0].chan = 0; 1640 alu.src[1].sel = ctx->temp_reg; 1641 alu.src[1].chan = 2; 1642 1643 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1644 alu.src[2].chan = 0; 1645 alu.src[2].value = *(uint32_t *)&one_point_five; 1646 1647 alu.dst.sel = ctx->temp_reg; 1648 alu.dst.chan = 0; 1649 alu.dst.write = 1; 1650 1651 r = r600_bc_add_alu(ctx->bc, &alu); 1652 if (r) 1653 return r; 1654 1655 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1656 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1657 
alu.is_op3 = 1; 1658 1659 alu.src[0].sel = ctx->temp_reg; 1660 alu.src[0].chan = 1; 1661 alu.src[1].sel = ctx->temp_reg; 1662 alu.src[1].chan = 2; 1663 1664 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1665 alu.src[2].chan = 0; 1666 alu.src[2].value = *(uint32_t *)&one_point_five; 1667 1668 alu.dst.sel = ctx->temp_reg; 1669 alu.dst.chan = 1; 1670 alu.dst.write = 1; 1671 1672 alu.last = 1; 1673 r = r600_bc_add_alu(ctx->bc, &alu); 1674 if (r) 1675 return r; 1676 1677 src_loaded = TRUE; 1678 src_gpr = ctx->temp_reg; 1679 } 1680 1681 if (src_requires_loading && !src_loaded) { 1682 for (i = 0; i < 4; i++) { 1683 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1684 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1685 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1686 alu.dst.sel = ctx->temp_reg; 1687 alu.dst.chan = i; 1688 if (i == 3) 1689 alu.last = 1; 1690 alu.dst.write = 1; 1691 r = r600_bc_add_alu(ctx->bc, &alu); 1692 if (r) 1693 return r; 1694 } 1695 src_loaded = TRUE; 1696 src_gpr = ctx->temp_reg; 1697 } 1698 1699 opcode = ctx->inst_info->r600_opcode; 1700 if (opcode == SQ_TEX_INST_SAMPLE && 1701 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1702 opcode = SQ_TEX_INST_SAMPLE_C; 1703 1704 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1705 tex.inst = opcode; 1706 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1707 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 1708 tex.src_gpr = src_gpr; 1709 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1710 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 1711 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 1712 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 1713 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 
3 : 7; 1714 if (src_loaded) { 1715 tex.src_sel_x = 0; 1716 tex.src_sel_y = 1; 1717 tex.src_sel_z = 2; 1718 tex.src_sel_w = 3; 1719 } else { 1720 tex.src_sel_x = ctx->src[0].swizzle[0]; 1721 tex.src_sel_y = ctx->src[0].swizzle[1]; 1722 tex.src_sel_z = ctx->src[0].swizzle[2]; 1723 tex.src_sel_w = ctx->src[0].swizzle[3]; 1724 tex.src_rel = ctx->src[0].rel; 1725 } 1726 1727 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1728 tex.src_sel_x = 1; 1729 tex.src_sel_y = 0; 1730 tex.src_sel_z = 3; 1731 tex.src_sel_w = 1; 1732 } 1733 1734 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1735 tex.coord_type_x = 1; 1736 tex.coord_type_y = 1; 1737 tex.coord_type_z = 1; 1738 tex.coord_type_w = 1; 1739 } 1740 1741 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { 1742 tex.coord_type_z = 0; 1743 tex.src_sel_z = tex.src_sel_y; 1744 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) 1745 tex.coord_type_z = 0; 1746 1747 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1748 tex.src_sel_w = tex.src_sel_z; 1749 1750 r = r600_bc_add_tex(ctx->bc, &tex); 1751 if (r) 1752 return r; 1753 1754 /* add shadow ambient support - gallium doesn't do it yet */ 1755 return 0; 1756} 1757 1758static int tgsi_lrp(struct r600_shader_ctx *ctx) 1759{ 1760 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1761 struct r600_bc_alu alu; 1762 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1763 unsigned i; 1764 int r; 1765 1766 /* optimize if it's just an equal balance */ 1767 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 1768 for (i = 0; i < lasti + 1; i++) { 1769 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1770 continue; 1771 1772 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1773 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1774 r600_bc_src(&alu.src[0], &ctx->src[1], i); 1775 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1776 alu.omod = 3; 1777 tgsi_dst(ctx, &inst->Dst[0], i, 
&alu.dst); 1778 alu.dst.chan = i; 1779 if (i == lasti) { 1780 alu.last = 1; 1781 } 1782 r = r600_bc_add_alu(ctx->bc, &alu); 1783 if (r) 1784 return r; 1785 } 1786 return 0; 1787 } 1788 1789 /* 1 - src0 */ 1790 for (i = 0; i < lasti + 1; i++) { 1791 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1792 continue; 1793 1794 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1795 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1796 alu.src[0].sel = V_SQ_ALU_SRC_1; 1797 alu.src[0].chan = 0; 1798 r600_bc_src(&alu.src[1], &ctx->src[0], i); 1799 alu.src[1].neg = 1; 1800 alu.dst.sel = ctx->temp_reg; 1801 alu.dst.chan = i; 1802 if (i == lasti) { 1803 alu.last = 1; 1804 } 1805 alu.dst.write = 1; 1806 r = r600_bc_add_alu(ctx->bc, &alu); 1807 if (r) 1808 return r; 1809 } 1810 1811 /* (1 - src0) * src2 */ 1812 for (i = 0; i < lasti + 1; i++) { 1813 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1814 continue; 1815 1816 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1817 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1818 alu.src[0].sel = ctx->temp_reg; 1819 alu.src[0].chan = i; 1820 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1821 alu.dst.sel = ctx->temp_reg; 1822 alu.dst.chan = i; 1823 if (i == lasti) { 1824 alu.last = 1; 1825 } 1826 alu.dst.write = 1; 1827 r = r600_bc_add_alu(ctx->bc, &alu); 1828 if (r) 1829 return r; 1830 } 1831 1832 /* src0 * src1 + (1 - src0) * src2 */ 1833 for (i = 0; i < lasti + 1; i++) { 1834 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1835 continue; 1836 1837 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1838 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1839 alu.is_op3 = 1; 1840 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1841 r600_bc_src(&alu.src[1], &ctx->src[1], i); 1842 alu.src[2].sel = ctx->temp_reg; 1843 alu.src[2].chan = i; 1844 1845 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1846 alu.dst.chan = i; 1847 if (i == lasti) { 1848 alu.last = 1; 1849 } 1850 r = r600_bc_add_alu(ctx->bc, &alu); 1851 if (r) 1852 return 
r; 1853 } 1854 return 0; 1855} 1856 1857static int tgsi_cmp(struct r600_shader_ctx *ctx) 1858{ 1859 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1860 struct r600_bc_alu alu; 1861 int i, r; 1862 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1863 1864 for (i = 0; i < lasti + 1; i++) { 1865 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1866 continue; 1867 1868 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1869 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 1870 r600_bc_src(&alu.src[0], &ctx->src[0], i); 1871 r600_bc_src(&alu.src[1], &ctx->src[2], i); 1872 r600_bc_src(&alu.src[2], &ctx->src[1], i); 1873 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1874 alu.dst.chan = i; 1875 alu.dst.write = 1; 1876 alu.is_op3 = 1; 1877 if (i == lasti) 1878 alu.last = 1; 1879 r = r600_bc_add_alu(ctx->bc, &alu); 1880 if (r) 1881 return r; 1882 } 1883 return 0; 1884} 1885 1886static int tgsi_xpd(struct r600_shader_ctx *ctx) 1887{ 1888 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1889 static const unsigned int src0_swizzle[] = {2, 0, 1}; 1890 static const unsigned int src1_swizzle[] = {1, 2, 0}; 1891 struct r600_bc_alu alu; 1892 uint32_t use_temp = 0; 1893 int i, r; 1894 1895 if (inst->Dst[0].Register.WriteMask != 0xf) 1896 use_temp = 1; 1897 1898 for (i = 0; i < 4; i++) { 1899 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1900 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1901 if (i < 3) { 1902 r600_bc_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 1903 r600_bc_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 1904 } else { 1905 alu.src[0].sel = V_SQ_ALU_SRC_0; 1906 alu.src[0].chan = i; 1907 alu.src[1].sel = V_SQ_ALU_SRC_0; 1908 alu.src[1].chan = i; 1909 } 1910 1911 alu.dst.sel = ctx->temp_reg; 1912 alu.dst.chan = i; 1913 alu.dst.write = 1; 1914 1915 if (i == 3) 1916 alu.last = 1; 1917 r = r600_bc_add_alu(ctx->bc, &alu); 1918 if (r) 1919 return r; 1920 } 1921 1922 for (i = 0; i < 
4; i++) { 1923 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1924 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1925 1926 if (i < 3) { 1927 r600_bc_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 1928 r600_bc_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 1929 } else { 1930 alu.src[0].sel = V_SQ_ALU_SRC_0; 1931 alu.src[0].chan = i; 1932 alu.src[1].sel = V_SQ_ALU_SRC_0; 1933 alu.src[1].chan = i; 1934 } 1935 1936 alu.src[2].sel = ctx->temp_reg; 1937 alu.src[2].neg = 1; 1938 alu.src[2].chan = i; 1939 1940 if (use_temp) 1941 alu.dst.sel = ctx->temp_reg; 1942 else 1943 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1944 alu.dst.chan = i; 1945 alu.dst.write = 1; 1946 alu.is_op3 = 1; 1947 if (i == 3) 1948 alu.last = 1; 1949 r = r600_bc_add_alu(ctx->bc, &alu); 1950 if (r) 1951 return r; 1952 } 1953 if (use_temp) 1954 return tgsi_helper_copy(ctx, inst); 1955 return 0; 1956} 1957 1958static int tgsi_exp(struct r600_shader_ctx *ctx) 1959{ 1960 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1961 struct r600_bc_alu alu; 1962 int r; 1963 1964 /* result.x = 2^floor(src); */ 1965 if (inst->Dst[0].Register.WriteMask & 1) { 1966 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1967 1968 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 1969 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1970 1971 alu.dst.sel = ctx->temp_reg; 1972 alu.dst.chan = 0; 1973 alu.dst.write = 1; 1974 alu.last = 1; 1975 r = r600_bc_add_alu(ctx->bc, &alu); 1976 if (r) 1977 return r; 1978 1979 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1980 alu.src[0].sel = ctx->temp_reg; 1981 alu.src[0].chan = 0; 1982 1983 alu.dst.sel = ctx->temp_reg; 1984 alu.dst.chan = 0; 1985 alu.dst.write = 1; 1986 alu.last = 1; 1987 r = r600_bc_add_alu(ctx->bc, &alu); 1988 if (r) 1989 return r; 1990 } 1991 1992 /* result.y = tmp - floor(tmp); */ 1993 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 1994 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1995 1996 alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1997 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 1998 1999 alu.dst.sel = ctx->temp_reg; 2000// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2001// if (r) 2002// return r; 2003 alu.dst.write = 1; 2004 alu.dst.chan = 1; 2005 2006 alu.last = 1; 2007 2008 r = r600_bc_add_alu(ctx->bc, &alu); 2009 if (r) 2010 return r; 2011 } 2012 2013 /* result.z = RoughApprox2ToX(tmp);*/ 2014 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2015 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2016 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2017 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2018 2019 alu.dst.sel = ctx->temp_reg; 2020 alu.dst.write = 1; 2021 alu.dst.chan = 2; 2022 2023 alu.last = 1; 2024 2025 r = r600_bc_add_alu(ctx->bc, &alu); 2026 if (r) 2027 return r; 2028 } 2029 2030 /* result.w = 1.0;*/ 2031 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2032 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2033 2034 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2035 alu.src[0].sel = V_SQ_ALU_SRC_1; 2036 alu.src[0].chan = 0; 2037 2038 alu.dst.sel = ctx->temp_reg; 2039 alu.dst.chan = 3; 2040 alu.dst.write = 1; 2041 alu.last = 1; 2042 r = r600_bc_add_alu(ctx->bc, &alu); 2043 if (r) 2044 return r; 2045 } 2046 return tgsi_helper_copy(ctx, inst); 2047} 2048 2049static int tgsi_log(struct r600_shader_ctx *ctx) 2050{ 2051 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2052 struct r600_bc_alu alu; 2053 int r; 2054 2055 /* result.x = floor(log2(src)); */ 2056 if (inst->Dst[0].Register.WriteMask & 1) { 2057 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2058 2059 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2060 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2061 2062 alu.dst.sel = ctx->temp_reg; 2063 alu.dst.chan = 0; 2064 alu.dst.write = 1; 2065 alu.last = 1; 2066 r = r600_bc_add_alu(ctx->bc, &alu); 2067 if (r) 2068 return r; 2069 2070 alu.inst = 
CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2071 alu.src[0].sel = ctx->temp_reg; 2072 alu.src[0].chan = 0; 2073 2074 alu.dst.sel = ctx->temp_reg; 2075 alu.dst.chan = 0; 2076 alu.dst.write = 1; 2077 alu.last = 1; 2078 2079 r = r600_bc_add_alu(ctx->bc, &alu); 2080 if (r) 2081 return r; 2082 } 2083 2084 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2085 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2086 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2087 2088 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2089 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2090 2091 alu.dst.sel = ctx->temp_reg; 2092 alu.dst.chan = 1; 2093 alu.dst.write = 1; 2094 alu.last = 1; 2095 2096 r = r600_bc_add_alu(ctx->bc, &alu); 2097 if (r) 2098 return r; 2099 2100 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2101 2102 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2103 alu.src[0].sel = ctx->temp_reg; 2104 alu.src[0].chan = 1; 2105 2106 alu.dst.sel = ctx->temp_reg; 2107 alu.dst.chan = 1; 2108 alu.dst.write = 1; 2109 alu.last = 1; 2110 2111 r = r600_bc_add_alu(ctx->bc, &alu); 2112 if (r) 2113 return r; 2114 2115 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2116 2117 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2118 alu.src[0].sel = ctx->temp_reg; 2119 alu.src[0].chan = 1; 2120 2121 alu.dst.sel = ctx->temp_reg; 2122 alu.dst.chan = 1; 2123 alu.dst.write = 1; 2124 alu.last = 1; 2125 2126 r = r600_bc_add_alu(ctx->bc, &alu); 2127 if (r) 2128 return r; 2129 2130 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2131 2132 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2133 alu.src[0].sel = ctx->temp_reg; 2134 alu.src[0].chan = 1; 2135 2136 alu.dst.sel = ctx->temp_reg; 2137 alu.dst.chan = 1; 2138 alu.dst.write = 1; 2139 alu.last = 1; 2140 2141 r = r600_bc_add_alu(ctx->bc, &alu); 2142 if (r) 2143 return r; 2144 2145 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2146 2147 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2148 2149 
r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2150 2151 alu.src[1].sel = ctx->temp_reg; 2152 alu.src[1].chan = 1; 2153 2154 alu.dst.sel = ctx->temp_reg; 2155 alu.dst.chan = 1; 2156 alu.dst.write = 1; 2157 alu.last = 1; 2158 2159 r = r600_bc_add_alu(ctx->bc, &alu); 2160 if (r) 2161 return r; 2162 } 2163 2164 /* result.z = log2(src);*/ 2165 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2166 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2167 2168 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2169 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2170 2171 alu.dst.sel = ctx->temp_reg; 2172 alu.dst.write = 1; 2173 alu.dst.chan = 2; 2174 alu.last = 1; 2175 2176 r = r600_bc_add_alu(ctx->bc, &alu); 2177 if (r) 2178 return r; 2179 } 2180 2181 /* result.w = 1.0; */ 2182 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2183 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2184 2185 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2186 alu.src[0].sel = V_SQ_ALU_SRC_1; 2187 alu.src[0].chan = 0; 2188 2189 alu.dst.sel = ctx->temp_reg; 2190 alu.dst.chan = 3; 2191 alu.dst.write = 1; 2192 alu.last = 1; 2193 2194 r = r600_bc_add_alu(ctx->bc, &alu); 2195 if (r) 2196 return r; 2197 } 2198 2199 return tgsi_helper_copy(ctx, inst); 2200} 2201 2202static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2203{ 2204 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2205 struct r600_bc_alu alu; 2206 int r; 2207 2208 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2209 2210 switch (inst->Instruction.Opcode) { 2211 case TGSI_OPCODE_ARL: 2212 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2213 break; 2214 case TGSI_OPCODE_ARR: 2215 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2216 break; 2217 default: 2218 assert(0); 2219 return -1; 2220 } 2221 2222 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2223 alu.last = 1; 2224 alu.dst.sel = ctx->ar_reg; 2225 alu.dst.write = 1; 2226 r = r600_bc_add_alu(ctx->bc, &alu); 2227 if (r) 2228 return r; 2229 
2230 /* TODO: Note that the MOVA can be avoided if we never use AR for 2231 * indexing non-CB registers in the current ALU clause. Similarly, we 2232 * need to load AR from ar_reg again if we started a new clause 2233 * between ARL and AR usage. The easy way to do that is to remove 2234 * the MOVA here, and load it for the first AR access after ar_reg 2235 * has been modified in each clause. */ 2236 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2237 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2238 alu.src[0].sel = ctx->ar_reg; 2239 alu.src[0].chan = 0; 2240 alu.last = 1; 2241 r = r600_bc_add_alu(ctx->bc, &alu); 2242 if (r) 2243 return r; 2244 return 0; 2245} 2246static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2247{ 2248 /* TODO from r600c, ar values don't persist between clauses */ 2249 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2250 struct r600_bc_alu alu; 2251 int r; 2252 2253 switch (inst->Instruction.Opcode) { 2254 case TGSI_OPCODE_ARL: 2255 memset(&alu, 0, sizeof(alu)); 2256 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 2257 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2258 alu.dst.sel = ctx->ar_reg; 2259 alu.dst.write = 1; 2260 alu.last = 1; 2261 2262 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2263 return r; 2264 2265 memset(&alu, 0, sizeof(alu)); 2266 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2267 alu.src[0].sel = ctx->ar_reg; 2268 alu.dst.sel = ctx->ar_reg; 2269 alu.dst.write = 1; 2270 alu.last = 1; 2271 2272 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2273 return r; 2274 break; 2275 case TGSI_OPCODE_ARR: 2276 memset(&alu, 0, sizeof(alu)); 2277 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2278 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2279 alu.dst.sel = ctx->ar_reg; 2280 alu.dst.write = 1; 2281 alu.last = 1; 2282 2283 if ((r = r600_bc_add_alu(ctx->bc, &alu))) 2284 return r; 2285 break; 2286 default: 2287 assert(0); 2288 return -1; 2289 } 2290 2291 memset(&alu, 0, sizeof(alu)); 2292 
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2293 alu.src[0].sel = ctx->ar_reg; 2294 alu.last = 1; 2295 2296 r = r600_bc_add_alu(ctx->bc, &alu); 2297 if (r) 2298 return r; 2299 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2300 return 0; 2301} 2302 2303static int tgsi_opdst(struct r600_shader_ctx *ctx) 2304{ 2305 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2306 struct r600_bc_alu alu; 2307 int i, r = 0; 2308 2309 for (i = 0; i < 4; i++) { 2310 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2311 2312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2313 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2314 2315 if (i == 0 || i == 3) { 2316 alu.src[0].sel = V_SQ_ALU_SRC_1; 2317 } else { 2318 r600_bc_src(&alu.src[0], &ctx->src[0], i); 2319 } 2320 2321 if (i == 0 || i == 2) { 2322 alu.src[1].sel = V_SQ_ALU_SRC_1; 2323 } else { 2324 r600_bc_src(&alu.src[1], &ctx->src[1], i); 2325 } 2326 if (i == 3) 2327 alu.last = 1; 2328 r = r600_bc_add_alu(ctx->bc, &alu); 2329 if (r) 2330 return r; 2331 } 2332 return 0; 2333} 2334 2335static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2336{ 2337 struct r600_bc_alu alu; 2338 int r; 2339 2340 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2341 alu.inst = opcode; 2342 alu.predicate = 1; 2343 2344 alu.dst.sel = ctx->temp_reg; 2345 alu.dst.write = 1; 2346 alu.dst.chan = 0; 2347 2348 r600_bc_src(&alu.src[0], &ctx->src[0], 0); 2349 alu.src[1].sel = V_SQ_ALU_SRC_0; 2350 alu.src[1].chan = 0; 2351 2352 alu.last = 1; 2353 2354 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2355 if (r) 2356 return r; 2357 return 0; 2358} 2359 2360static int pops(struct r600_shader_ctx *ctx, int pops) 2361{ 2362 int alu_pop = 3; 2363 if (ctx->bc->cf_last) { 2364 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) 2365 alu_pop = 0; 2366 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) 2367 
alu_pop = 1; 2368 } 2369 alu_pop += pops; 2370 if (alu_pop == 1) { 2371 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; 2372 ctx->bc->force_add_cf = 1; 2373 } else if (alu_pop == 2) { 2374 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; 2375 ctx->bc->force_add_cf = 1; 2376 } else { 2377 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2378 ctx->bc->cf_last->pop_count = pops; 2379 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 2380 } 2381 return 0; 2382} 2383 2384static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2385{ 2386 switch(reason) { 2387 case FC_PUSH_VPM: 2388 ctx->bc->callstack[ctx->bc->call_sp].current--; 2389 break; 2390 case FC_PUSH_WQM: 2391 case FC_LOOP: 2392 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2393 break; 2394 case FC_REP: 2395 /* TOODO : for 16 vp asic should -= 2; */ 2396 ctx->bc->callstack[ctx->bc->call_sp].current --; 2397 break; 2398 } 2399} 2400 2401static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2402{ 2403 if (check_max_only) { 2404 int diff; 2405 switch (reason) { 2406 case FC_PUSH_VPM: 2407 diff = 1; 2408 break; 2409 case FC_PUSH_WQM: 2410 diff = 4; 2411 break; 2412 default: 2413 assert(0); 2414 diff = 0; 2415 } 2416 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2417 ctx->bc->callstack[ctx->bc->call_sp].max) { 2418 ctx->bc->callstack[ctx->bc->call_sp].max = 2419 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2420 } 2421 return; 2422 } 2423 switch (reason) { 2424 case FC_PUSH_VPM: 2425 ctx->bc->callstack[ctx->bc->call_sp].current++; 2426 break; 2427 case FC_PUSH_WQM: 2428 case FC_LOOP: 2429 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2430 break; 2431 case FC_REP: 2432 ctx->bc->callstack[ctx->bc->call_sp].current++; 2433 break; 2434 } 2435 2436 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2437 
ctx->bc->callstack[ctx->bc->call_sp].max) { 2438 ctx->bc->callstack[ctx->bc->call_sp].max = 2439 ctx->bc->callstack[ctx->bc->call_sp].current; 2440 } 2441} 2442 2443static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2444{ 2445 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2446 2447 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2448 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2449 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2450 sp->num_mid++; 2451} 2452 2453static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2454{ 2455 ctx->bc->fc_sp++; 2456 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2457 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2458} 2459 2460static void fc_poplevel(struct r600_shader_ctx *ctx) 2461{ 2462 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2463 if (sp->mid) { 2464 free(sp->mid); 2465 sp->mid = NULL; 2466 } 2467 sp->num_mid = 0; 2468 sp->start = NULL; 2469 sp->type = 0; 2470 ctx->bc->fc_sp--; 2471} 2472 2473#if 0 2474static int emit_return(struct r600_shader_ctx *ctx) 2475{ 2476 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2477 return 0; 2478} 2479 2480static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2481{ 2482 2483 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2484 ctx->bc->cf_last->pop_count = pops; 2485 /* TODO work out offset */ 2486 return 0; 2487} 2488 2489static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2490{ 2491 return 0; 2492} 2493 2494static void emit_testflag(struct r600_shader_ctx *ctx) 2495{ 2496 2497} 2498 2499static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2500{ 2501 emit_testflag(ctx); 2502 emit_jump_to_offset(ctx, 1, 4); 2503 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2504 pops(ctx, ifidx + 1); 2505 emit_return(ctx); 2506} 2507 2508static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 
2509{ 2510 emit_testflag(ctx); 2511 2512 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2513 ctx->bc->cf_last->pop_count = 1; 2514 2515 fc_set_mid(ctx, fc_sp); 2516 2517 pops(ctx, 1); 2518} 2519#endif 2520 2521static int tgsi_if(struct r600_shader_ctx *ctx) 2522{ 2523 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2524 2525 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2526 2527 fc_pushlevel(ctx, FC_IF); 2528 2529 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2530 return 0; 2531} 2532 2533static int tgsi_else(struct r600_shader_ctx *ctx) 2534{ 2535 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2536 ctx->bc->cf_last->pop_count = 1; 2537 2538 fc_set_mid(ctx, ctx->bc->fc_sp); 2539 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2540 return 0; 2541} 2542 2543static int tgsi_endif(struct r600_shader_ctx *ctx) 2544{ 2545 pops(ctx, 1); 2546 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2547 R600_ERR("if/endif unbalanced in shader\n"); 2548 return -1; 2549 } 2550 2551 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2552 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2553 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2554 } else { 2555 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2556 } 2557 fc_poplevel(ctx); 2558 2559 callstack_decrease_current(ctx, FC_PUSH_VPM); 2560 return 0; 2561} 2562 2563static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2564{ 2565 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2566 2567 fc_pushlevel(ctx, FC_LOOP); 2568 2569 /* check stack depth */ 2570 callstack_check_depth(ctx, FC_LOOP, 0); 2571 return 0; 2572} 2573 2574static int tgsi_endloop(struct r600_shader_ctx *ctx) 2575{ 2576 int i; 2577 2578 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2579 2580 if 
(ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2581 R600_ERR("loop/endloop in shader code are not paired.\n"); 2582 return -EINVAL; 2583 } 2584 2585 /* fixup loop pointers - from r600isa 2586 LOOP END points to CF after LOOP START, 2587 LOOP START point to CF after LOOP END 2588 BRK/CONT point to LOOP END CF 2589 */ 2590 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2591 2592 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2593 2594 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2595 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2596 } 2597 /* TODO add LOOPRET support */ 2598 fc_poplevel(ctx); 2599 callstack_decrease_current(ctx, FC_LOOP); 2600 return 0; 2601} 2602 2603static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2604{ 2605 unsigned int fscp; 2606 2607 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2608 { 2609 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2610 break; 2611 } 2612 2613 if (fscp == 0) { 2614 R600_ERR("Break not inside loop/endloop pair\n"); 2615 return -EINVAL; 2616 } 2617 2618 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2619 ctx->bc->cf_last->pop_count = 1; 2620 2621 fc_set_mid(ctx, fscp); 2622 2623 pops(ctx, 1); 2624 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2625 return 0; 2626} 2627 2628static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2629 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 2630 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2631 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2632 2633 /* FIXME: 2634 * For state trackers other than OpenGL, we'll want to use 2635 * _RECIP_IEEE instead. 
	 */
	{TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	{TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq},
	{TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	/* SLT is emitted as SETGT through the operand-swapping handler */
	{TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	/* the only row with the second field set: it uses an OP3 opcode */
	{TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
	{TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	/* SLE is emitted as SETGE through the operand-swapping handler */
	{TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl},
	{TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	/* BRK and CONT carry a CF opcode; their shared handler reads it via ctx->inst_info->r600_opcode */
	{TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
	{TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
	{TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
	/* gap */
	{118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};

/*
 * Evergreen translation table.  Same row layout and row order as
 * r600_shader_tgsi_instruction: TGSI opcode, a flag that is 1 only on the
 * row using an OP3 hardware opcode, hardware opcode, handler.  ALU and CF
 * opcodes use the EG_-prefixed names, ARL/ARR are routed to tgsi_eg_arl,
 * and RCP / RSQ map straight to RECIP_IEEE / RECIPSQRT_IEEE through
 * tgsi_trans_srcx_replicate.  Bare numbers fill unassigned opcode values --
 * presumably so the table can be indexed directly by opcode; confirm at the
 * lookup site before reordering.
 */
static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
	{TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
	{TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit},
	{TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp},
	{TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log},
	{TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2},
	{TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst},
	{TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2},
	{TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2},
	{TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap},
	{TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2},
	{TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3},
	{TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2},
	{TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp},
	{TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2},
	{TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2},
	{TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate},
	{TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow},
	{TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd},
	/* gap */
	{32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
	{TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig},
	{TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex},
	{TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex},
	{TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */
	{TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2},
	{TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2},
	{TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig},
	{TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap},
	{TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2},
	{TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex},
	{TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl},
	{TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg},
	{TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp},
	{TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs},
	{TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
	{TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex},
	{TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
	{TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
	/* gap */
	{75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
	{TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
	/* gap */
	{79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2},
	{TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
	{TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
	{TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
	{TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	/* gap */
	{108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */
	{TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */
	/* gap */
	{118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
	{TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
};