r600_shader.c revision cc9a8915f093c57d2748370d18ed47f66c933013
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_info.h" 25#include "tgsi/tgsi_parse.h" 26#include "tgsi/tgsi_scan.h" 27#include "tgsi/tgsi_dump.h" 28#include "util/u_format.h" 29#include "r600_pipe.h" 30#include "r600_asm.h" 31#include "r600_sq.h" 32#include "r600_formats.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37#include <byteswap.h> 38 39/* CAYMAN notes 40Why CAYMAN got loops for lots of instructions is explained here. 41 42-These 8xx t-slot only ops are implemented in all vector slots. 43MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 44These 8xx t-slot only opcodes become vector ops, with all four 45slots expecting the arguments on sources a and b. Result is 46broadcast to all channels. 47MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT 48These 8xx t-slot only opcodes become vector ops in the z, y, and 49x slots. 50EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 51RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 52SQRT_IEEE/_64 53SIN/COS 54The w slot may have an independent co-issued operation, or if the 55result is required to be in the w slot, the opcode above may be 56issued in the w slot as well. 57The compiler must issue the source argument to slots z, y, and x 58*/ 59 60 61int r600_find_vs_semantic_index(struct r600_shader *vs, 62 struct r600_shader *ps, int id) 63{ 64 struct r600_shader_io *input = &ps->input[id]; 65 66 for (int i = 0; i < vs->noutput; i++) { 67 if (input->name == vs->output[i].name && 68 input->sid == vs->output[i].sid) { 69 return i - 1; 70 } 71 } 72 return 0; 73} 74 75static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 76{ 77 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 78 struct r600_shader *rshader = &shader->shader; 79 uint32_t *ptr; 80 int i; 81 82 /* copy new shader */ 83 if (shader->bo == NULL) { 84 /* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */ 85 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_IMMUTABLE); 86 if (shader->bo == NULL) { 87 return -ENOMEM; 88 } 89 ptr = (uint32_t*)r600_bo_map(rctx->radeon, shader->bo, rctx->ctx.cs, PIPE_TRANSFER_WRITE); 90 if (R600_BIG_ENDIAN) { 91 for (i = 0; i < rshader->bc.ndw; ++i) { 92 ptr[i] = bswap_32(rshader->bc.bytecode[i]); 93 } 94 } else { 95 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * sizeof(*ptr)); 96 } 97 r600_bo_unmap(rctx->radeon, shader->bo); 98 } 99 /* build state */ 100 switch (rshader->processor_type) { 101 case TGSI_PROCESSOR_VERTEX: 102 if (rctx->chip_class >= EVERGREEN) { 103 evergreen_pipe_shader_vs(ctx, shader); 104 } else { 105 r600_pipe_shader_vs(ctx, shader); 106 } 107 break; 108 case TGSI_PROCESSOR_FRAGMENT: 109 if (rctx->chip_class >= EVERGREEN) { 110 evergreen_pipe_shader_ps(ctx, shader); 111 } else { 112 r600_pipe_shader_ps(ctx, shader); 113 } 114 break; 115 default: 116 return -EINVAL; 117 } 118 return 0; 119} 120 121static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader); 122 123int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader) 124{ 125 static int dump_shaders = -1; 126 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 127 int r; 128 129 /* Would like some magic "get_bool_option_once" routine. 130 */ 131 if (dump_shaders == -1) 132 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 133 134 if (dump_shaders) { 135 fprintf(stderr, "--------------------------------------------------------------\n"); 136 tgsi_dump(shader->tokens, 0); 137 } 138 r = r600_shader_from_tgsi(rctx, shader); 139 if (r) { 140 R600_ERR("translation from TGSI failed !\n"); 141 return r; 142 } 143 r = r600_bytecode_build(&shader->shader.bc); 144 if (r) { 145 R600_ERR("building bytecode failed !\n"); 146 return r; 147 } 148 if (dump_shaders) { 149 r600_bytecode_dump(&shader->shader.bc); 150 fprintf(stderr, "______________________________________________________________\n"); 151 } 152 return r600_pipe_shader(ctx, shader); 153} 154 155void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 156{ 157 r600_bo_reference(&shader->bo, NULL); 158 r600_bytecode_clear(&shader->shader.bc); 159 160 memset(&shader->shader,0,sizeof(struct r600_shader)); 161} 162 163/* 164 * tgsi -> r600 shader 165 */ 166struct r600_shader_tgsi_instruction; 167 168struct r600_shader_src { 169 unsigned sel; 170 unsigned swizzle[4]; 171 unsigned neg; 172 unsigned abs; 173 unsigned rel; 174 uint32_t value[4]; 175}; 176 177struct r600_shader_ctx { 178 struct tgsi_shader_info info; 179 struct tgsi_parse_context parse; 180 const struct tgsi_token *tokens; 181 unsigned type; 182 unsigned file_offset[TGSI_FILE_COUNT]; 183 unsigned temp_reg; 184 unsigned ar_reg; 185 struct r600_shader_tgsi_instruction *inst_info; 186 struct r600_bytecode *bc; 187 struct r600_shader *shader; 188 struct r600_shader_src src[4]; 189 u32 *literals; 190 u32 nliterals; 191 u32 max_driver_temp_used; 192 /* needed for evergreen interpolation */ 193 boolean input_centroid; 194 boolean input_linear; 195 boolean input_perspective; 196 int num_interp_gpr; 197}; 198 199struct r600_shader_tgsi_instruction { 200 unsigned tgsi_opcode; 201 unsigned is_op3; 202 unsigned r600_opcode; 203 int (*process)(struct r600_shader_ctx *ctx); 204}; 205 206static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; 207static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 208 209static int tgsi_is_supported(struct r600_shader_ctx *ctx) 210{ 211 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 212 int j; 213 214 if (i->Instruction.NumDstRegs > 1) { 215 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 216 return -EINVAL; 217 } 218 if (i->Instruction.Predicate) { 219 R600_ERR("predicate unsupported\n"); 220 return -EINVAL; 221 } 222#if 0 223 if (i->Instruction.Label) { 224 R600_ERR("label unsupported\n"); 225 return -EINVAL; 226 } 227#endif 228 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 229 if (i->Src[j].Register.Dimension) { 230 R600_ERR("unsupported src %d (dimension %d)\n", j, 231 i->Src[j].Register.Dimension); 232 return -EINVAL; 233 } 234 } 235 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 236 if (i->Dst[j].Register.Dimension) { 237 R600_ERR("unsupported dst (dimension)\n"); 238 return -EINVAL; 239 } 240 } 241 return 0; 242} 243 244static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 245{ 246 int i, r; 247 struct r600_bytecode_alu alu; 248 int gpr = 0, base_chan = 0; 249 int ij_index = 0; 250 251 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 252 ij_index = 0; 253 if (ctx->shader->input[input].centroid) 254 ij_index++; 255 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 256 ij_index = 0; 257 /* if we have perspective add one */ 258 if (ctx->input_perspective) { 259 ij_index++; 260 /* if we have perspective centroid */ 261 if (ctx->input_centroid) 262 ij_index++; 263 } 264 if (ctx->shader->input[input].centroid) 265 ij_index++; 266 } 267 268 /* work out gpr and base_chan from index */ 269 gpr = ij_index / 2; 270 base_chan = (2 * (ij_index % 2)) + 1; 271 272 for (i = 0; i < 8; i++) { 273 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 274 275 if (i < 4) 276 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 277 else 278 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 279 280 if ((i > 1) && (i < 6)) { 281 alu.dst.sel = ctx->shader->input[input].gpr; 282 alu.dst.write = 1; 283 } 284 285 alu.dst.chan = i % 4; 286 287 alu.src[0].sel = gpr; 288 alu.src[0].chan = (base_chan - (i % 2)); 289 290 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 291 292 alu.bank_swizzle_force = SQ_ALU_VEC_210; 293 if ((i % 4) == 3) 294 alu.last = 1; 295 r = r600_bytecode_add_alu(ctx->bc, &alu); 296 if (r) 297 return r; 298 } 299 return 0; 300} 301 302 303static int tgsi_declaration(struct r600_shader_ctx *ctx) 304{ 305 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 306 unsigned i; 307 int r; 308 309 switch (d->Declaration.File) { 310 case TGSI_FILE_INPUT: 311 i = ctx->shader->ninput++; 312 ctx->shader->input[i].name = d->Semantic.Name; 313 ctx->shader->input[i].sid = d->Semantic.Index; 314 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 315 ctx->shader->input[i].centroid = d->Declaration.Centroid; 316 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 317 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chip_class >= EVERGREEN) { 318 /* turn input into interpolate on EG */ 319 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 320 if (ctx->shader->input[i].interpolate > 0) { 321 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 322 evergreen_interp_alu(ctx, i); 323 } 324 } 325 } 326 break; 327 case TGSI_FILE_OUTPUT: 328 i = ctx->shader->noutput++; 329 ctx->shader->output[i].name = d->Semantic.Name; 330 ctx->shader->output[i].sid = d->Semantic.Index; 331 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 332 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 333 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 334 /* these don't count as vertex param exports */ 335 if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) || 336 (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE)) 337 ctx->shader->npos++; 338 } 339 break; 340 case TGSI_FILE_CONSTANT: 341 case TGSI_FILE_TEMPORARY: 342 case TGSI_FILE_SAMPLER: 343 case TGSI_FILE_ADDRESS: 344 break; 345 346 case TGSI_FILE_SYSTEM_VALUE: 347 if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) { 348 struct r600_bytecode_alu alu; 349 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 350 351 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT); 352 alu.src[0].sel = 0; 353 alu.src[0].chan = 3; 354 355 alu.dst.sel = 0; 356 alu.dst.chan = 3; 357 alu.dst.write = 1; 358 alu.last = 1; 359 360 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 361 return r; 362 break; 363 } 364 365 default: 366 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 367 return -EINVAL; 368 } 369 return 0; 370} 371 372static int r600_get_temp(struct r600_shader_ctx *ctx) 373{ 374 return ctx->temp_reg + ctx->max_driver_temp_used++; 375} 376 377/* 378 * for evergreen we need to scan the shader to find the number of GPRs we need to 379 * reserve for interpolation. 380 * 381 * we need to know if we are going to emit 382 * any centroid inputs 383 * if perspective and linear are required 384*/ 385static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 386{ 387 int i; 388 int num_baryc; 389 390 ctx->input_linear = FALSE; 391 ctx->input_perspective = FALSE; 392 ctx->input_centroid = FALSE; 393 ctx->num_interp_gpr = 1; 394 395 /* any centroid inputs */ 396 for (i = 0; i < ctx->info.num_inputs; i++) { 397 /* skip position/face */ 398 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 399 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 400 continue; 401 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 402 ctx->input_linear = TRUE; 403 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 404 ctx->input_perspective = TRUE; 405 if (ctx->info.input_centroid[i]) 406 ctx->input_centroid = TRUE; 407 } 408 409 num_baryc = 0; 410 /* ignoring sample for now */ 411 if (ctx->input_perspective) 412 num_baryc++; 413 if (ctx->input_linear) 414 num_baryc++; 415 if (ctx->input_centroid) 416 num_baryc *= 2; 417 418 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 419 420 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 421 return ctx->num_interp_gpr; 422} 423 424static void tgsi_src(struct r600_shader_ctx *ctx, 425 const struct tgsi_full_src_register *tgsi_src, 426 struct r600_shader_src *r600_src) 427{ 428 memset(r600_src, 0, sizeof(*r600_src)); 429 r600_src->swizzle[0] = tgsi_src->Register.SwizzleX; 430 r600_src->swizzle[1] = tgsi_src->Register.SwizzleY; 431 r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ; 432 r600_src->swizzle[3] = tgsi_src->Register.SwizzleW; 433 r600_src->neg = tgsi_src->Register.Negate; 434 r600_src->abs = tgsi_src->Register.Absolute; 435 436 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 437 int index; 438 if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) && 439 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) && 440 (tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) { 441 442 index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX; 443 r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg); 444 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL) 445 return; 446 } 447 index = tgsi_src->Register.Index; 448 r600_src->sel = V_SQ_ALU_SRC_LITERAL; 449 memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value)); 450 } else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) { 451 /* assume we wan't TGSI_SEMANTIC_INSTANCEID here */ 452 r600_src->swizzle[0] = 3; 453 r600_src->swizzle[1] = 3; 454 r600_src->swizzle[2] = 3; 455 r600_src->swizzle[3] = 3; 456 r600_src->sel = 0; 457 } else { 458 if (tgsi_src->Register.Indirect) 459 r600_src->rel = V_SQ_REL_RELATIVE; 460 r600_src->sel = tgsi_src->Register.Index; 461 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 462 } 463} 464 465static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx, unsigned int offset, unsigned int dst_reg) 466{ 467 struct r600_bytecode_vtx vtx; 468 unsigned int ar_reg; 469 int r; 470 471 if (offset) { 472 struct r600_bytecode_alu alu; 473 474 memset(&alu, 0, sizeof(alu)); 475 476 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT); 477 alu.src[0].sel = ctx->ar_reg; 478 479 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 480 alu.src[1].value = offset; 481 482 alu.dst.sel = dst_reg; 483 alu.dst.write = 1; 484 alu.last = 1; 485 486 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 487 return r; 488 489 ar_reg = dst_reg; 490 } else { 491 ar_reg = ctx->ar_reg; 492 } 493 494 memset(&vtx, 0, sizeof(vtx)); 495 vtx.fetch_type = 2; /* VTX_FETCH_NO_INDEX_OFFSET */ 496 vtx.src_gpr = ar_reg; 497 vtx.mega_fetch_count = 16; 498 vtx.dst_gpr = dst_reg; 499 vtx.dst_sel_x = 0; /* SEL_X */ 500 vtx.dst_sel_y = 1; /* SEL_Y */ 501 vtx.dst_sel_z = 2; /* SEL_Z */ 502 vtx.dst_sel_w = 3; /* SEL_W */ 503 vtx.data_format = FMT_32_32_32_32_FLOAT; 504 vtx.num_format_all = 2; /* NUM_FORMAT_SCALED */ 505 vtx.format_comp_all = 1; /* FORMAT_COMP_SIGNED */ 506 vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */ 507 vtx.endian = r600_endian_swap(32); 508 509 if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx))) 510 return r; 511 512 return 0; 513} 514 515static int tgsi_split_constant(struct r600_shader_ctx *ctx) 516{ 517 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 518 struct r600_bytecode_alu alu; 519 int i, j, k, nconst, r; 520 521 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 522 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 523 nconst++; 524 } 525 tgsi_src(ctx, &inst->Src[i], &ctx->src[i]); 526 } 527 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 528 if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) { 529 continue; 530 } 531 532 if (ctx->src[i].rel) { 533 int treg = r600_get_temp(ctx); 534 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].sel - 512, treg))) 535 return r; 536 537 ctx->src[i].sel = treg; 538 ctx->src[i].rel = 0; 539 j--; 540 } else if (j > 0) { 541 int treg = r600_get_temp(ctx); 542 for (k = 0; k < 4; k++) { 543 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 545 alu.src[0].sel = ctx->src[i].sel; 546 alu.src[0].chan = k; 547 alu.src[0].rel = ctx->src[i].rel; 548 alu.dst.sel = treg; 549 alu.dst.chan = k; 550 alu.dst.write = 1; 551 if (k == 3) 552 alu.last = 1; 553 r = r600_bytecode_add_alu(ctx->bc, &alu); 554 if (r) 555 return r; 556 } 557 ctx->src[i].sel = treg; 558 ctx->src[i].rel =0; 559 j--; 560 } 561 } 562 return 0; 563} 564 565/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 566static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 567{ 568 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 569 struct r600_bytecode_alu alu; 570 int i, j, k, nliteral, r; 571 572 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 573 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 574 nliteral++; 575 } 576 } 577 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 578 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 579 int treg = r600_get_temp(ctx); 580 for (k = 0; k < 4; k++) { 581 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 582 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 583 alu.src[0].sel = ctx->src[i].sel; 584 alu.src[0].chan = k; 585 alu.src[0].value = ctx->src[i].value[k]; 586 alu.dst.sel = treg; 587 alu.dst.chan = k; 588 alu.dst.write = 1; 589 if (k == 3) 590 alu.last = 1; 591 r = r600_bytecode_add_alu(ctx->bc, &alu); 592 if (r) 593 return r; 594 } 595 ctx->src[i].sel = treg; 596 j--; 597 } 598 } 599 return 0; 600} 601 602static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pipe_shader *pipeshader) 603{ 604 struct r600_shader *shader = &pipeshader->shader; 605 struct tgsi_token *tokens = pipeshader->tokens; 606 struct tgsi_full_immediate *immediate; 607 struct tgsi_full_property *property; 608 struct r600_shader_ctx ctx; 609 struct r600_bytecode_output output[32]; 610 unsigned output_done, noutput; 611 unsigned opcode; 612 int i, j, r = 0, pos0; 613 614 ctx.bc = &shader->bc; 615 ctx.shader = shader; 616 r600_bytecode_init(ctx.bc, rctx->chip_class); 617 ctx.tokens = tokens; 618 tgsi_scan_shader(tokens, &ctx.info); 619 tgsi_parse_init(&ctx.parse, tokens); 620 ctx.type = ctx.parse.FullHeader.Processor.Processor; 621 shader->processor_type = ctx.type; 622 ctx.bc->type = shader->processor_type; 623 624 shader->clamp_color = (((ctx.type == TGSI_PROCESSOR_FRAGMENT) && rctx->clamp_fragment_color) || 625 ((ctx.type == TGSI_PROCESSOR_VERTEX) && rctx->clamp_vertex_color)); 626 627 shader->nr_cbufs = rctx->nr_cbufs; 628 629 /* register allocations */ 630 /* Values [0,127] correspond to GPR[0..127]. 631 * Values [128,159] correspond to constant buffer bank 0 632 * Values [160,191] correspond to constant buffer bank 1 633 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 634 * Values [256,287] correspond to constant buffer bank 2 (EG) 635 * Values [288,319] correspond to constant buffer bank 3 (EG) 636 * Other special values are shown in the list below. 637 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 638 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 639 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 640 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 641 * 248 SQ_ALU_SRC_0: special constant 0.0. 642 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 643 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 644 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 645 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 646 * 253 SQ_ALU_SRC_LITERAL: literal constant. 647 * 254 SQ_ALU_SRC_PV: previous vector result. 648 * 255 SQ_ALU_SRC_PS: previous scalar result. 649 */ 650 for (i = 0; i < TGSI_FILE_COUNT; i++) { 651 ctx.file_offset[i] = 0; 652 } 653 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 654 ctx.file_offset[TGSI_FILE_INPUT] = 1; 655 if (ctx.bc->chip_class >= EVERGREEN) { 656 r600_bytecode_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 657 } else { 658 r600_bytecode_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 659 } 660 } 661 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chip_class >= EVERGREEN) { 662 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 663 } 664 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 665 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 666 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 667 ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; 668 669 /* Outside the GPR range. This will be translated to one of the 670 * kcache banks later. */ 671 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 672 673 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 674 ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 675 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 676 ctx.temp_reg = ctx.ar_reg + 1; 677 678 ctx.nliterals = 0; 679 ctx.literals = NULL; 680 shader->fs_write_all = FALSE; 681 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 682 tgsi_parse_token(&ctx.parse); 683 switch (ctx.parse.FullToken.Token.Type) { 684 case TGSI_TOKEN_TYPE_IMMEDIATE: 685 immediate = &ctx.parse.FullToken.FullImmediate; 686 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 687 if(ctx.literals == NULL) { 688 r = -ENOMEM; 689 goto out_err; 690 } 691 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 692 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 693 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 694 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 695 ctx.nliterals++; 696 break; 697 case TGSI_TOKEN_TYPE_DECLARATION: 698 r = tgsi_declaration(&ctx); 699 if (r) 700 goto out_err; 701 break; 702 case TGSI_TOKEN_TYPE_INSTRUCTION: 703 r = tgsi_is_supported(&ctx); 704 if (r) 705 goto out_err; 706 ctx.max_driver_temp_used = 0; 707 /* reserve first tmp for everyone */ 708 r600_get_temp(&ctx); 709 710 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 711 if ((r = tgsi_split_constant(&ctx))) 712 goto out_err; 713 if ((r = tgsi_split_literal_constant(&ctx))) 714 goto out_err; 715 if (ctx.bc->chip_class == CAYMAN) 716 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 717 else if (ctx.bc->chip_class >= EVERGREEN) 718 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 719 else 720 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 721 r = ctx.inst_info->process(&ctx); 722 if (r) 723 goto out_err; 724 break; 725 case TGSI_TOKEN_TYPE_PROPERTY: 726 property = &ctx.parse.FullToken.FullProperty; 727 if (property->Property.PropertyName == TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) { 728 if (property->u[0].Data == 1) 729 shader->fs_write_all = TRUE; 730 } 731 break; 732 default: 733 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 734 r = -EINVAL; 735 goto out_err; 736 } 737 } 738 739 noutput = shader->noutput; 740 741 /* clamp color outputs */ 742 if (shader->clamp_color) { 743 for (i = 0; i < noutput; i++) { 744 if (shader->output[i].name == TGSI_SEMANTIC_COLOR || 745 shader->output[i].name == TGSI_SEMANTIC_BCOLOR) { 746 747 int j; 748 for (j = 0; j < 4; j++) { 749 struct r600_bytecode_alu alu; 750 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 751 752 /* MOV_SAT R, R */ 753 alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 754 alu.dst.sel = shader->output[i].gpr; 755 alu.dst.chan = j; 756 alu.dst.write = 1; 757 alu.dst.clamp = 1; 758 alu.src[0].sel = alu.dst.sel; 759 alu.src[0].chan = j; 760 761 if (j == 3) { 762 alu.last = 1; 763 } 764 r = r600_bytecode_add_alu(ctx.bc, &alu); 765 if (r) 766 return r; 767 } 768 } 769 } 770 } 771 772 /* export output */ 773 j = 0; 774 for (i = 0, pos0 = 0; i < noutput; i++) { 775 memset(&output[i], 0, sizeof(struct r600_bytecode_output)); 776 output[i + j].gpr = shader->output[i].gpr; 777 output[i + j].elem_size = 3; 778 output[i + j].swizzle_x = 0; 779 output[i + j].swizzle_y = 1; 780 output[i + j].swizzle_z = 2; 781 output[i + j].swizzle_w = 3; 782 output[i + j].burst_count = 1; 783 output[i + j].barrier = 1; 784 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 785 output[i + j].array_base = i - pos0; 786 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 787 switch (ctx.type) { 788 case TGSI_PROCESSOR_VERTEX: 789 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 790 output[i + j].array_base = 60; 791 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 792 /* position doesn't count in array_base */ 793 pos0++; 794 } 795 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 796 output[i + j].array_base = 61; 797 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 798 /* position doesn't count in array_base */ 799 pos0++; 800 } 801 break; 802 case TGSI_PROCESSOR_FRAGMENT: 803 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 804 output[i + j].array_base = shader->output[i].sid; 805 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 806 if (shader->fs_write_all && (rctx->chip_class >= EVERGREEN)) { 807 for (j = 1; j < shader->nr_cbufs; j++) { 808 memset(&output[i + j], 0, sizeof(struct r600_bytecode_output)); 809 output[i + j].gpr = shader->output[i].gpr; 810 output[i + j].elem_size = 3; 811 output[i + j].swizzle_x = 0; 812 output[i + j].swizzle_y = 1; 813 output[i + j].swizzle_z = 2; 814 output[i + j].swizzle_w = 3; 815 output[i + j].burst_count = 1; 816 output[i + j].barrier = 1; 817 output[i + j].array_base = shader->output[i].sid + j; 818 output[i + j].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 819 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 820 } 821 j--; 822 } 823 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 824 output[i + j].array_base = 61; 825 output[i + j].swizzle_x = 2; 826 output[i + j].swizzle_y = 7; 827 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 828 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 829 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 830 output[i + j].array_base = 61; 831 output[i + j].swizzle_x = 7; 832 output[i + j].swizzle_y = 1; 833 output[i + j].swizzle_z = output[i + j].swizzle_w = 7; 834 output[i + j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 835 } else { 836 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 837 r = -EINVAL; 838 goto out_err; 839 } 840 break; 841 default: 842 R600_ERR("unsupported processor type %d\n", ctx.type); 843 r = -EINVAL; 844 goto out_err; 845 } 846 } 847 noutput += j; 848 /* add fake param output for vertex shader if no param is exported */ 849 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 850 for (i = 0, pos0 = 0; i < noutput; i++) { 851 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 852 pos0 = 1; 853 break; 854 } 855 } 856 if (!pos0) { 857 memset(&output[i], 0, sizeof(struct r600_bytecode_output)); 858 output[i].gpr = 0; 859 output[i].elem_size = 3; 860 output[i].swizzle_x = 0; 861 output[i].swizzle_y = 1; 862 output[i].swizzle_z = 2; 863 output[i].swizzle_w = 3; 864 output[i].burst_count = 1; 865 output[i].barrier = 1; 866 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 867 output[i].array_base = 0; 868 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 869 noutput++; 870 } 871 } 872 /* add fake pixel export */ 873 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 874 memset(&output[0], 0, sizeof(struct r600_bytecode_output)); 875 output[0].gpr = 0; 876 output[0].elem_size = 3; 877 output[0].swizzle_x = 7; 878 output[0].swizzle_y = 7; 879 output[0].swizzle_z = 7; 880 output[0].swizzle_w = 7; 881 output[0].burst_count = 1; 882 output[0].barrier = 1; 883 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 884 output[0].array_base = 0; 885 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 886 noutput++; 887 } 888 /* set export done on last export of each type */ 889 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 890 if (ctx.bc->chip_class < CAYMAN) { 891 if (i == (noutput - 1)) { 892 output[i].end_of_program = 1; 893 } 894 } 895 if (!(output_done & (1 << output[i].type))) { 896 output_done |= (1 << output[i].type); 897 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 898 } 899 } 900 /* add output to bytecode */ 901 for (i = 0; i < noutput; i++) { 902 r = r600_bytecode_add_output(ctx.bc, &output[i]); 903 if (r) 904 goto out_err; 905 } 906 /* add program end */ 907 if (ctx.bc->chip_class == CAYMAN) 908 cm_bytecode_add_cf_end(ctx.bc); 909 910 free(ctx.literals); 911 tgsi_parse_free(&ctx.parse); 912 return 0; 913out_err: 914 free(ctx.literals); 915 tgsi_parse_free(&ctx.parse); 916 return r; 917} 918 919static int tgsi_unsupported(struct r600_shader_ctx *ctx) 920{ 921 R600_ERR("%s tgsi opcode unsupported\n", 922 tgsi_get_opcode_name(ctx->inst_info->tgsi_opcode)); 923 return -EINVAL; 924} 925 926static int tgsi_end(struct r600_shader_ctx *ctx) 927{ 928 return 0; 929} 930 931static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 932 const struct r600_shader_src *shader_src, 933 unsigned chan) 934{ 935 bc_src->sel = shader_src->sel; 936 bc_src->chan = shader_src->swizzle[chan]; 937 bc_src->neg = shader_src->neg; 938 bc_src->abs = shader_src->abs; 939 bc_src->rel = shader_src->rel; 940 bc_src->value = shader_src->value[bc_src->chan]; 941} 942 943static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 944{ 945 bc_src->abs = 1; 946 bc_src->neg = 0; 947} 948 949static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 950{ 951 bc_src->neg = !bc_src->neg; 952} 953 954static void tgsi_dst(struct r600_shader_ctx *ctx, 955 const struct tgsi_full_dst_register *tgsi_dst, 956 unsigned swizzle, 957 struct r600_bytecode_alu_dst *r600_dst) 958{ 959 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 960 961 r600_dst->sel = tgsi_dst->Register.Index; 962 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 963 r600_dst->chan = swizzle; 964 r600_dst->write = 1; 965 if (tgsi_dst->Register.Indirect) 966 r600_dst->rel = V_SQ_REL_RELATIVE; 967 if (inst->Instruction.Saturate) { 968 r600_dst->clamp = 1; 969 } 970} 971 972static int tgsi_last_instruction(unsigned writemask) 973{ 974 int i, lasti = 0; 975 976 for (i = 0; i < 4; i++) { 977 if (writemask & (1 << i)) { 978 lasti = i; 979 } 980 } 981 return lasti; 982} 983 984static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 985{ 986 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 987 struct r600_bytecode_alu alu; 988 int i, j, r; 989 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 990 991 for (i = 0; i < lasti + 1; i++) { 992 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 993 continue; 994 995 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 996 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 997 998 alu.inst = ctx->inst_info->r600_opcode; 999 if (!swap) { 1000 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1001 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1002 } 1003 } else { 1004 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 1005 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1006 } 1007 /* handle some special cases */ 1008 switch (ctx->inst_info->tgsi_opcode) { 1009 case TGSI_OPCODE_SUB: 1010 r600_bytecode_src_toggle_neg(&alu.src[1]); 1011 break; 1012 case TGSI_OPCODE_ABS: 1013 r600_bytecode_src_set_abs(&alu.src[0]); 1014 break; 1015 default: 1016 break; 1017 } 1018 if (i == lasti) { 1019 alu.last = 1; 1020 } 1021 r = r600_bytecode_add_alu(ctx->bc, &alu); 1022 if (r) 1023 return r; 1024 } 1025 return 0; 1026} 1027 1028static int tgsi_op2(struct r600_shader_ctx *ctx) 1029{ 1030 return tgsi_op2_s(ctx, 0); 1031} 1032 1033static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1034{ 1035 return tgsi_op2_s(ctx, 1); 1036} 1037 1038static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 1039{ 1040 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1041 int i, j, r; 1042 struct r600_bytecode_alu alu; 1043 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1044 1045 for (i = 0 ; i < last_slot; i++) { 1046 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1047 alu.inst = ctx->inst_info->r600_opcode; 1048 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1049 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 1050 } 1051 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1052 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1053 1054 if (i == last_slot - 1) 1055 alu.last = 1; 1056 r = r600_bytecode_add_alu(ctx->bc, &alu); 1057 if (r) 1058 return r; 1059 } 1060 return 0; 1061} 1062 1063/* 1064 * r600 - trunc to -PI..PI range 1065 * r700 - normalize by dividing by 2PI 1066 * see fdo bug 27901 1067 */ 1068static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 1069{ 1070 static float half_inv_pi = 1.0 /(3.1415926535 * 2); 1071 static float double_pi = 3.1415926535 * 2; 1072 static float neg_pi = -3.1415926535; 1073 1074 int r; 1075 struct r600_bytecode_alu alu; 1076 1077 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1079 alu.is_op3 = 1; 1080 1081 alu.dst.chan = 0; 1082 alu.dst.sel = ctx->temp_reg; 1083 alu.dst.write = 1; 1084 1085 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1086 1087 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1088 alu.src[1].chan = 0; 1089 alu.src[1].value = *(uint32_t *)&half_inv_pi; 1090 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1091 alu.src[2].chan = 0; 1092 alu.last = 1; 1093 r = r600_bytecode_add_alu(ctx->bc, &alu); 1094 if (r) 1095 return r; 1096 1097 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1098 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1099 1100 alu.dst.chan = 0; 1101 alu.dst.sel = ctx->temp_reg; 1102 alu.dst.write = 1; 1103 1104 alu.src[0].sel = ctx->temp_reg; 1105 alu.src[0].chan = 0; 1106 alu.last = 1; 1107 r = r600_bytecode_add_alu(ctx->bc, &alu); 1108 if (r) 1109 return r; 1110 1111 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1112 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1113 alu.is_op3 = 1; 1114 1115 alu.dst.chan = 0; 1116 alu.dst.sel = ctx->temp_reg; 1117 alu.dst.write = 1; 1118 1119 alu.src[0].sel = ctx->temp_reg; 1120 alu.src[0].chan = 0; 1121 1122 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1123 alu.src[1].chan = 0; 1124 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1125 alu.src[2].chan = 0; 1126 1127 if (ctx->bc->chip_class == R600) { 1128 alu.src[1].value = *(uint32_t *)&double_pi; 1129 alu.src[2].value = *(uint32_t *)&neg_pi; 1130 } else { 1131 alu.src[1].sel = V_SQ_ALU_SRC_1; 1132 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 1133 alu.src[2].neg = 1; 1134 } 1135 1136 alu.last = 1; 1137 r = r600_bytecode_add_alu(ctx->bc, &alu); 1138 if (r) 1139 return r; 1140 return 0; 1141} 1142 1143static int cayman_trig(struct r600_shader_ctx *ctx) 1144{ 1145 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1146 struct r600_bytecode_alu alu; 1147 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1148 int i, r; 1149 1150 r = tgsi_setup_trig(ctx); 1151 if (r) 1152 return r; 1153 1154 1155 for (i = 0; i < last_slot; i++) { 1156 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1157 alu.inst = ctx->inst_info->r600_opcode; 1158 alu.dst.chan = i; 1159 1160 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1161 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1162 1163 alu.src[0].sel = ctx->temp_reg; 1164 alu.src[0].chan = 0; 1165 if (i == last_slot - 1) 1166 alu.last = 1; 1167 r = r600_bytecode_add_alu(ctx->bc, &alu); 1168 if (r) 1169 return r; 1170 } 1171 return 0; 1172} 1173 1174static int tgsi_trig(struct r600_shader_ctx *ctx) 1175{ 1176 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1177 struct r600_bytecode_alu alu; 1178 int i, r; 1179 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1180 1181 r = tgsi_setup_trig(ctx); 1182 if (r) 1183 return r; 1184 1185 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1186 alu.inst = ctx->inst_info->r600_opcode; 1187 alu.dst.chan = 0; 1188 alu.dst.sel = ctx->temp_reg; 1189 alu.dst.write = 1; 1190 1191 alu.src[0].sel = ctx->temp_reg; 1192 alu.src[0].chan = 0; 1193 alu.last = 1; 1194 r = r600_bytecode_add_alu(ctx->bc, &alu); 1195 if (r) 1196 return r; 1197 1198 /* replicate result */ 1199 for (i = 0; i < lasti + 1; i++) { 1200 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1201 continue; 1202 1203 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1204 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1205 1206 alu.src[0].sel = ctx->temp_reg; 1207 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1208 if (i == lasti) 1209 alu.last = 1; 1210 r = r600_bytecode_add_alu(ctx->bc, &alu); 1211 if (r) 1212 return r; 1213 } 1214 return 0; 1215} 1216 1217static int tgsi_scs(struct r600_shader_ctx *ctx) 1218{ 1219 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1220 struct r600_bytecode_alu alu; 1221 int i, r; 1222 1223 /* We'll only need the trig stuff if we are going to write to the 1224 * X or Y components of the destination vector. 1225 */ 1226 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1227 r = tgsi_setup_trig(ctx); 1228 if (r) 1229 return r; 1230 } 1231 1232 /* dst.x = COS */ 1233 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1234 if (ctx->bc->chip_class == CAYMAN) { 1235 for (i = 0 ; i < 3; i++) { 1236 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1237 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1238 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1239 1240 if (i == 0) 1241 alu.dst.write = 1; 1242 else 1243 alu.dst.write = 0; 1244 alu.src[0].sel = ctx->temp_reg; 1245 alu.src[0].chan = 0; 1246 if (i == 2) 1247 alu.last = 1; 1248 r = r600_bytecode_add_alu(ctx->bc, &alu); 1249 if (r) 1250 return r; 1251 } 1252 } else { 1253 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1254 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1255 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1256 1257 alu.src[0].sel = ctx->temp_reg; 1258 alu.src[0].chan = 0; 1259 alu.last = 1; 1260 r = r600_bytecode_add_alu(ctx->bc, &alu); 1261 if (r) 1262 return r; 1263 } 1264 } 1265 1266 /* dst.y = SIN */ 1267 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1268 if (ctx->bc->chip_class == CAYMAN) { 1269 for (i = 0 ; i < 3; i++) { 1270 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1271 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1272 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1273 if (i == 1) 1274 alu.dst.write = 1; 1275 else 1276 alu.dst.write = 0; 1277 alu.src[0].sel = ctx->temp_reg; 1278 alu.src[0].chan = 0; 1279 if (i == 2) 1280 alu.last = 1; 1281 r = r600_bytecode_add_alu(ctx->bc, &alu); 1282 if (r) 1283 return r; 1284 } 1285 } else { 1286 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1287 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1288 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1289 1290 alu.src[0].sel = ctx->temp_reg; 1291 alu.src[0].chan = 0; 1292 alu.last = 1; 1293 r = r600_bytecode_add_alu(ctx->bc, &alu); 1294 if (r) 1295 return r; 1296 } 1297 } 1298 1299 /* dst.z = 0.0; */ 1300 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1301 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1302 1303 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1304 1305 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1306 1307 alu.src[0].sel = V_SQ_ALU_SRC_0; 1308 alu.src[0].chan = 0; 1309 1310 alu.last = 1; 1311 1312 r = r600_bytecode_add_alu(ctx->bc, &alu); 1313 if (r) 1314 return r; 1315 } 1316 1317 /* dst.w = 1.0; */ 1318 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1319 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1320 1321 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1322 1323 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1324 1325 alu.src[0].sel = V_SQ_ALU_SRC_1; 1326 alu.src[0].chan = 0; 1327 1328 alu.last = 1; 1329 1330 r = r600_bytecode_add_alu(ctx->bc, &alu); 1331 if (r) 1332 return r; 1333 } 1334 1335 return 0; 1336} 1337 1338static int tgsi_kill(struct r600_shader_ctx *ctx) 1339{ 1340 struct r600_bytecode_alu alu; 1341 int i, r; 1342 1343 for (i = 0; i < 4; i++) { 1344 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1345 alu.inst = ctx->inst_info->r600_opcode; 1346 1347 alu.dst.chan = i; 1348 1349 alu.src[0].sel = V_SQ_ALU_SRC_0; 1350 1351 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1352 alu.src[1].sel = V_SQ_ALU_SRC_1; 1353 alu.src[1].neg = 1; 1354 } else { 1355 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1356 } 1357 if (i == 3) { 1358 alu.last = 1; 1359 } 1360 r = r600_bytecode_add_alu(ctx->bc, &alu); 1361 if (r) 1362 return r; 1363 } 1364 1365 /* kill must be last in ALU */ 1366 ctx->bc->force_add_cf = 1; 1367 ctx->shader->uses_kill = TRUE; 1368 return 0; 1369} 1370 1371static int tgsi_lit(struct r600_shader_ctx *ctx) 1372{ 1373 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1374 struct r600_bytecode_alu alu; 1375 int r; 1376 1377 /* tmp.x = max(src.y, 0.0) */ 1378 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1379 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1380 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 1381 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1382 alu.src[1].chan = 1; 1383 1384 alu.dst.sel = ctx->temp_reg; 1385 alu.dst.chan = 0; 1386 alu.dst.write = 1; 1387 1388 alu.last = 1; 1389 r = r600_bytecode_add_alu(ctx->bc, &alu); 1390 if (r) 1391 return r; 1392 1393 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1394 { 1395 int chan; 1396 int sel; 1397 int i; 1398 1399 if (ctx->bc->chip_class == CAYMAN) { 1400 for (i = 0; i < 3; i++) { 1401 /* tmp.z = log(tmp.x) */ 1402 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1403 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1404 alu.src[0].sel = ctx->temp_reg; 1405 alu.src[0].chan = 0; 1406 alu.dst.sel = ctx->temp_reg; 1407 alu.dst.chan = i; 1408 if (i == 2) { 1409 alu.dst.write = 1; 1410 alu.last = 1; 1411 } else 1412 alu.dst.write = 0; 1413 1414 r = r600_bytecode_add_alu(ctx->bc, &alu); 1415 if (r) 1416 return r; 1417 } 1418 } else { 1419 /* tmp.z = log(tmp.x) */ 1420 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1421 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1422 alu.src[0].sel = ctx->temp_reg; 1423 alu.src[0].chan = 0; 1424 alu.dst.sel = ctx->temp_reg; 1425 alu.dst.chan = 2; 1426 alu.dst.write = 1; 1427 alu.last = 1; 1428 r = r600_bytecode_add_alu(ctx->bc, &alu); 1429 if (r) 1430 return r; 1431 } 1432 1433 chan = alu.dst.chan; 1434 sel = alu.dst.sel; 1435 1436 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 1437 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1438 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1439 alu.src[0].sel = sel; 1440 alu.src[0].chan = chan; 1441 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 1442 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 1443 alu.dst.sel = ctx->temp_reg; 1444 alu.dst.chan = 0; 1445 alu.dst.write = 1; 1446 alu.is_op3 = 1; 1447 alu.last = 1; 1448 r = r600_bytecode_add_alu(ctx->bc, &alu); 1449 if (r) 1450 return r; 1451 1452 if (ctx->bc->chip_class == CAYMAN) { 1453 for (i = 0; i < 3; i++) { 1454 /* dst.z = exp(tmp.x) */ 1455 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1456 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1457 alu.src[0].sel = ctx->temp_reg; 1458 alu.src[0].chan = 0; 1459 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1460 if (i == 2) { 1461 alu.dst.write = 1; 1462 alu.last = 1; 1463 } else 1464 alu.dst.write = 0; 1465 r = r600_bytecode_add_alu(ctx->bc, &alu); 1466 if (r) 1467 return r; 1468 } 1469 } else { 1470 /* dst.z = exp(tmp.x) */ 1471 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1472 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1473 alu.src[0].sel = ctx->temp_reg; 1474 alu.src[0].chan = 0; 1475 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1476 alu.last = 1; 1477 r = r600_bytecode_add_alu(ctx->bc, &alu); 1478 if (r) 1479 return r; 1480 } 1481 } 1482 1483 /* dst.x, <- 1.0 */ 1484 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1485 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1486 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1487 alu.src[0].chan = 0; 1488 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1489 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1490 r = r600_bytecode_add_alu(ctx->bc, &alu); 1491 if (r) 1492 return r; 1493 1494 /* dst.y = max(src.x, 0.0) */ 1495 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1496 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1497 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1498 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1499 alu.src[1].chan = 0; 1500 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1501 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1502 r = r600_bytecode_add_alu(ctx->bc, &alu); 1503 if (r) 1504 return r; 1505 1506 /* dst.w, <- 1.0 */ 1507 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1508 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1509 alu.src[0].sel = V_SQ_ALU_SRC_1; 1510 alu.src[0].chan = 0; 1511 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1512 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1513 alu.last = 1; 1514 r = r600_bytecode_add_alu(ctx->bc, &alu); 1515 if (r) 1516 return r; 1517 1518 return 0; 1519} 1520 1521static int tgsi_rsq(struct r600_shader_ctx *ctx) 1522{ 1523 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1524 struct r600_bytecode_alu alu; 1525 int i, r; 1526 1527 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1528 1529 /* FIXME: 1530 * For state trackers other than OpenGL, we'll want to use 1531 * _RECIPSQRT_IEEE instead. 1532 */ 1533 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1534 1535 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1536 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1537 r600_bytecode_src_set_abs(&alu.src[i]); 1538 } 1539 alu.dst.sel = ctx->temp_reg; 1540 alu.dst.write = 1; 1541 alu.last = 1; 1542 r = r600_bytecode_add_alu(ctx->bc, &alu); 1543 if (r) 1544 return r; 1545 /* replicate result */ 1546 return tgsi_helper_tempx_replicate(ctx); 1547} 1548 1549static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1550{ 1551 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1552 struct r600_bytecode_alu alu; 1553 int i, r; 1554 1555 for (i = 0; i < 4; i++) { 1556 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1557 alu.src[0].sel = ctx->temp_reg; 1558 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1559 alu.dst.chan = i; 1560 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1561 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1562 if (i == 3) 1563 alu.last = 1; 1564 r = r600_bytecode_add_alu(ctx->bc, &alu); 1565 if (r) 1566 return r; 1567 } 1568 return 0; 1569} 1570 1571static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1572{ 1573 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1574 struct r600_bytecode_alu alu; 1575 int i, r; 1576 1577 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1578 alu.inst = ctx->inst_info->r600_opcode; 1579 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1580 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 1581 } 1582 alu.dst.sel = ctx->temp_reg; 1583 alu.dst.write = 1; 1584 alu.last = 1; 1585 r = r600_bytecode_add_alu(ctx->bc, &alu); 1586 if (r) 1587 return r; 1588 /* replicate result */ 1589 return tgsi_helper_tempx_replicate(ctx); 1590} 1591 1592static int cayman_pow(struct r600_shader_ctx *ctx) 1593{ 1594 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1595 int i, r; 1596 struct r600_bytecode_alu alu; 1597 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 1598 1599 for (i = 0; i < 3; i++) { 1600 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1601 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1602 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1603 alu.dst.sel = ctx->temp_reg; 1604 alu.dst.chan = i; 1605 alu.dst.write = 1; 1606 if (i == 2) 1607 alu.last = 1; 1608 r = r600_bytecode_add_alu(ctx->bc, &alu); 1609 if (r) 1610 return r; 1611 } 1612 1613 /* b * LOG2(a) */ 1614 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1615 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1616 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 1617 alu.src[1].sel = ctx->temp_reg; 1618 alu.dst.sel = ctx->temp_reg; 1619 alu.dst.write = 1; 1620 alu.last = 1; 1621 r = r600_bytecode_add_alu(ctx->bc, &alu); 1622 if (r) 1623 return r; 1624 1625 for (i = 0; i < last_slot; i++) { 1626 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1627 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1628 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1629 alu.src[0].sel = ctx->temp_reg; 1630 1631 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1632 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1633 if (i == last_slot - 1) 1634 alu.last = 1; 1635 r = r600_bytecode_add_alu(ctx->bc, &alu); 1636 if (r) 1637 return r; 1638 } 1639 return 0; 1640} 1641 1642static int tgsi_pow(struct r600_shader_ctx *ctx) 1643{ 1644 struct r600_bytecode_alu alu; 1645 int r; 1646 1647 /* LOG2(a) */ 1648 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1649 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1650 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 1651 alu.dst.sel = ctx->temp_reg; 1652 alu.dst.write = 1; 1653 alu.last = 1; 1654 r = r600_bytecode_add_alu(ctx->bc, &alu); 1655 if (r) 1656 return r; 1657 /* b * LOG2(a) */ 1658 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1659 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1660 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 1661 alu.src[1].sel = ctx->temp_reg; 1662 alu.dst.sel = ctx->temp_reg; 1663 alu.dst.write = 1; 1664 alu.last = 1; 1665 r = r600_bytecode_add_alu(ctx->bc, &alu); 1666 if (r) 1667 return r; 1668 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1669 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1670 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1671 alu.src[0].sel = ctx->temp_reg; 1672 alu.dst.sel = ctx->temp_reg; 1673 alu.dst.write = 1; 1674 alu.last = 1; 1675 r = r600_bytecode_add_alu(ctx->bc, &alu); 1676 if (r) 1677 return r; 1678 return tgsi_helper_tempx_replicate(ctx); 1679} 1680 1681static int tgsi_ssg(struct r600_shader_ctx *ctx) 1682{ 1683 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1684 struct r600_bytecode_alu alu; 1685 int i, r; 1686 1687 /* tmp = (src > 0 ? 1 : src) */ 1688 for (i = 0; i < 4; i++) { 1689 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1690 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1691 alu.is_op3 = 1; 1692 1693 alu.dst.sel = ctx->temp_reg; 1694 alu.dst.chan = i; 1695 1696 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 1697 alu.src[1].sel = V_SQ_ALU_SRC_1; 1698 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 1699 1700 if (i == 3) 1701 alu.last = 1; 1702 r = r600_bytecode_add_alu(ctx->bc, &alu); 1703 if (r) 1704 return r; 1705 } 1706 1707 /* dst = (-tmp > 0 ? -1 : tmp) */ 1708 for (i = 0; i < 4; i++) { 1709 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1710 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1711 alu.is_op3 = 1; 1712 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1713 1714 alu.src[0].sel = ctx->temp_reg; 1715 alu.src[0].chan = i; 1716 alu.src[0].neg = 1; 1717 1718 alu.src[1].sel = V_SQ_ALU_SRC_1; 1719 alu.src[1].neg = 1; 1720 1721 alu.src[2].sel = ctx->temp_reg; 1722 alu.src[2].chan = i; 1723 1724 if (i == 3) 1725 alu.last = 1; 1726 r = r600_bytecode_add_alu(ctx->bc, &alu); 1727 if (r) 1728 return r; 1729 } 1730 return 0; 1731} 1732 1733static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1734{ 1735 struct r600_bytecode_alu alu; 1736 int i, r; 1737 1738 for (i = 0; i < 4; i++) { 1739 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1740 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1741 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1742 alu.dst.chan = i; 1743 } else { 1744 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1745 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1746 alu.src[0].sel = ctx->temp_reg; 1747 alu.src[0].chan = i; 1748 } 1749 if (i == 3) { 1750 alu.last = 1; 1751 } 1752 r = r600_bytecode_add_alu(ctx->bc, &alu); 1753 if (r) 1754 return r; 1755 } 1756 return 0; 1757} 1758 1759static int tgsi_op3(struct r600_shader_ctx *ctx) 1760{ 1761 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1762 struct r600_bytecode_alu alu; 1763 int i, j, r; 1764 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1765 1766 for (i = 0; i < lasti + 1; i++) { 1767 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1768 continue; 1769 1770 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1771 alu.inst = ctx->inst_info->r600_opcode; 1772 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1773 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1774 } 1775 1776 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1777 alu.dst.chan = i; 1778 alu.dst.write = 1; 1779 alu.is_op3 = 1; 1780 if (i == lasti) { 1781 alu.last = 1; 1782 } 1783 r = r600_bytecode_add_alu(ctx->bc, &alu); 1784 if (r) 1785 return r; 1786 } 1787 return 0; 1788} 1789 1790static int tgsi_dp(struct r600_shader_ctx *ctx) 1791{ 1792 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1793 struct r600_bytecode_alu alu; 1794 int i, j, r; 1795 1796 for (i = 0; i < 4; i++) { 1797 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1798 alu.inst = ctx->inst_info->r600_opcode; 1799 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1800 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 1801 } 1802 1803 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1804 alu.dst.chan = i; 1805 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1806 /* handle some special cases */ 1807 switch (ctx->inst_info->tgsi_opcode) { 1808 case TGSI_OPCODE_DP2: 1809 if (i > 1) { 1810 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1811 alu.src[0].chan = alu.src[1].chan = 0; 1812 } 1813 break; 1814 case TGSI_OPCODE_DP3: 1815 if (i > 2) { 1816 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1817 alu.src[0].chan = alu.src[1].chan = 0; 1818 } 1819 break; 1820 case TGSI_OPCODE_DPH: 1821 if (i == 3) { 1822 alu.src[0].sel = V_SQ_ALU_SRC_1; 1823 alu.src[0].chan = 0; 1824 alu.src[0].neg = 0; 1825 } 1826 break; 1827 default: 1828 break; 1829 } 1830 if (i == 3) { 1831 alu.last = 1; 1832 } 1833 r = r600_bytecode_add_alu(ctx->bc, &alu); 1834 if (r) 1835 return r; 1836 } 1837 return 0; 1838} 1839 1840static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, 1841 unsigned index) 1842{ 1843 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1844 return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY && 1845 inst->Src[index].Register.File != TGSI_FILE_INPUT) || 1846 ctx->src[index].neg || ctx->src[index].abs; 1847} 1848 1849static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, 1850 unsigned index) 1851{ 1852 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1853 return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index; 1854} 1855 1856static int tgsi_tex(struct r600_shader_ctx *ctx) 1857{ 1858 static float one_point_five = 1.5f; 1859 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1860 struct r600_bytecode_tex tex; 1861 struct r600_bytecode_alu alu; 1862 unsigned src_gpr; 1863 int r, i, j; 1864 int opcode; 1865 /* Texture fetch instructions can only use gprs as source. 1866 * Also they cannot negate the source or take the absolute value */ 1867 const boolean src_requires_loading = tgsi_tex_src_requires_loading(ctx, 0); 1868 boolean src_loaded = FALSE; 1869 unsigned sampler_src_reg = 1; 1870 1871 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 1872 1873 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 1874 /* TGSI moves the sampler to src reg 3 for TXD */ 1875 sampler_src_reg = 3; 1876 1877 for (i = 1; i < 3; i++) { 1878 /* set gradients h/v */ 1879 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 1880 tex.inst = (i == 1) ? SQ_TEX_INST_SET_GRADIENTS_H : 1881 SQ_TEX_INST_SET_GRADIENTS_V; 1882 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 1883 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 1884 1885 if (tgsi_tex_src_requires_loading(ctx, i)) { 1886 tex.src_gpr = r600_get_temp(ctx); 1887 tex.src_sel_x = 0; 1888 tex.src_sel_y = 1; 1889 tex.src_sel_z = 2; 1890 tex.src_sel_w = 3; 1891 1892 for (j = 0; j < 4; j++) { 1893 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1894 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1895 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 1896 alu.dst.sel = tex.src_gpr; 1897 alu.dst.chan = j; 1898 if (j == 3) 1899 alu.last = 1; 1900 alu.dst.write = 1; 1901 r = r600_bytecode_add_alu(ctx->bc, &alu); 1902 if (r) 1903 return r; 1904 } 1905 1906 } else { 1907 tex.src_gpr = tgsi_tex_get_src_gpr(ctx, i); 1908 tex.src_sel_x = ctx->src[i].swizzle[0]; 1909 tex.src_sel_y = ctx->src[i].swizzle[1]; 1910 tex.src_sel_z = ctx->src[i].swizzle[2]; 1911 tex.src_sel_w = ctx->src[i].swizzle[3]; 1912 tex.src_rel = ctx->src[i].rel; 1913 } 1914 tex.dst_gpr = ctx->temp_reg; /* just to avoid confusing the asm scheduler */ 1915 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 1916 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1917 tex.coord_type_x = 1; 1918 tex.coord_type_y = 1; 1919 tex.coord_type_z = 1; 1920 tex.coord_type_w = 1; 1921 } 1922 r = r600_bytecode_add_tex(ctx->bc, &tex); 1923 if (r) 1924 return r; 1925 } 1926 } else if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1927 int out_chan; 1928 /* Add perspective divide */ 1929 if (ctx->bc->chip_class == CAYMAN) { 1930 out_chan = 2; 1931 for (i = 0; i < 3; i++) { 1932 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1933 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1934 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 1935 1936 alu.dst.sel = ctx->temp_reg; 1937 alu.dst.chan = i; 1938 if (i == 2) 1939 alu.last = 1; 1940 if (out_chan == i) 1941 alu.dst.write = 1; 1942 r = r600_bytecode_add_alu(ctx->bc, &alu); 1943 if (r) 1944 return r; 1945 } 1946 1947 } else { 1948 out_chan = 3; 1949 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1950 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1951 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 1952 1953 alu.dst.sel = ctx->temp_reg; 1954 alu.dst.chan = out_chan; 1955 alu.last = 1; 1956 alu.dst.write = 1; 1957 r = r600_bytecode_add_alu(ctx->bc, &alu); 1958 if (r) 1959 return r; 1960 } 1961 1962 for (i = 0; i < 3; i++) { 1963 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1964 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1965 alu.src[0].sel = ctx->temp_reg; 1966 alu.src[0].chan = out_chan; 1967 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 1968 alu.dst.sel = ctx->temp_reg; 1969 alu.dst.chan = i; 1970 alu.dst.write = 1; 1971 r = r600_bytecode_add_alu(ctx->bc, &alu); 1972 if (r) 1973 return r; 1974 } 1975 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1976 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1977 alu.src[0].sel = V_SQ_ALU_SRC_1; 1978 alu.src[0].chan = 0; 1979 alu.dst.sel = ctx->temp_reg; 1980 alu.dst.chan = 3; 1981 alu.last = 1; 1982 alu.dst.write = 1; 1983 r = r600_bytecode_add_alu(ctx->bc, &alu); 1984 if (r) 1985 return r; 1986 src_loaded = TRUE; 1987 src_gpr = ctx->temp_reg; 1988 } 1989 1990 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1991 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 1992 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 1993 1994 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1995 for (i = 0; i < 4; i++) { 1996 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1997 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1998 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 1999 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 2000 alu.dst.sel = ctx->temp_reg; 2001 alu.dst.chan = i; 2002 if (i == 3) 2003 alu.last = 1; 2004 alu.dst.write = 1; 2005 r = r600_bytecode_add_alu(ctx->bc, &alu); 2006 if (r) 2007 return r; 2008 } 2009 2010 /* tmp1.z = RCP_e(|tmp1.z|) */ 2011 if (ctx->bc->chip_class == CAYMAN) { 2012 for (i = 0; i < 3; i++) { 2013 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2014 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2015 alu.src[0].sel = ctx->temp_reg; 2016 alu.src[0].chan = 2; 2017 alu.src[0].abs = 1; 2018 alu.dst.sel = ctx->temp_reg; 2019 alu.dst.chan = i; 2020 if (i == 2) 2021 alu.dst.write = 1; 2022 if (i == 2) 2023 alu.last = 1; 2024 r = r600_bytecode_add_alu(ctx->bc, &alu); 2025 if (r) 2026 return r; 2027 } 2028 } else { 2029 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2030 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2031 alu.src[0].sel = ctx->temp_reg; 2032 alu.src[0].chan = 2; 2033 alu.src[0].abs = 1; 2034 alu.dst.sel = ctx->temp_reg; 2035 alu.dst.chan = 2; 2036 alu.dst.write = 1; 2037 alu.last = 1; 2038 r = r600_bytecode_add_alu(ctx->bc, &alu); 2039 if (r) 2040 return r; 2041 } 2042 2043 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 2044 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 2045 * muladd has no writemask, have to use another temp 2046 */ 2047 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2048 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2049 alu.is_op3 = 1; 2050 2051 alu.src[0].sel = ctx->temp_reg; 2052 alu.src[0].chan = 0; 2053 alu.src[1].sel = ctx->temp_reg; 2054 alu.src[1].chan = 2; 2055 2056 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2057 alu.src[2].chan = 0; 2058 alu.src[2].value = *(uint32_t *)&one_point_five; 2059 2060 alu.dst.sel = ctx->temp_reg; 2061 alu.dst.chan = 0; 2062 alu.dst.write = 1; 2063 2064 r = r600_bytecode_add_alu(ctx->bc, &alu); 2065 if (r) 2066 return r; 2067 2068 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2069 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2070 alu.is_op3 = 1; 2071 2072 alu.src[0].sel = ctx->temp_reg; 2073 alu.src[0].chan = 1; 2074 alu.src[1].sel = ctx->temp_reg; 2075 alu.src[1].chan = 2; 2076 2077 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 2078 alu.src[2].chan = 0; 2079 alu.src[2].value = *(uint32_t *)&one_point_five; 2080 2081 alu.dst.sel = ctx->temp_reg; 2082 alu.dst.chan = 1; 2083 alu.dst.write = 1; 2084 2085 alu.last = 1; 2086 r = r600_bytecode_add_alu(ctx->bc, &alu); 2087 if (r) 2088 return r; 2089 2090 src_loaded = TRUE; 2091 src_gpr = ctx->temp_reg; 2092 } 2093 2094 if (src_requires_loading && !src_loaded) { 2095 for (i = 0; i < 4; i++) { 2096 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2097 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2098 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2099 alu.dst.sel = ctx->temp_reg; 2100 alu.dst.chan = i; 2101 if (i == 3) 2102 alu.last = 1; 2103 alu.dst.write = 1; 2104 r = r600_bytecode_add_alu(ctx->bc, &alu); 2105 if (r) 2106 return r; 2107 } 2108 src_loaded = TRUE; 2109 src_gpr = ctx->temp_reg; 2110 } 2111 2112 opcode = ctx->inst_info->r600_opcode; 2113 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) { 2114 switch (opcode) { 2115 case SQ_TEX_INST_SAMPLE: 2116 opcode = SQ_TEX_INST_SAMPLE_C; 2117 break; 2118 case SQ_TEX_INST_SAMPLE_L: 2119 opcode = SQ_TEX_INST_SAMPLE_C_L; 2120 break; 2121 case SQ_TEX_INST_SAMPLE_G: 2122 opcode = SQ_TEX_INST_SAMPLE_C_G; 2123 break; 2124 } 2125 } 2126 2127 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 2128 tex.inst = opcode; 2129 2130 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 2131 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 2132 tex.src_gpr = src_gpr; 2133 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 2134 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 2135 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 2136 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 2137 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 2138 if (src_loaded) { 2139 tex.src_sel_x = 0; 2140 tex.src_sel_y = 1; 2141 tex.src_sel_z = 2; 2142 tex.src_sel_w = 3; 2143 } else { 2144 tex.src_sel_x = ctx->src[0].swizzle[0]; 2145 tex.src_sel_y = ctx->src[0].swizzle[1]; 2146 tex.src_sel_z = ctx->src[0].swizzle[2]; 2147 tex.src_sel_w = ctx->src[0].swizzle[3]; 2148 tex.src_rel = ctx->src[0].rel; 2149 } 2150 2151 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2152 tex.src_sel_x = 1; 2153 tex.src_sel_y = 0; 2154 tex.src_sel_z = 3; 2155 tex.src_sel_w = 1; 2156 } 2157 2158 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 2159 tex.coord_type_x = 1; 2160 tex.coord_type_y = 1; 2161 tex.coord_type_z = 1; 2162 tex.coord_type_w = 1; 2163 } 2164 2165 if (inst->Texture.Texture == TGSI_TEXTURE_1D_ARRAY) { 2166 tex.coord_type_z = 0; 2167 tex.src_sel_z = tex.src_sel_y; 2168 } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY) 2169 tex.coord_type_z = 0; 2170 2171 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 2172 tex.src_sel_w = tex.src_sel_z; 2173 2174 r = r600_bytecode_add_tex(ctx->bc, &tex); 2175 if (r) 2176 return r; 2177 2178 /* add shadow ambient support - gallium doesn't do it yet */ 2179 return 0; 2180} 2181 2182static int tgsi_lrp(struct r600_shader_ctx *ctx) 2183{ 2184 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2185 struct r600_bytecode_alu alu; 2186 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2187 unsigned i; 2188 int r; 2189 2190 /* optimize if it's just an equal balance */ 2191 if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) { 2192 for (i = 0; i < lasti + 1; i++) { 2193 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2194 continue; 2195 2196 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2197 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2198 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 2199 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2200 alu.omod = 3; 2201 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2202 alu.dst.chan = i; 2203 if (i == lasti) { 2204 alu.last = 1; 2205 } 2206 r = r600_bytecode_add_alu(ctx->bc, &alu); 2207 if (r) 2208 return r; 2209 } 2210 return 0; 2211 } 2212 2213 /* 1 - src0 */ 2214 for (i = 0; i < lasti + 1; i++) { 2215 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2216 continue; 2217 2218 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2219 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2220 alu.src[0].sel = V_SQ_ALU_SRC_1; 2221 alu.src[0].chan = 0; 2222 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 2223 r600_bytecode_src_toggle_neg(&alu.src[1]); 2224 alu.dst.sel = ctx->temp_reg; 2225 alu.dst.chan = i; 2226 if (i == lasti) { 2227 alu.last = 1; 2228 } 2229 alu.dst.write = 1; 2230 r = r600_bytecode_add_alu(ctx->bc, &alu); 2231 if (r) 2232 return r; 2233 } 2234 2235 /* (1 - src0) * src2 */ 2236 for (i = 0; i < lasti + 1; i++) { 2237 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2238 continue; 2239 2240 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2241 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2242 alu.src[0].sel = ctx->temp_reg; 2243 alu.src[0].chan = i; 2244 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2245 alu.dst.sel = ctx->temp_reg; 2246 alu.dst.chan = i; 2247 if (i == lasti) { 2248 alu.last = 1; 2249 } 2250 alu.dst.write = 1; 2251 r = r600_bytecode_add_alu(ctx->bc, &alu); 2252 if (r) 2253 return r; 2254 } 2255 2256 /* src0 * src1 + (1 - src0) * src2 */ 2257 for (i = 0; i < lasti + 1; i++) { 2258 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2259 continue; 2260 2261 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2262 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2263 alu.is_op3 = 1; 2264 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2265 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2266 alu.src[2].sel = ctx->temp_reg; 2267 alu.src[2].chan = i; 2268 2269 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2270 alu.dst.chan = i; 2271 if (i == lasti) { 2272 alu.last = 1; 2273 } 2274 r = r600_bytecode_add_alu(ctx->bc, &alu); 2275 if (r) 2276 return r; 2277 } 2278 return 0; 2279} 2280 2281static int tgsi_cmp(struct r600_shader_ctx *ctx) 2282{ 2283 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2284 struct r600_bytecode_alu alu; 2285 int i, r; 2286 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2287 2288 for (i = 0; i < lasti + 1; i++) { 2289 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2290 continue; 2291 2292 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2293 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2294 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2295 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 2296 r600_bytecode_src(&alu.src[2], &ctx->src[1], i); 2297 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2298 alu.dst.chan = i; 2299 alu.dst.write = 1; 2300 alu.is_op3 = 1; 2301 if (i == lasti) 2302 alu.last = 1; 2303 r = r600_bytecode_add_alu(ctx->bc, &alu); 2304 if (r) 2305 return r; 2306 } 2307 return 0; 2308} 2309 2310static int tgsi_xpd(struct r600_shader_ctx *ctx) 2311{ 2312 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2313 static const unsigned int src0_swizzle[] = {2, 0, 1}; 2314 static const unsigned int src1_swizzle[] = {1, 2, 0}; 2315 struct r600_bytecode_alu alu; 2316 uint32_t use_temp = 0; 2317 int i, r; 2318 2319 if (inst->Dst[0].Register.WriteMask != 0xf) 2320 use_temp = 1; 2321 2322 for (i = 0; i < 4; i++) { 2323 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2324 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2325 if (i < 3) { 2326 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 2327 r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]); 2328 } else { 2329 alu.src[0].sel = V_SQ_ALU_SRC_0; 2330 alu.src[0].chan = i; 2331 alu.src[1].sel = V_SQ_ALU_SRC_0; 2332 alu.src[1].chan = i; 2333 } 2334 2335 alu.dst.sel = ctx->temp_reg; 2336 alu.dst.chan = i; 2337 alu.dst.write = 1; 2338 2339 if (i == 3) 2340 alu.last = 1; 2341 r = r600_bytecode_add_alu(ctx->bc, &alu); 2342 if (r) 2343 return r; 2344 } 2345 2346 for (i = 0; i < 4; i++) { 2347 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2348 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2349 2350 if (i < 3) { 2351 r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]); 2352 r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]); 2353 } else { 2354 alu.src[0].sel = V_SQ_ALU_SRC_0; 2355 alu.src[0].chan = i; 2356 alu.src[1].sel = V_SQ_ALU_SRC_0; 2357 alu.src[1].chan = i; 2358 } 2359 2360 alu.src[2].sel = ctx->temp_reg; 2361 alu.src[2].neg = 1; 2362 alu.src[2].chan = i; 2363 2364 if (use_temp) 2365 alu.dst.sel = ctx->temp_reg; 2366 else 2367 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2368 alu.dst.chan = i; 2369 alu.dst.write = 1; 2370 alu.is_op3 = 1; 2371 if (i == 3) 2372 alu.last = 1; 2373 r = r600_bytecode_add_alu(ctx->bc, &alu); 2374 if (r) 2375 return r; 2376 } 2377 if (use_temp) 2378 return tgsi_helper_copy(ctx, inst); 2379 return 0; 2380} 2381 2382static int tgsi_exp(struct r600_shader_ctx *ctx) 2383{ 2384 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2385 struct r600_bytecode_alu alu; 2386 int r; 2387 int i; 2388 2389 /* result.x = 2^floor(src); */ 2390 if (inst->Dst[0].Register.WriteMask & 1) { 2391 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2392 2393 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2394 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2395 2396 alu.dst.sel = ctx->temp_reg; 2397 alu.dst.chan = 0; 2398 alu.dst.write = 1; 2399 alu.last = 1; 2400 r = r600_bytecode_add_alu(ctx->bc, &alu); 2401 if (r) 2402 return r; 2403 2404 if (ctx->bc->chip_class == CAYMAN) { 2405 for (i = 0; i < 3; i++) { 2406 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2407 alu.src[0].sel = ctx->temp_reg; 2408 alu.src[0].chan = 0; 2409 2410 alu.dst.sel = ctx->temp_reg; 2411 alu.dst.chan = i; 2412 if (i == 0) 2413 alu.dst.write = 1; 2414 if (i == 2) 2415 alu.last = 1; 2416 r = r600_bytecode_add_alu(ctx->bc, &alu); 2417 if (r) 2418 return r; 2419 } 2420 } else { 2421 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2422 alu.src[0].sel = ctx->temp_reg; 2423 alu.src[0].chan = 0; 2424 2425 alu.dst.sel = ctx->temp_reg; 2426 alu.dst.chan = 0; 2427 alu.dst.write = 1; 2428 alu.last = 1; 2429 r = r600_bytecode_add_alu(ctx->bc, &alu); 2430 if (r) 2431 return r; 2432 } 2433 } 2434 2435 /* result.y = tmp - floor(tmp); */ 2436 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2437 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2438 2439 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2440 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2441 2442 alu.dst.sel = ctx->temp_reg; 2443#if 0 2444 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2445 if (r) 2446 return r; 2447#endif 2448 alu.dst.write = 1; 2449 alu.dst.chan = 1; 2450 2451 alu.last = 1; 2452 2453 r = r600_bytecode_add_alu(ctx->bc, &alu); 2454 if (r) 2455 return r; 2456 } 2457 2458 /* result.z = RoughApprox2ToX(tmp);*/ 2459 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2460 if (ctx->bc->chip_class == CAYMAN) { 2461 for (i = 0; i < 3; i++) { 2462 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2463 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2464 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2465 2466 alu.dst.sel = ctx->temp_reg; 2467 alu.dst.chan = i; 2468 if (i == 2) { 2469 alu.dst.write = 1; 2470 alu.last = 1; 2471 } 2472 2473 r = r600_bytecode_add_alu(ctx->bc, &alu); 2474 if (r) 2475 return r; 2476 } 2477 } else { 2478 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2479 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2480 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2481 2482 alu.dst.sel = ctx->temp_reg; 2483 alu.dst.write = 1; 2484 alu.dst.chan = 2; 2485 2486 alu.last = 1; 2487 2488 r = r600_bytecode_add_alu(ctx->bc, &alu); 2489 if (r) 2490 return r; 2491 } 2492 } 2493 2494 /* result.w = 1.0;*/ 2495 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2496 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2497 2498 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2499 alu.src[0].sel = V_SQ_ALU_SRC_1; 2500 alu.src[0].chan = 0; 2501 2502 alu.dst.sel = ctx->temp_reg; 2503 alu.dst.chan = 3; 2504 alu.dst.write = 1; 2505 alu.last = 1; 2506 r = r600_bytecode_add_alu(ctx->bc, &alu); 2507 if (r) 2508 return r; 2509 } 2510 return tgsi_helper_copy(ctx, inst); 2511} 2512 2513static int tgsi_log(struct r600_shader_ctx *ctx) 2514{ 2515 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2516 struct r600_bytecode_alu alu; 2517 int r; 2518 int i; 2519 2520 /* result.x = floor(log2(|src|)); */ 2521 if (inst->Dst[0].Register.WriteMask & 1) { 2522 if (ctx->bc->chip_class == CAYMAN) { 2523 for (i = 0; i < 3; i++) { 2524 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2525 2526 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2527 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2528 r600_bytecode_src_set_abs(&alu.src[0]); 2529 2530 alu.dst.sel = ctx->temp_reg; 2531 alu.dst.chan = i; 2532 if (i == 0) 2533 alu.dst.write = 1; 2534 if (i == 2) 2535 alu.last = 1; 2536 r = r600_bytecode_add_alu(ctx->bc, &alu); 2537 if (r) 2538 return r; 2539 } 2540 2541 } else { 2542 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2543 2544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2545 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2546 r600_bytecode_src_set_abs(&alu.src[0]); 2547 2548 alu.dst.sel = ctx->temp_reg; 2549 alu.dst.chan = 0; 2550 alu.dst.write = 1; 2551 alu.last = 1; 2552 r = r600_bytecode_add_alu(ctx->bc, &alu); 2553 if (r) 2554 return r; 2555 } 2556 2557 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2558 alu.src[0].sel = ctx->temp_reg; 2559 alu.src[0].chan = 0; 2560 2561 alu.dst.sel = ctx->temp_reg; 2562 alu.dst.chan = 0; 2563 alu.dst.write = 1; 2564 alu.last = 1; 2565 2566 r = r600_bytecode_add_alu(ctx->bc, &alu); 2567 if (r) 2568 return r; 2569 } 2570 2571 /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */ 2572 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2573 2574 if (ctx->bc->chip_class == CAYMAN) { 2575 for (i = 0; i < 3; i++) { 2576 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2577 2578 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2579 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2580 r600_bytecode_src_set_abs(&alu.src[0]); 2581 2582 alu.dst.sel = ctx->temp_reg; 2583 alu.dst.chan = i; 2584 if (i == 1) 2585 alu.dst.write = 1; 2586 if (i == 2) 2587 alu.last = 1; 2588 2589 r = r600_bytecode_add_alu(ctx->bc, &alu); 2590 if (r) 2591 return r; 2592 } 2593 } else { 2594 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2595 2596 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2597 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2598 r600_bytecode_src_set_abs(&alu.src[0]); 2599 2600 alu.dst.sel = ctx->temp_reg; 2601 alu.dst.chan = 1; 2602 alu.dst.write = 1; 2603 alu.last = 1; 2604 2605 r = r600_bytecode_add_alu(ctx->bc, &alu); 2606 if (r) 2607 return r; 2608 } 2609 2610 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2611 2612 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2613 alu.src[0].sel = ctx->temp_reg; 2614 alu.src[0].chan = 1; 2615 2616 alu.dst.sel = ctx->temp_reg; 2617 alu.dst.chan = 1; 2618 alu.dst.write = 1; 2619 alu.last = 1; 2620 2621 r = r600_bytecode_add_alu(ctx->bc, &alu); 2622 if (r) 2623 return r; 2624 2625 if (ctx->bc->chip_class == CAYMAN) { 2626 for (i = 0; i < 3; i++) { 2627 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2628 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2629 alu.src[0].sel = ctx->temp_reg; 2630 alu.src[0].chan = 1; 2631 2632 alu.dst.sel = ctx->temp_reg; 2633 alu.dst.chan = i; 2634 if (i == 1) 2635 alu.dst.write = 1; 2636 if (i == 2) 2637 alu.last = 1; 2638 2639 r = r600_bytecode_add_alu(ctx->bc, &alu); 2640 if (r) 2641 return r; 2642 } 2643 } else { 2644 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2645 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2646 alu.src[0].sel = ctx->temp_reg; 2647 alu.src[0].chan = 1; 2648 2649 alu.dst.sel = ctx->temp_reg; 2650 alu.dst.chan = 1; 2651 alu.dst.write = 1; 2652 alu.last = 1; 2653 2654 r = r600_bytecode_add_alu(ctx->bc, &alu); 2655 if (r) 2656 return r; 2657 } 2658 2659 if (ctx->bc->chip_class == CAYMAN) { 2660 for (i = 0; i < 3; i++) { 2661 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2662 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2663 alu.src[0].sel = ctx->temp_reg; 2664 alu.src[0].chan = 1; 2665 2666 alu.dst.sel = ctx->temp_reg; 2667 alu.dst.chan = i; 2668 if (i == 1) 2669 alu.dst.write = 1; 2670 if (i == 2) 2671 alu.last = 1; 2672 2673 r = r600_bytecode_add_alu(ctx->bc, &alu); 2674 if (r) 2675 return r; 2676 } 2677 } else { 2678 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2679 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2680 alu.src[0].sel = ctx->temp_reg; 2681 alu.src[0].chan = 1; 2682 2683 alu.dst.sel = ctx->temp_reg; 2684 alu.dst.chan = 1; 2685 alu.dst.write = 1; 2686 alu.last = 1; 2687 2688 r = r600_bytecode_add_alu(ctx->bc, &alu); 2689 if (r) 2690 return r; 2691 } 2692 2693 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2694 2695 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2696 2697 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2698 r600_bytecode_src_set_abs(&alu.src[0]); 2699 2700 alu.src[1].sel = ctx->temp_reg; 2701 alu.src[1].chan = 1; 2702 2703 alu.dst.sel = ctx->temp_reg; 2704 alu.dst.chan = 1; 2705 alu.dst.write = 1; 2706 alu.last = 1; 2707 2708 r = r600_bytecode_add_alu(ctx->bc, &alu); 2709 if (r) 2710 return r; 2711 } 2712 2713 /* result.z = log2(|src|);*/ 2714 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2715 if (ctx->bc->chip_class == CAYMAN) { 2716 for (i = 0; i < 3; i++) { 2717 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2718 2719 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2720 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2721 r600_bytecode_src_set_abs(&alu.src[0]); 2722 2723 alu.dst.sel = ctx->temp_reg; 2724 if (i == 2) 2725 alu.dst.write = 1; 2726 alu.dst.chan = i; 2727 if (i == 2) 2728 alu.last = 1; 2729 2730 r = r600_bytecode_add_alu(ctx->bc, &alu); 2731 if (r) 2732 return r; 2733 } 2734 } else { 2735 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2736 2737 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2738 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2739 r600_bytecode_src_set_abs(&alu.src[0]); 2740 2741 alu.dst.sel = ctx->temp_reg; 2742 alu.dst.write = 1; 2743 alu.dst.chan = 2; 2744 alu.last = 1; 2745 2746 r = r600_bytecode_add_alu(ctx->bc, &alu); 2747 if (r) 2748 return r; 2749 } 2750 } 2751 2752 /* result.w = 1.0; */ 2753 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2754 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2755 2756 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2757 alu.src[0].sel = V_SQ_ALU_SRC_1; 2758 alu.src[0].chan = 0; 2759 2760 alu.dst.sel = ctx->temp_reg; 2761 alu.dst.chan = 3; 2762 alu.dst.write = 1; 2763 alu.last = 1; 2764 2765 r = r600_bytecode_add_alu(ctx->bc, &alu); 2766 if (r) 2767 return r; 2768 } 2769 2770 return tgsi_helper_copy(ctx, inst); 2771} 2772 2773static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2774{ 2775 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2776 struct r600_bytecode_alu alu; 2777 int r; 2778 2779 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2780 2781 switch (inst->Instruction.Opcode) { 2782 case TGSI_OPCODE_ARL: 2783 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2784 break; 2785 case TGSI_OPCODE_ARR: 2786 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2787 break; 2788 default: 2789 assert(0); 2790 return -1; 2791 } 2792 2793 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2794 alu.last = 1; 2795 alu.dst.sel = ctx->ar_reg; 2796 alu.dst.write = 1; 2797 r = r600_bytecode_add_alu(ctx->bc, &alu); 2798 if (r) 2799 return r; 2800 2801 /* TODO: Note that the MOVA can be avoided if we never use AR for 2802 * indexing non-CB registers in the current ALU clause. Similarly, we 2803 * need to load AR from ar_reg again if we started a new clause 2804 * between ARL and AR usage. The easy way to do that is to remove 2805 * the MOVA here, and load it for the first AR access after ar_reg 2806 * has been modified in each clause. */ 2807 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2808 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2809 alu.src[0].sel = ctx->ar_reg; 2810 alu.src[0].chan = 0; 2811 alu.last = 1; 2812 r = r600_bytecode_add_alu(ctx->bc, &alu); 2813 if (r) 2814 return r; 2815 return 0; 2816} 2817static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2818{ 2819 /* TODO from r600c, ar values don't persist between clauses */ 2820 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2821 struct r600_bytecode_alu alu; 2822 int r; 2823 2824 switch (inst->Instruction.Opcode) { 2825 case TGSI_OPCODE_ARL: 2826 memset(&alu, 0, sizeof(alu)); 2827 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR; 2828 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2829 alu.dst.sel = ctx->ar_reg; 2830 alu.dst.write = 1; 2831 alu.last = 1; 2832 2833 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2834 return r; 2835 2836 memset(&alu, 0, sizeof(alu)); 2837 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2838 alu.src[0].sel = ctx->ar_reg; 2839 alu.dst.sel = ctx->ar_reg; 2840 alu.dst.write = 1; 2841 alu.last = 1; 2842 2843 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2844 return r; 2845 break; 2846 case TGSI_OPCODE_ARR: 2847 memset(&alu, 0, sizeof(alu)); 2848 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2849 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2850 alu.dst.sel = ctx->ar_reg; 2851 alu.dst.write = 1; 2852 alu.last = 1; 2853 2854 if ((r = r600_bytecode_add_alu(ctx->bc, &alu))) 2855 return r; 2856 break; 2857 default: 2858 assert(0); 2859 return -1; 2860 } 2861 2862 memset(&alu, 0, sizeof(alu)); 2863 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2864 alu.src[0].sel = ctx->ar_reg; 2865 alu.last = 1; 2866 2867 r = r600_bytecode_add_alu(ctx->bc, &alu); 2868 if (r) 2869 return r; 2870 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2871 return 0; 2872} 2873 2874static int tgsi_opdst(struct r600_shader_ctx *ctx) 2875{ 2876 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2877 struct r600_bytecode_alu alu; 2878 int i, r = 0; 2879 2880 for (i = 0; i < 4; i++) { 2881 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2882 2883 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2884 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2885 2886 if (i == 0 || i == 3) { 2887 alu.src[0].sel = V_SQ_ALU_SRC_1; 2888 } else { 2889 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 2890 } 2891 2892 if (i == 0 || i == 2) { 2893 alu.src[1].sel = V_SQ_ALU_SRC_1; 2894 } else { 2895 r600_bytecode_src(&alu.src[1], &ctx->src[1], i); 2896 } 2897 if (i == 3) 2898 alu.last = 1; 2899 r = r600_bytecode_add_alu(ctx->bc, &alu); 2900 if (r) 2901 return r; 2902 } 2903 return 0; 2904} 2905 2906static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2907{ 2908 struct r600_bytecode_alu alu; 2909 int r; 2910 2911 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 2912 alu.inst = opcode; 2913 alu.predicate = 1; 2914 2915 alu.dst.sel = ctx->temp_reg; 2916 alu.dst.write = 1; 2917 alu.dst.chan = 0; 2918 2919 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 2920 alu.src[1].sel = V_SQ_ALU_SRC_0; 2921 alu.src[1].chan = 0; 2922 2923 alu.last = 1; 2924 2925 r = r600_bytecode_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2926 if (r) 2927 return r; 2928 return 0; 2929} 2930 2931static int pops(struct r600_shader_ctx *ctx, int pops) 2932{ 2933 unsigned force_pop = ctx->bc->force_add_cf; 2934 2935 if (!force_pop) { 2936 int alu_pop = 3; 2937 if (ctx->bc->cf_last) { 2938 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) 2939 alu_pop = 0; 2940 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) 2941 alu_pop = 1; 2942 } 2943 alu_pop += pops; 2944 if (alu_pop == 1) { 2945 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; 2946 ctx->bc->force_add_cf = 1; 2947 } else if (alu_pop == 2) { 2948 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; 2949 ctx->bc->force_add_cf = 1; 2950 } else { 2951 force_pop = 1; 2952 } 2953 } 2954 2955 if (force_pop) { 2956 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2957 ctx->bc->cf_last->pop_count = pops; 2958 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 2959 } 2960 2961 return 0; 2962} 2963 2964static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2965{ 2966 switch(reason) { 2967 case FC_PUSH_VPM: 2968 ctx->bc->callstack[ctx->bc->call_sp].current--; 2969 break; 2970 case FC_PUSH_WQM: 2971 case FC_LOOP: 2972 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2973 break; 2974 case FC_REP: 2975 /* TOODO : for 16 vp asic should -= 2; */ 2976 ctx->bc->callstack[ctx->bc->call_sp].current --; 2977 break; 2978 } 2979} 2980 2981static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2982{ 2983 if (check_max_only) { 2984 int diff; 2985 switch (reason) { 2986 case FC_PUSH_VPM: 2987 diff = 1; 2988 break; 2989 case FC_PUSH_WQM: 2990 diff = 4; 2991 break; 2992 default: 2993 assert(0); 2994 diff = 0; 2995 } 2996 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2997 ctx->bc->callstack[ctx->bc->call_sp].max) { 2998 ctx->bc->callstack[ctx->bc->call_sp].max = 2999 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 3000 } 3001 return; 3002 } 3003 switch (reason) { 3004 case FC_PUSH_VPM: 3005 ctx->bc->callstack[ctx->bc->call_sp].current++; 3006 break; 3007 case FC_PUSH_WQM: 3008 case FC_LOOP: 3009 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 3010 break; 3011 case FC_REP: 3012 ctx->bc->callstack[ctx->bc->call_sp].current++; 3013 break; 3014 } 3015 3016 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 3017 ctx->bc->callstack[ctx->bc->call_sp].max) { 3018 ctx->bc->callstack[ctx->bc->call_sp].max = 3019 ctx->bc->callstack[ctx->bc->call_sp].current; 3020 } 3021} 3022 3023static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 3024{ 3025 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 3026 3027 sp->mid = (struct r600_bytecode_cf **)realloc((void *)sp->mid, 3028 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 3029 sp->mid[sp->num_mid] = ctx->bc->cf_last; 3030 sp->num_mid++; 3031} 3032 3033static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 3034{ 3035 ctx->bc->fc_sp++; 3036 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 3037 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 3038} 3039 3040static void fc_poplevel(struct r600_shader_ctx *ctx) 3041{ 3042 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 3043 if (sp->mid) { 3044 free(sp->mid); 3045 sp->mid = NULL; 3046 } 3047 sp->num_mid = 0; 3048 sp->start = NULL; 3049 sp->type = 0; 3050 ctx->bc->fc_sp--; 3051} 3052 3053#if 0 3054static int emit_return(struct r600_shader_ctx *ctx) 3055{ 3056 r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 3057 return 0; 3058} 3059 3060static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 3061{ 3062 3063 r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 3064 ctx->bc->cf_last->pop_count = pops; 3065 /* TODO work out offset */ 3066 return 0; 3067} 3068 3069static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 3070{ 3071 return 0; 3072} 3073 3074static void emit_testflag(struct r600_shader_ctx *ctx) 3075{ 3076 3077} 3078 3079static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 3080{ 3081 emit_testflag(ctx); 3082 emit_jump_to_offset(ctx, 1, 4); 3083 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 3084 pops(ctx, ifidx + 1); 3085 emit_return(ctx); 3086} 3087 3088static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 3089{ 3090 emit_testflag(ctx); 3091 3092 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3093 ctx->bc->cf_last->pop_count = 1; 3094 3095 fc_set_mid(ctx, fc_sp); 3096 3097 pops(ctx, 1); 3098} 3099#endif 3100 3101static int tgsi_if(struct r600_shader_ctx *ctx) 3102{ 3103 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 3104 3105 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 3106 3107 fc_pushlevel(ctx, FC_IF); 3108 3109 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 3110 return 0; 3111} 3112 3113static int tgsi_else(struct r600_shader_ctx *ctx) 3114{ 3115 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 3116 ctx->bc->cf_last->pop_count = 1; 3117 3118 fc_set_mid(ctx, ctx->bc->fc_sp); 3119 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 3120 return 0; 3121} 3122 3123static int tgsi_endif(struct r600_shader_ctx *ctx) 3124{ 3125 pops(ctx, 1); 3126 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 3127 R600_ERR("if/endif unbalanced in shader\n"); 3128 return -1; 3129 } 3130 3131 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 3132 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3133 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 3134 } else { 3135 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 3136 } 3137 fc_poplevel(ctx); 3138 3139 callstack_decrease_current(ctx, FC_PUSH_VPM); 3140 return 0; 3141} 3142 3143static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 3144{ 3145 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 3146 3147 fc_pushlevel(ctx, FC_LOOP); 3148 3149 /* check stack depth */ 3150 callstack_check_depth(ctx, FC_LOOP, 0); 3151 return 0; 3152} 3153 3154static int tgsi_endloop(struct r600_shader_ctx *ctx) 3155{ 3156 int i; 3157 3158 r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 3159 3160 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 3161 R600_ERR("loop/endloop in shader code are not paired.\n"); 3162 return -EINVAL; 3163 } 3164 3165 /* fixup loop pointers - from r600isa 3166 LOOP END points to CF after LOOP START, 3167 LOOP START point to CF after LOOP END 3168 BRK/CONT point to LOOP END CF 3169 */ 3170 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 3171 3172 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3173 3174 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 3175 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 3176 } 3177 /* TODO add LOOPRET support */ 3178 fc_poplevel(ctx); 3179 callstack_decrease_current(ctx, FC_LOOP); 3180 return 0; 3181} 3182 3183static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 3184{ 3185 unsigned int fscp; 3186 3187 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 3188 { 3189 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 3190 break; 3191 } 3192 3193 if (fscp == 0) { 3194 R600_ERR("Break not inside loop/endloop pair\n"); 3195 return -EINVAL; 3196 } 3197 3198 r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3199 ctx->bc->cf_last->pop_count = 1; 3200 3201 fc_set_mid(ctx, fscp); 3202 3203 pops(ctx, 1); 3204 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 3205 return 0; 3206} 3207 3208static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 3209 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3210 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3211 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3212 3213 /* FIXME: 3214 * For state trackers other than OpenGL, we'll want to use 3215 * _RECIP_IEEE instead. 3216 */ 3217 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 3218 3219 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 3220 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3221 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3222 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3223 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3224 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3225 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3226 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3227 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3228 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3229 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3230 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3231 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3232 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3233 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3234 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3235 /* gap */ 3236 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3237 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3238 /* gap */ 3239 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3240 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3241 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3242 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3243 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3244 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 3245 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3246 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3247 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3248 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3249 /* gap */ 3250 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3251 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3252 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3253 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3254 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3255 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3256 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3257 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3258 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3259 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3260 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3261 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3262 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3263 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3264 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3265 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3266 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3267 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3268 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3269 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3270 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3271 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3272 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3273 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3274 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3275 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3276 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3277 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3278 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3279 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3280 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3281 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3282 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3283 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3284 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3285 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3286 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3287 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3288 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3289 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3290 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3291 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3292 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3293 /* gap */ 3294 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3295 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3296 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3297 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3298 /* gap */ 3299 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3300 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3301 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3302 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3303 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3304 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3305 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3306 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3307 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3308 /* gap */ 3309 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3310 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3311 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3312 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3313 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3314 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3315 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3316 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3317 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3318 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3319 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3320 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3321 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3322 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3323 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3324 /* gap */ 3325 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3326 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3327 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3328 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3329 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3330 /* gap */ 3331 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3332 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3333 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3334 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3335 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3336 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3337 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3338 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3339 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3340 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3341 /* gap */ 3342 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3343 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3344 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3345 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3346 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3347 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3348 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3349 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3350 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3351 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3352 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3353 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3354 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3355 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3356 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3357 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3358 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3359 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3360 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3361 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3362 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3363 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3364 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3365 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3366 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3367 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3368 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3369 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 3370 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 3371 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 3372 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 3373 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 3374 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 3375 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 3376 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 3377 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 3378 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 3379 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 3380 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 3381 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3382}; 3383 3384static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3385 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3386 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3387 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3388 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3389 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_rsq}, 3390 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3391 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3392 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3393 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3394 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3395 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3396 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3397 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3398 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3399 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3400 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3401 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3402 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3403 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3404 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3405 /* gap */ 3406 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3407 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3408 /* gap */ 3409 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3410 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3411 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3412 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3413 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3414 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 3415 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3416 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3417 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3418 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3419 /* gap */ 3420 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3421 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3422 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3423 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3424 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3425 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3426 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3427 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3428 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3429 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3430 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3431 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3432 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3433 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3434 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3435 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3436 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3437 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3438 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3439 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3440 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3441 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3442 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3443 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3444 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3445 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3446 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3447 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3448 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3449 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3450 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3451 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3452 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3453 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3454 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3455 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3456 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3457 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3458 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3459 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3460 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3461 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3462 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3463 /* gap */ 3464 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3465 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3466 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3467 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3468 /* gap */ 3469 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3470 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3471 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3472 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3473 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3474 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3475 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3476 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3477 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3478 /* gap */ 3479 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3480 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3481 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3482 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3483 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3484 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3485 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3486 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3487 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3488 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3489 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3490 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3491 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3492 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3493 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3494 /* gap */ 3495 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3496 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3497 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3498 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3499 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3500 /* gap */ 3501 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3502 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3503 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3504 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3505 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3506 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3507 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3508 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3509 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3510 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3511 /* gap */ 3512 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3513 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3514 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3515 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3516 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3517 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3518 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3519 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3520 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3521 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3522 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3523 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3524 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3525 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3526 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3527 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3528 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3529 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3530 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3531 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3532 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3533 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3534 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3535 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3536 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3537 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3538 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3539 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 3540 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 3541 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 3542 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 3543 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 3544 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 3545 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 3546 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 3547 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 3548 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 3549 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 3550 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 3551 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3552}; 3553 3554static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { 3555 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3556 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3557 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3558 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, 3559 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, 3560 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3561 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3562 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3563 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3564 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3565 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3566 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3567 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3568 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3569 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3570 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3571 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3572 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3573 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3574 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3575 /* gap */ 3576 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3577 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3578 /* gap */ 3579 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3580 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3581 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3582 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3583 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3584 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RNDNE, tgsi_op2}, 3585 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, 3586 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, 3587 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, 3588 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3589 /* gap */ 3590 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3591 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3592 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3593 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3594 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, 3595 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3596 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3597 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3598 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3599 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3600 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3601 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3602 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3603 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3604 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3605 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3606 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, 3607 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3608 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3609 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3610 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3611 {TGSI_OPCODE_TXD, 0, SQ_TEX_INST_SAMPLE_G, tgsi_tex}, 3612 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3613 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3614 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3615 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3616 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3617 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3618 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3619 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3620 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3621 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3622 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3623 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3624 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3625 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3626 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3627 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3628 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3629 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3630 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3631 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3632 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3633 /* gap */ 3634 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3635 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3636 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3637 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3638 /* gap */ 3639 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3640 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3641 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3642 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3643 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3644 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3645 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3646 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, 3647 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3648 /* gap */ 3649 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3650 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3651 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3652 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3653 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3654 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3655 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3656 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3657 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3658 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3659 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3660 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3661 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3662 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3663 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3664 /* gap */ 3665 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3666 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3667 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3668 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3669 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3670 /* gap */ 3671 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3672 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3673 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3674 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3675 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3676 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3677 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3678 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3679 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3680 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3681 /* gap */ 3682 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3683 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3684 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3685 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3686 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3687 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3688 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3689 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3690 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3691 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3692 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3693 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3694 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3695 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3696 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3697 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3698 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3699 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3700 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3701 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3702 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3703 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3704 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3705 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3706 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3707 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3708 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3709 {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, 3710 {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, 3711 {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, 3712 {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, 3713 {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, 3714 {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, 3715 {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, 3716 {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, 3717 {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, 3718 {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, 3719 {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, 3720 {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, 3721 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3722}; 3723