r600_shader.c revision 7e5173d065f0da450cf553e3e3084a0f774919a3
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_screen.h" 29#include "r600_context.h" 30#include "r600_shader.h" 31#include "r600_asm.h" 32#include "r600_sq.h" 33#include "r600_opcodes.h" 34#include "r600d.h" 35#include <stdio.h> 36#include <errno.h> 37 38 39struct r600_shader_tgsi_instruction; 40 41struct r600_shader_ctx { 42 struct tgsi_shader_info info; 43 struct tgsi_parse_context parse; 44 const struct tgsi_token *tokens; 45 unsigned type; 46 unsigned file_offset[TGSI_FILE_COUNT]; 47 unsigned temp_reg; 48 struct r600_shader_tgsi_instruction *inst_info; 49 struct r600_bc *bc; 50 struct r600_shader *shader; 51 u32 value[4]; 52 u32 *literals; 53 u32 nliterals; 54 u32 max_driver_temp_used; 55}; 56 57struct r600_shader_tgsi_instruction { 58 unsigned tgsi_opcode; 59 unsigned is_op3; 60 unsigned r600_opcode; 61 int (*process)(struct r600_shader_ctx *ctx); 62}; 63 64static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 65static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 66 67static int r600_shader_update(struct pipe_context *ctx, struct r600_shader *shader) 68{ 69 struct r600_context *rctx = r600_context(ctx); 70 const struct util_format_description *desc; 71 enum pipe_format resource_format[160]; 72 unsigned i, nresources = 0; 73 struct r600_bc *bc = &shader->bc; 74 struct r600_bc_cf *cf; 75 struct r600_bc_vtx *vtx; 76 77 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 78 return 0; 79 for (i = 0; i < rctx->vertex_elements->count; i++) { 80 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; 81 } 82 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 83 switch (cf->inst) { 84 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 85 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 86 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 87 desc = util_format_description(resource_format[vtx->buffer_id]); 88 if (desc == NULL) { 89 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 90 return -EINVAL; 91 } 92 vtx->dst_sel_x = desc->swizzle[0]; 93 vtx->dst_sel_y = desc->swizzle[1]; 94 vtx->dst_sel_z = desc->swizzle[2]; 95 vtx->dst_sel_w = desc->swizzle[3]; 96 } 97 break; 98 default: 99 break; 100 } 101 } 102 return r600_bc_build(&shader->bc); 103} 104 105int r600_pipe_shader_create(struct pipe_context *ctx, 106 struct r600_context_state *rpshader, 107 const struct tgsi_token *tokens) 108{ 109 struct r600_screen *rscreen = r600_screen(ctx->screen); 110 int r; 111 112//fprintf(stderr, "--------------------------------------------------------------\n"); 113//tgsi_dump(tokens, 0); 114 if (rpshader == NULL) 115 return -ENOMEM; 116 rpshader->shader.family = radeon_get_family(rscreen->rw); 117 rpshader->shader.use_mem_constant = rscreen->use_mem_constant; 118 r = r600_shader_from_tgsi(tokens, &rpshader->shader); 119 if (r) { 120 R600_ERR("translation from TGSI failed !\n"); 121 return r; 122 } 123 r = r600_bc_build(&rpshader->shader.bc); 124 if (r) { 125 R600_ERR("building bytecode failed !\n"); 126 return r; 127 } 128//fprintf(stderr, "______________________________________________________________\n"); 129 return 0; 130} 131 132static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_state *rpshader) 133{ 134 struct r600_context *rctx = r600_context(ctx); 135 struct radeon_state *state; 136 137 state = &rpshader->rstate[0]; 138 radeon_state_fini(&rpshader->rstate[0]); 139 140 return rctx->vtbl->vs_shader(rctx, rpshader, state); 141} 142 143static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader) 144{ 145 struct r600_context *rctx = r600_context(ctx); 146 struct radeon_state *state; 147 148 state = &rpshader->rstate[0]; 149 radeon_state_fini(state); 150 151 return rctx->vtbl->ps_shader(rctx, rpshader, state); 152} 153 154static int r600_pipe_shader(struct pipe_context *ctx, struct r600_context_state *rpshader) 155{ 156 struct r600_screen *rscreen = r600_screen(ctx->screen); 157 struct r600_context *rctx = r600_context(ctx); 158 struct r600_shader *rshader = &rpshader->shader; 159 int r; 160 void *data; 161 162 /* copy new shader */ 163 radeon_ws_bo_reference(rscreen->rw, &rpshader->bo, NULL); 164 rpshader->bo = NULL; 165 rpshader->bo = radeon_ws_bo(rscreen->rw, rshader->bc.ndw * 4, 166 4096, 0); 167 if (rpshader->bo == NULL) { 168 return -ENOMEM; 169 } 170 data = radeon_ws_bo_map(rscreen->rw, rpshader->bo, 0, rctx); 171 memcpy(data, rshader->bc.bytecode, rshader->bc.ndw * 4); 172 radeon_ws_bo_unmap(rscreen->rw, rpshader->bo); 173 /* build state */ 174 rshader->flat_shade = rctx->flat_shade; 175 switch (rshader->processor_type) { 176 case TGSI_PROCESSOR_VERTEX: 177 r = r600_pipe_shader_vs(ctx, rpshader); 178 break; 179 case TGSI_PROCESSOR_FRAGMENT: 180 r = r600_pipe_shader_ps(ctx, rpshader); 181 break; 182 default: 183 r = -EINVAL; 184 break; 185 } 186 return r; 187} 188 189int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_context_state *rpshader) 190{ 191 struct r600_context *rctx = r600_context(ctx); 192 int r; 193 194 if (rpshader == NULL) 195 return -EINVAL; 196 /* there should be enough input */ 197 if (rctx->vertex_elements->count < rpshader->shader.bc.nresource) { 198 R600_ERR("%d resources provided, expecting %d\n", 199 rctx->vertex_elements->count, rpshader->shader.bc.nresource); 200 return -EINVAL; 201 } 202 r = r600_shader_update(ctx, &rpshader->shader); 203 if (r) 204 return r; 205 return r600_pipe_shader(ctx, rpshader); 206} 207 208static int tgsi_is_supported(struct r600_shader_ctx *ctx) 209{ 210 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 211 int j; 212 213 if (i->Instruction.NumDstRegs > 1) { 214 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 215 return -EINVAL; 216 } 217 if (i->Instruction.Predicate) { 218 R600_ERR("predicate unsupported\n"); 219 return -EINVAL; 220 } 221#if 0 222 if (i->Instruction.Label) { 223 R600_ERR("label unsupported\n"); 224 return -EINVAL; 225 } 226#endif 227 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 228 if (i->Src[j].Register.Dimension || 229 i->Src[j].Register.Absolute) { 230 R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, 231 i->Src[j].Register.Dimension, 232 i->Src[j].Register.Absolute); 233 return -EINVAL; 234 } 235 } 236 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 237 if (i->Dst[j].Register.Dimension) { 238 R600_ERR("unsupported dst (dimension)\n"); 239 return -EINVAL; 240 } 241 } 242 return 0; 243} 244 245static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) 246{ 247 int i, r; 248 struct r600_bc_alu alu; 249 250 for (i = 0; i < 8; i++) { 251 memset(&alu, 0, sizeof(struct r600_bc_alu)); 252 253 if (i < 4) 254 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 255 else 256 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 257 258 if ((i > 1) && (i < 6)) { 259 alu.dst.sel = ctx->shader->input[gpr].gpr; 260 alu.dst.write = 1; 261 } 262 263 alu.dst.chan = i % 4; 264 alu.src[0].chan = (1 - (i % 2)); 265 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr; 266 267 alu.bank_swizzle_force = SQ_ALU_VEC_210; 268 if ((i % 4) == 3) 269 alu.last = 1; 270 r = r600_bc_add_alu(ctx->bc, &alu); 271 if (r) 272 return r; 273 } 274 return 0; 275} 276 277 278static int tgsi_declaration(struct r600_shader_ctx *ctx) 279{ 280 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 281 struct r600_bc_vtx vtx; 282 unsigned i; 283 int r; 284 285 switch (d->Declaration.File) { 286 case TGSI_FILE_INPUT: 287 i = ctx->shader->ninput++; 288 ctx->shader->input[i].name = d->Semantic.Name; 289 ctx->shader->input[i].sid = d->Semantic.Index; 290 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 291 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 292 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 293 /* turn input into fetch */ 294 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 295 vtx.inst = 0; 296 vtx.fetch_type = 0; 297 vtx.buffer_id = i; 298 /* register containing the index into the buffer */ 299 vtx.src_gpr = 0; 300 vtx.src_sel_x = 0; 301 vtx.mega_fetch_count = 0x1F; 302 vtx.dst_gpr = ctx->shader->input[i].gpr; 303 vtx.dst_sel_x = 0; 304 vtx.dst_sel_y = 1; 305 vtx.dst_sel_z = 2; 306 vtx.dst_sel_w = 3; 307 r = r600_bc_add_vtx(ctx->bc, &vtx); 308 if (r) 309 return r; 310 } 311 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { 312 /* turn input into interpolate on EG */ 313 evergreen_interp_alu(ctx, i); 314 } 315 break; 316 case TGSI_FILE_OUTPUT: 317 i = ctx->shader->noutput++; 318 ctx->shader->output[i].name = d->Semantic.Name; 319 ctx->shader->output[i].sid = d->Semantic.Index; 320 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 321 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 322 break; 323 case TGSI_FILE_CONSTANT: 324 case TGSI_FILE_TEMPORARY: 325 case TGSI_FILE_SAMPLER: 326 case TGSI_FILE_ADDRESS: 327 break; 328 default: 329 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 330 return -EINVAL; 331 } 332 return 0; 333} 334 335static int r600_get_temp(struct r600_shader_ctx *ctx) 336{ 337 return ctx->temp_reg + ctx->max_driver_temp_used++; 338} 339 340int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 341{ 342 struct tgsi_full_immediate *immediate; 343 struct r600_shader_ctx ctx; 344 struct r600_bc_output output[32]; 345 unsigned output_done, noutput; 346 unsigned opcode; 347 int i, r = 0, pos0; 348 349 ctx.bc = &shader->bc; 350 ctx.shader = shader; 351 r = r600_bc_init(ctx.bc, shader->family); 352 if (r) 353 return r; 354 ctx.bc->use_mem_constant = shader->use_mem_constant; 355 ctx.tokens = tokens; 356 tgsi_scan_shader(tokens, &ctx.info); 357 tgsi_parse_init(&ctx.parse, tokens); 358 ctx.type = ctx.parse.FullHeader.Processor.Processor; 359 shader->processor_type = ctx.type; 360 361 /* register allocations */ 362 /* Values [0,127] correspond to GPR[0..127]. 363 * Values [128,159] correspond to constant buffer bank 0 364 * Values [160,191] correspond to constant buffer bank 1 365 * Values [256,511] correspond to cfile constants c[0..255]. 366 * Other special values are shown in the list below. 367 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 368 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 369 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 370 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 371 * 248 SQ_ALU_SRC_0: special constant 0.0. 372 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 373 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 374 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 375 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 376 * 253 SQ_ALU_SRC_LITERAL: literal constant. 377 * 254 SQ_ALU_SRC_PV: previous vector result. 378 * 255 SQ_ALU_SRC_PS: previous scalar result. 379 */ 380 for (i = 0; i < TGSI_FILE_COUNT; i++) { 381 ctx.file_offset[i] = 0; 382 } 383 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 384 ctx.file_offset[TGSI_FILE_INPUT] = 1; 385 } 386 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 387 ctx.info.file_count[TGSI_FILE_INPUT]; 388 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 389 ctx.info.file_count[TGSI_FILE_OUTPUT]; 390 if (ctx.shader->use_mem_constant) 391 ctx.file_offset[TGSI_FILE_CONSTANT] = 128; 392 else 393 ctx.file_offset[TGSI_FILE_CONSTANT] = 256; 394 395 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 396 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 397 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 398 399 ctx.nliterals = 0; 400 ctx.literals = NULL; 401 402 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 403 tgsi_parse_token(&ctx.parse); 404 switch (ctx.parse.FullToken.Token.Type) { 405 case TGSI_TOKEN_TYPE_IMMEDIATE: 406 immediate = &ctx.parse.FullToken.FullImmediate; 407 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 408 if(ctx.literals == NULL) { 409 r = -ENOMEM; 410 goto out_err; 411 } 412 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 413 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 414 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 415 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 416 ctx.nliterals++; 417 break; 418 case TGSI_TOKEN_TYPE_DECLARATION: 419 r = tgsi_declaration(&ctx); 420 if (r) 421 goto out_err; 422 break; 423 case TGSI_TOKEN_TYPE_INSTRUCTION: 424 r = tgsi_is_supported(&ctx); 425 if (r) 426 goto out_err; 427 ctx.max_driver_temp_used = 0; 428 /* reserve first tmp for everyone */ 429 r600_get_temp(&ctx); 430 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 431 if (ctx.bc->chiprev == 2) 432 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 433 else 434 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 435 r = ctx.inst_info->process(&ctx); 436 if (r) 437 goto out_err; 438 r = r600_bc_add_literal(ctx.bc, ctx.value); 439 if (r) 440 goto out_err; 441 break; 442 default: 443 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 444 r = -EINVAL; 445 goto out_err; 446 } 447 } 448 /* export output */ 449 noutput = shader->noutput; 450 for (i = 0, pos0 = 0; i < noutput; i++) { 451 memset(&output[i], 0, sizeof(struct r600_bc_output)); 452 output[i].gpr = shader->output[i].gpr; 453 output[i].elem_size = 3; 454 output[i].swizzle_x = 0; 455 output[i].swizzle_y = 1; 456 output[i].swizzle_z = 2; 457 output[i].swizzle_w = 3; 458 output[i].barrier = 1; 459 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 460 output[i].array_base = i - pos0; 461 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 462 switch (ctx.type) { 463 case TGSI_PROCESSOR_VERTEX: 464 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 465 output[i].array_base = 60; 466 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 467 /* position doesn't count in array_base */ 468 pos0++; 469 } 470 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 471 output[i].array_base = 61; 472 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 473 /* position doesn't count in array_base */ 474 pos0++; 475 } 476 break; 477 case TGSI_PROCESSOR_FRAGMENT: 478 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 479 output[i].array_base = shader->output[i].sid; 480 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 481 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 482 output[i].array_base = 61; 483 output[i].swizzle_x = 2; 484 output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7; 485 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 486 } else { 487 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 488 r = -EINVAL; 489 goto out_err; 490 } 491 break; 492 default: 493 R600_ERR("unsupported processor type %d\n", ctx.type); 494 r = -EINVAL; 495 goto out_err; 496 } 497 } 498 /* add fake param output for vertex shader if no param is exported */ 499 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 500 for (i = 0, pos0 = 0; i < noutput; i++) { 501 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 502 pos0 = 1; 503 break; 504 } 505 } 506 if (!pos0) { 507 memset(&output[i], 0, sizeof(struct r600_bc_output)); 508 output[i].gpr = 0; 509 output[i].elem_size = 3; 510 output[i].swizzle_x = 0; 511 output[i].swizzle_y = 1; 512 output[i].swizzle_z = 2; 513 output[i].swizzle_w = 3; 514 output[i].barrier = 1; 515 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 516 output[i].array_base = 0; 517 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 518 noutput++; 519 } 520 } 521 /* add fake pixel export */ 522 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 523 memset(&output[0], 0, sizeof(struct r600_bc_output)); 524 output[0].gpr = 0; 525 output[0].elem_size = 3; 526 output[0].swizzle_x = 7; 527 output[0].swizzle_y = 7; 528 output[0].swizzle_z = 7; 529 output[0].swizzle_w = 7; 530 output[0].barrier = 1; 531 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 532 output[0].array_base = 0; 533 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 534 noutput++; 535 } 536 /* set export done on last export of each type */ 537 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 538 if (i == (noutput - 1)) { 539 output[i].end_of_program = 1; 540 } 541 if (!(output_done & (1 << output[i].type))) { 542 output_done |= (1 << output[i].type); 543 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 544 } 545 } 546 /* add output to bytecode */ 547 for (i = 0; i < noutput; i++) { 548 r = r600_bc_add_output(ctx.bc, &output[i]); 549 if (r) 550 goto out_err; 551 } 552 free(ctx.literals); 553 tgsi_parse_free(&ctx.parse); 554 return 0; 555out_err: 556 free(ctx.literals); 557 tgsi_parse_free(&ctx.parse); 558 return r; 559} 560 561static int tgsi_unsupported(struct r600_shader_ctx *ctx) 562{ 563 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 564 return -EINVAL; 565} 566 567static int tgsi_end(struct r600_shader_ctx *ctx) 568{ 569 return 0; 570} 571 572static int tgsi_src(struct r600_shader_ctx *ctx, 573 const struct tgsi_full_src_register *tgsi_src, 574 struct r600_bc_alu_src *r600_src) 575{ 576 int index; 577 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 578 r600_src->sel = tgsi_src->Register.Index; 579 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 580 r600_src->sel = 0; 581 index = tgsi_src->Register.Index; 582 ctx->value[0] = ctx->literals[index * 4 + 0]; 583 ctx->value[1] = ctx->literals[index * 4 + 1]; 584 ctx->value[2] = ctx->literals[index * 4 + 2]; 585 ctx->value[3] = ctx->literals[index * 4 + 3]; 586 } 587 if (tgsi_src->Register.Indirect) 588 r600_src->rel = V_SQ_REL_RELATIVE; 589 r600_src->neg = tgsi_src->Register.Negate; 590 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 591 return 0; 592} 593 594static int tgsi_dst(struct r600_shader_ctx *ctx, 595 const struct tgsi_full_dst_register *tgsi_dst, 596 unsigned swizzle, 597 struct r600_bc_alu_dst *r600_dst) 598{ 599 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 600 601 r600_dst->sel = tgsi_dst->Register.Index; 602 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 603 r600_dst->chan = swizzle; 604 r600_dst->write = 1; 605 if (tgsi_dst->Register.Indirect) 606 r600_dst->rel = V_SQ_REL_RELATIVE; 607 if (inst->Instruction.Saturate) { 608 r600_dst->clamp = 1; 609 } 610 return 0; 611} 612 613static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 614{ 615 switch (swizzle) { 616 case 0: 617 return tgsi_src->Register.SwizzleX; 618 case 1: 619 return tgsi_src->Register.SwizzleY; 620 case 2: 621 return tgsi_src->Register.SwizzleZ; 622 case 3: 623 return tgsi_src->Register.SwizzleW; 624 default: 625 return 0; 626 } 627} 628 629static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 630{ 631 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 632 struct r600_bc_alu alu; 633 int i, j, k, nconst, r; 634 635 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 636 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 637 nconst++; 638 } 639 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 640 if (r) { 641 return r; 642 } 643 } 644 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 645 if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) { 646 int treg = r600_get_temp(ctx); 647 for (k = 0; k < 4; k++) { 648 memset(&alu, 0, sizeof(struct r600_bc_alu)); 649 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 650 alu.src[0].sel = r600_src[j].sel; 651 alu.src[0].chan = k; 652 alu.dst.sel = treg; 653 alu.dst.chan = k; 654 alu.dst.write = 1; 655 if (k == 3) 656 alu.last = 1; 657 r = r600_bc_add_alu(ctx->bc, &alu); 658 if (r) 659 return r; 660 } 661 r600_src[j].sel = treg; 662 j--; 663 } 664 } 665 return 0; 666} 667 668/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 669static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 670{ 671 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 672 struct r600_bc_alu alu; 673 int i, j, k, nliteral, r; 674 675 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 676 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 677 nliteral++; 678 } 679 } 680 for (i = 0, j = 0; i < inst->Instruction.NumSrcRegs; i++) { 681 if (inst->Src[j].Register.File == TGSI_FILE_IMMEDIATE) { 682 int treg = r600_get_temp(ctx); 683 for (k = 0; k < 4; k++) { 684 memset(&alu, 0, sizeof(struct r600_bc_alu)); 685 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 686 alu.src[0].sel = r600_src[j].sel; 687 alu.src[0].chan = k; 688 alu.dst.sel = treg; 689 alu.dst.chan = k; 690 alu.dst.write = 1; 691 if (k == 3) 692 alu.last = 1; 693 r = r600_bc_add_alu(ctx->bc, &alu); 694 if (r) 695 return r; 696 } 697 r = r600_bc_add_literal(ctx->bc, ctx->value); 698 if (r) 699 return r; 700 r600_src[j].sel = treg; 701 j++; 702 } 703 } 704 return 0; 705} 706 707static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 708{ 709 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 710 struct r600_bc_alu_src r600_src[3]; 711 struct r600_bc_alu alu; 712 int i, j, r; 713 int lasti = 0; 714 715 for (i = 0; i < 4; i++) { 716 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 717 lasti = i; 718 } 719 } 720 721 r = tgsi_split_constant(ctx, r600_src); 722 if (r) 723 return r; 724 for (i = 0; i < lasti + 1; i++) { 725 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 726 continue; 727 728 memset(&alu, 0, sizeof(struct r600_bc_alu)); 729 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 730 if (r) 731 return r; 732 733 alu.inst = ctx->inst_info->r600_opcode; 734 if (!swap) { 735 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 736 alu.src[j] = r600_src[j]; 737 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 738 } 739 } else { 740 alu.src[0] = r600_src[1]; 741 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 742 743 alu.src[1] = r600_src[0]; 744 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 745 } 746 /* handle some special cases */ 747 switch (ctx->inst_info->tgsi_opcode) { 748 case TGSI_OPCODE_SUB: 749 alu.src[1].neg = 1; 750 break; 751 case TGSI_OPCODE_ABS: 752 alu.src[0].abs = 1; 753 break; 754 default: 755 break; 756 } 757 if (i == lasti) { 758 alu.last = 1; 759 } 760 r = r600_bc_add_alu(ctx->bc, &alu); 761 if (r) 762 return r; 763 } 764 return 0; 765} 766 767static int tgsi_op2(struct r600_shader_ctx *ctx) 768{ 769 return tgsi_op2_s(ctx, 0); 770} 771 772static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 773{ 774 return tgsi_op2_s(ctx, 1); 775} 776 777/* 778 * r600 - trunc to -PI..PI range 779 * r700 - normalize by dividing by 2PI 780 * see fdo bug 27901 781 */ 782static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 783 struct r600_bc_alu_src r600_src[3]) 784{ 785 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 786 int r; 787 uint32_t lit_vals[4]; 788 struct r600_bc_alu alu; 789 790 memset(lit_vals, 0, 4*4); 791 r = tgsi_split_constant(ctx, r600_src); 792 if (r) 793 return r; 794 795 r = tgsi_split_literal_constant(ctx, r600_src); 796 if (r) 797 return r; 798 799 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 800 lit_vals[1] = fui(0.5f); 801 802 memset(&alu, 0, sizeof(struct r600_bc_alu)); 803 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 804 alu.is_op3 = 1; 805 806 alu.dst.chan = 0; 807 alu.dst.sel = ctx->temp_reg; 808 alu.dst.write = 1; 809 810 alu.src[0] = r600_src[0]; 811 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 812 813 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 814 alu.src[1].chan = 0; 815 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 816 alu.src[2].chan = 1; 817 alu.last = 1; 818 r = r600_bc_add_alu(ctx->bc, &alu); 819 if (r) 820 return r; 821 r = r600_bc_add_literal(ctx->bc, lit_vals); 822 if (r) 823 return r; 824 825 memset(&alu, 0, sizeof(struct r600_bc_alu)); 826 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 827 828 alu.dst.chan = 0; 829 alu.dst.sel = ctx->temp_reg; 830 alu.dst.write = 1; 831 832 alu.src[0].sel = ctx->temp_reg; 833 alu.src[0].chan = 0; 834 alu.last = 1; 835 r = r600_bc_add_alu(ctx->bc, &alu); 836 if (r) 837 return r; 838 839 if (ctx->bc->chiprev == 0) { 840 lit_vals[0] = fui(3.1415926535897f * 2.0f); 841 lit_vals[1] = fui(-3.1415926535897f); 842 } else { 843 lit_vals[0] = fui(1.0f); 844 lit_vals[1] = fui(-0.5f); 845 } 846 847 memset(&alu, 0, sizeof(struct r600_bc_alu)); 848 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 849 alu.is_op3 = 1; 850 851 alu.dst.chan = 0; 852 alu.dst.sel = ctx->temp_reg; 853 alu.dst.write = 1; 854 855 alu.src[0].sel = ctx->temp_reg; 856 alu.src[0].chan = 0; 857 858 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 859 alu.src[1].chan = 0; 860 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 861 alu.src[2].chan = 1; 862 alu.last = 1; 863 r = r600_bc_add_alu(ctx->bc, &alu); 864 if (r) 865 return r; 866 r = r600_bc_add_literal(ctx->bc, lit_vals); 867 if (r) 868 return r; 869 return 0; 870} 871 872static int tgsi_trig(struct r600_shader_ctx *ctx) 873{ 874 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 875 struct r600_bc_alu_src r600_src[3]; 876 struct r600_bc_alu alu; 877 int i, r; 878 int lasti = 0; 879 880 r = tgsi_setup_trig(ctx, r600_src); 881 if (r) 882 return r; 883 884 memset(&alu, 0, sizeof(struct r600_bc_alu)); 885 alu.inst = ctx->inst_info->r600_opcode; 886 alu.dst.chan = 0; 887 alu.dst.sel = ctx->temp_reg; 888 alu.dst.write = 1; 889 890 alu.src[0].sel = ctx->temp_reg; 891 alu.src[0].chan = 0; 892 alu.last = 1; 893 r = r600_bc_add_alu(ctx->bc, &alu); 894 if (r) 895 return r; 896 897 /* replicate result */ 898 for (i = 0; i < 4; i++) { 899 if (inst->Dst[0].Register.WriteMask & (1 << i)) 900 lasti = i; 901 } 902 for (i = 0; i < lasti + 1; i++) { 903 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 904 continue; 905 906 memset(&alu, 0, sizeof(struct r600_bc_alu)); 907 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 908 909 alu.src[0].sel = ctx->temp_reg; 910 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 911 if (r) 912 return r; 913 if (i == lasti) 914 alu.last = 1; 915 r = r600_bc_add_alu(ctx->bc, &alu); 916 if (r) 917 return r; 918 } 919 return 0; 920} 921 922static int tgsi_scs(struct r600_shader_ctx *ctx) 923{ 924 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 925 struct r600_bc_alu_src r600_src[3]; 926 struct r600_bc_alu alu; 927 int r; 928 929 r = tgsi_setup_trig(ctx, r600_src); 930 if (r) 931 return r; 932 933 934 /* dst.x = COS */ 935 memset(&alu, 0, sizeof(struct r600_bc_alu)); 936 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 937 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 938 if (r) 939 return r; 940 941 alu.src[0].sel = ctx->temp_reg; 942 alu.src[0].chan = 0; 943 alu.last = 1; 944 r = r600_bc_add_alu(ctx->bc, &alu); 945 if (r) 946 return r; 947 948 /* dst.y = SIN */ 949 memset(&alu, 0, sizeof(struct r600_bc_alu)); 950 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 951 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 952 if (r) 953 return r; 954 955 alu.src[0].sel = ctx->temp_reg; 956 alu.src[0].chan = 0; 957 alu.last = 1; 958 r = r600_bc_add_alu(ctx->bc, &alu); 959 if (r) 960 return r; 961 return 0; 962} 963 964static int tgsi_kill(struct r600_shader_ctx *ctx) 965{ 966 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 967 struct r600_bc_alu alu; 968 int i, r; 969 970 for (i = 0; i < 4; i++) { 971 memset(&alu, 0, sizeof(struct r600_bc_alu)); 972 alu.inst = ctx->inst_info->r600_opcode; 973 974 alu.dst.chan = i; 975 976 alu.src[0].sel = V_SQ_ALU_SRC_0; 977 978 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 979 alu.src[1].sel = V_SQ_ALU_SRC_1; 980 alu.src[1].neg = 1; 981 } else { 982 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 983 if (r) 984 return r; 985 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 986 } 987 if (i == 3) { 988 alu.last = 1; 989 } 990 r = r600_bc_add_alu(ctx->bc, &alu); 991 if (r) 992 return r; 993 } 994 r = r600_bc_add_literal(ctx->bc, ctx->value); 995 if (r) 996 return r; 997 998 /* kill must be last in ALU */ 999 ctx->bc->force_add_cf = 1; 1000 ctx->shader->uses_kill = TRUE; 1001 return 0; 1002} 1003 1004static int tgsi_lit(struct r600_shader_ctx *ctx) 1005{ 1006 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1007 struct r600_bc_alu alu; 1008 struct r600_bc_alu_src r600_src[3]; 1009 int r; 1010 1011 r = tgsi_split_constant(ctx, r600_src); 1012 if (r) 1013 return r; 1014 r = tgsi_split_literal_constant(ctx, r600_src); 1015 if (r) 1016 return r; 1017 1018 /* dst.x, <- 1.0 */ 1019 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1020 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1021 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1022 alu.src[0].chan = 0; 1023 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1024 if (r) 1025 return r; 1026 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1027 r = r600_bc_add_alu(ctx->bc, &alu); 1028 if (r) 1029 return r; 1030 1031 /* dst.y = max(src.x, 0.0) */ 1032 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1033 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1034 alu.src[0] = r600_src[0]; 1035 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1036 alu.src[1].chan = 0; 1037 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1038 if (r) 1039 return r; 1040 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1041 r = r600_bc_add_alu(ctx->bc, &alu); 1042 if (r) 1043 return r; 1044 1045 /* dst.w, <- 1.0 */ 1046 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1047 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1048 alu.src[0].sel = V_SQ_ALU_SRC_1; 1049 alu.src[0].chan = 0; 1050 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1051 if (r) 1052 return r; 1053 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1054 alu.last = 1; 1055 r = r600_bc_add_alu(ctx->bc, &alu); 1056 if (r) 1057 return r; 1058 1059 r = r600_bc_add_literal(ctx->bc, ctx->value); 1060 if (r) 1061 return r; 1062 1063 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1064 { 1065 int chan; 1066 int sel; 1067 1068 /* dst.z = log(src.y) */ 1069 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1070 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1071 alu.src[0] = r600_src[0]; 1072 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1073 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1074 if (r) 1075 return r; 1076 alu.last = 1; 1077 r = r600_bc_add_alu(ctx->bc, &alu); 1078 if (r) 1079 return r; 1080 1081 r = r600_bc_add_literal(ctx->bc, ctx->value); 1082 if (r) 1083 return r; 1084 1085 chan = alu.dst.chan; 1086 sel = alu.dst.sel; 1087 1088 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1089 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1090 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1091 alu.src[0] = r600_src[0]; 1092 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1093 alu.src[1].sel = sel; 1094 alu.src[1].chan = chan; 1095 1096 alu.src[2] = r600_src[0]; 1097 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1098 alu.dst.sel = ctx->temp_reg; 1099 alu.dst.chan = 0; 1100 alu.dst.write = 1; 1101 alu.is_op3 = 1; 1102 alu.last = 1; 1103 r = r600_bc_add_alu(ctx->bc, &alu); 1104 if (r) 1105 return r; 1106 1107 r = r600_bc_add_literal(ctx->bc, ctx->value); 1108 if (r) 1109 return r; 1110 /* dst.z = exp(tmp.x) */ 1111 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1112 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1113 alu.src[0].sel = ctx->temp_reg; 1114 alu.src[0].chan = 0; 1115 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1116 if (r) 1117 return r; 1118 alu.last = 1; 1119 r = r600_bc_add_alu(ctx->bc, &alu); 1120 if (r) 1121 return r; 1122 } 1123 return 0; 1124} 1125 1126static int tgsi_rsq(struct r600_shader_ctx *ctx) 1127{ 1128 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1129 struct r600_bc_alu alu; 1130 int i, r; 1131 1132 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1133 1134 /* FIXME: 1135 * For state trackers other than OpenGL, we'll want to use 1136 * _RECIPSQRT_IEEE instead. 1137 */ 1138 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1139 1140 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1141 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1142 if (r) 1143 return r; 1144 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1145 alu.src[i].abs = 1; 1146 } 1147 alu.dst.sel = ctx->temp_reg; 1148 alu.dst.write = 1; 1149 alu.last = 1; 1150 r = r600_bc_add_alu(ctx->bc, &alu); 1151 if (r) 1152 return r; 1153 r = r600_bc_add_literal(ctx->bc, ctx->value); 1154 if (r) 1155 return r; 1156 /* replicate result */ 1157 return tgsi_helper_tempx_replicate(ctx); 1158} 1159 1160static int tgsi_trans(struct r600_shader_ctx *ctx) 1161{ 1162 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1163 struct r600_bc_alu alu; 1164 int i, j, r; 1165 1166 for (i = 0; i < 4; i++) { 1167 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1168 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 1169 alu.inst = ctx->inst_info->r600_opcode; 1170 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1171 r = tgsi_src(ctx, &inst->Src[j], &alu.src[j]); 1172 if (r) 1173 return r; 1174 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1175 } 1176 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1177 if (r) 1178 return r; 1179 alu.last = 1; 1180 r = r600_bc_add_alu(ctx->bc, &alu); 1181 if (r) 1182 return r; 1183 } 1184 } 1185 return 0; 1186} 1187 1188static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1189{ 1190 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1191 struct r600_bc_alu alu; 1192 int i, r; 1193 1194 for (i = 0; i < 4; i++) { 1195 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1196 alu.src[0].sel = ctx->temp_reg; 1197 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1198 alu.dst.chan = i; 1199 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1200 if (r) 1201 return r; 1202 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1203 if (i == 3) 1204 alu.last = 1; 1205 r = r600_bc_add_alu(ctx->bc, &alu); 1206 if (r) 1207 return r; 1208 } 1209 return 0; 1210} 1211 1212static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1213{ 1214 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1215 struct r600_bc_alu alu; 1216 int i, r; 1217 1218 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1219 alu.inst = ctx->inst_info->r600_opcode; 1220 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1221 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1222 if (r) 1223 return r; 1224 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1225 } 1226 alu.dst.sel = ctx->temp_reg; 1227 alu.dst.write = 1; 1228 alu.last = 1; 1229 r = r600_bc_add_alu(ctx->bc, &alu); 1230 if (r) 1231 return r; 1232 r = r600_bc_add_literal(ctx->bc, ctx->value); 1233 if (r) 1234 return r; 1235 /* replicate result */ 1236 return tgsi_helper_tempx_replicate(ctx); 1237} 1238 1239static int tgsi_pow(struct r600_shader_ctx *ctx) 1240{ 1241 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1242 struct r600_bc_alu alu; 1243 int r; 1244 1245 /* LOG2(a) */ 1246 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1247 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1248 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1249 if (r) 1250 return r; 1251 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1252 alu.dst.sel = ctx->temp_reg; 1253 alu.dst.write = 1; 1254 alu.last = 1; 1255 r = r600_bc_add_alu(ctx->bc, &alu); 1256 if (r) 1257 return r; 1258 r = r600_bc_add_literal(ctx->bc,ctx->value); 1259 if (r) 1260 return r; 1261 /* b * LOG2(a) */ 1262 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1263 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); 1264 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1265 if (r) 1266 return r; 1267 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1268 alu.src[1].sel = ctx->temp_reg; 1269 alu.dst.sel = ctx->temp_reg; 1270 alu.dst.write = 1; 1271 alu.last = 1; 1272 r = r600_bc_add_alu(ctx->bc, &alu); 1273 if (r) 1274 return r; 1275 r = r600_bc_add_literal(ctx->bc,ctx->value); 1276 if (r) 1277 return r; 1278 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1279 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1280 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1281 alu.src[0].sel = ctx->temp_reg; 1282 alu.dst.sel = ctx->temp_reg; 1283 alu.dst.write = 1; 1284 alu.last = 1; 1285 r = r600_bc_add_alu(ctx->bc, &alu); 1286 if (r) 1287 return r; 1288 r = r600_bc_add_literal(ctx->bc,ctx->value); 1289 if (r) 1290 return r; 1291 return tgsi_helper_tempx_replicate(ctx); 1292} 1293 1294static int tgsi_ssg(struct r600_shader_ctx *ctx) 1295{ 1296 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1297 struct r600_bc_alu alu; 1298 struct r600_bc_alu_src r600_src[3]; 1299 int i, r; 1300 1301 r = tgsi_split_constant(ctx, r600_src); 1302 if (r) 1303 return r; 1304 1305 /* tmp = (src > 0 ? 1 : src) */ 1306 for (i = 0; i < 4; i++) { 1307 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1308 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1309 alu.is_op3 = 1; 1310 1311 alu.dst.sel = ctx->temp_reg; 1312 alu.dst.chan = i; 1313 1314 alu.src[0] = r600_src[0]; 1315 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1316 1317 alu.src[1].sel = V_SQ_ALU_SRC_1; 1318 1319 alu.src[2] = r600_src[0]; 1320 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1321 if (i == 3) 1322 alu.last = 1; 1323 r = r600_bc_add_alu(ctx->bc, &alu); 1324 if (r) 1325 return r; 1326 } 1327 r = r600_bc_add_literal(ctx->bc, ctx->value); 1328 if (r) 1329 return r; 1330 1331 /* dst = (-tmp > 0 ? -1 : tmp) */ 1332 for (i = 0; i < 4; i++) { 1333 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1334 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1335 alu.is_op3 = 1; 1336 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1337 if (r) 1338 return r; 1339 1340 alu.src[0].sel = ctx->temp_reg; 1341 alu.src[0].chan = i; 1342 alu.src[0].neg = 1; 1343 1344 alu.src[1].sel = V_SQ_ALU_SRC_1; 1345 alu.src[1].neg = 1; 1346 1347 alu.src[2].sel = ctx->temp_reg; 1348 alu.src[2].chan = i; 1349 1350 if (i == 3) 1351 alu.last = 1; 1352 r = r600_bc_add_alu(ctx->bc, &alu); 1353 if (r) 1354 return r; 1355 } 1356 return 0; 1357} 1358 1359static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1360{ 1361 struct r600_bc_alu alu; 1362 int i, r; 1363 1364 r = r600_bc_add_literal(ctx->bc, ctx->value); 1365 if (r) 1366 return r; 1367 for (i = 0; i < 4; i++) { 1368 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1369 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1370 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1371 alu.dst.chan = i; 1372 } else { 1373 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1374 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1375 if (r) 1376 return r; 1377 alu.src[0].sel = ctx->temp_reg; 1378 alu.src[0].chan = i; 1379 } 1380 if (i == 3) { 1381 alu.last = 1; 1382 } 1383 r = r600_bc_add_alu(ctx->bc, &alu); 1384 if (r) 1385 return r; 1386 } 1387 return 0; 1388} 1389 1390static int tgsi_op3(struct r600_shader_ctx *ctx) 1391{ 1392 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1393 struct r600_bc_alu_src r600_src[3]; 1394 struct r600_bc_alu alu; 1395 int i, j, r; 1396 1397 r = tgsi_split_constant(ctx, r600_src); 1398 if (r) 1399 return r; 1400 /* do it in 2 step as op3 doesn't support writemask */ 1401 for (i = 0; i < 4; i++) { 1402 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1403 alu.inst = ctx->inst_info->r600_opcode; 1404 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1405 alu.src[j] = r600_src[j]; 1406 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1407 } 1408 alu.dst.sel = ctx->temp_reg; 1409 alu.dst.chan = i; 1410 alu.dst.write = 1; 1411 alu.is_op3 = 1; 1412 if (i == 3) { 1413 alu.last = 1; 1414 } 1415 r = r600_bc_add_alu(ctx->bc, &alu); 1416 if (r) 1417 return r; 1418 } 1419 return tgsi_helper_copy(ctx, inst); 1420} 1421 1422static int tgsi_dp(struct r600_shader_ctx *ctx) 1423{ 1424 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1425 struct r600_bc_alu_src r600_src[3]; 1426 struct r600_bc_alu alu; 1427 int i, j, r; 1428 1429 r = tgsi_split_constant(ctx, r600_src); 1430 if (r) 1431 return r; 1432 for (i = 0; i < 4; i++) { 1433 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1434 alu.inst = ctx->inst_info->r600_opcode; 1435 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1436 alu.src[j] = r600_src[j]; 1437 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1438 } 1439 alu.dst.sel = ctx->temp_reg; 1440 alu.dst.chan = i; 1441 alu.dst.write = 1; 1442 /* handle some special cases */ 1443 switch (ctx->inst_info->tgsi_opcode) { 1444 case TGSI_OPCODE_DP2: 1445 if (i > 1) { 1446 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1447 alu.src[0].chan = alu.src[1].chan = 0; 1448 } 1449 break; 1450 case TGSI_OPCODE_DP3: 1451 if (i > 2) { 1452 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1453 alu.src[0].chan = alu.src[1].chan = 0; 1454 } 1455 break; 1456 case TGSI_OPCODE_DPH: 1457 if (i == 3) { 1458 alu.src[0].sel = V_SQ_ALU_SRC_1; 1459 alu.src[0].chan = 0; 1460 alu.src[0].neg = 0; 1461 } 1462 break; 1463 default: 1464 break; 1465 } 1466 if (i == 3) { 1467 alu.last = 1; 1468 } 1469 r = r600_bc_add_alu(ctx->bc, &alu); 1470 if (r) 1471 return r; 1472 } 1473 return tgsi_helper_copy(ctx, inst); 1474} 1475 1476static int tgsi_tex(struct r600_shader_ctx *ctx) 1477{ 1478 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1479 struct r600_bc_tex tex; 1480 struct r600_bc_alu alu; 1481 unsigned src_gpr; 1482 int r, i; 1483 int opcode; 1484 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; 1485 uint32_t lit_vals[4]; 1486 1487 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1488 1489 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1490 /* Add perspective divide */ 1491 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1492 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1493 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1494 if (r) 1495 return r; 1496 1497 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1498 alu.dst.sel = ctx->temp_reg; 1499 alu.dst.chan = 3; 1500 alu.last = 1; 1501 alu.dst.write = 1; 1502 r = r600_bc_add_alu(ctx->bc, &alu); 1503 if (r) 1504 return r; 1505 1506 for (i = 0; i < 3; i++) { 1507 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1508 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1509 alu.src[0].sel = ctx->temp_reg; 1510 alu.src[0].chan = 3; 1511 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1512 if (r) 1513 return r; 1514 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1515 alu.dst.sel = ctx->temp_reg; 1516 alu.dst.chan = i; 1517 alu.dst.write = 1; 1518 r = r600_bc_add_alu(ctx->bc, &alu); 1519 if (r) 1520 return r; 1521 } 1522 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1523 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1524 alu.src[0].sel = V_SQ_ALU_SRC_1; 1525 alu.src[0].chan = 0; 1526 alu.dst.sel = ctx->temp_reg; 1527 alu.dst.chan = 3; 1528 alu.last = 1; 1529 alu.dst.write = 1; 1530 r = r600_bc_add_alu(ctx->bc, &alu); 1531 if (r) 1532 return r; 1533 src_not_temp = FALSE; 1534 src_gpr = ctx->temp_reg; 1535 } 1536 1537 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1538 int src_chan, src2_chan; 1539 1540 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1541 for (i = 0; i < 4; i++) { 1542 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1543 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1544 switch (i) { 1545 case 0: 1546 src_chan = 2; 1547 src2_chan = 1; 1548 break; 1549 case 1: 1550 src_chan = 2; 1551 src2_chan = 0; 1552 break; 1553 case 2: 1554 src_chan = 0; 1555 src2_chan = 2; 1556 break; 1557 case 3: 1558 src_chan = 1; 1559 src2_chan = 2; 1560 break; 1561 } 1562 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1563 if (r) 1564 return r; 1565 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1566 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1567 if (r) 1568 return r; 1569 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1570 alu.dst.sel = ctx->temp_reg; 1571 alu.dst.chan = i; 1572 if (i == 3) 1573 alu.last = 1; 1574 alu.dst.write = 1; 1575 r = r600_bc_add_alu(ctx->bc, &alu); 1576 if (r) 1577 return r; 1578 } 1579 1580 /* tmp1.z = RCP_e(|tmp1.z|) */ 1581 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1582 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1583 alu.src[0].sel = ctx->temp_reg; 1584 alu.src[0].chan = 2; 1585 alu.src[0].abs = 1; 1586 alu.dst.sel = ctx->temp_reg; 1587 alu.dst.chan = 2; 1588 alu.dst.write = 1; 1589 alu.last = 1; 1590 r = r600_bc_add_alu(ctx->bc, &alu); 1591 if (r) 1592 return r; 1593 1594 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1595 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1596 * muladd has no writemask, have to use another temp 1597 */ 1598 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1599 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1600 alu.is_op3 = 1; 1601 1602 alu.src[0].sel = ctx->temp_reg; 1603 alu.src[0].chan = 0; 1604 alu.src[1].sel = ctx->temp_reg; 1605 alu.src[1].chan = 2; 1606 1607 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1608 alu.src[2].chan = 0; 1609 1610 alu.dst.sel = ctx->temp_reg; 1611 alu.dst.chan = 0; 1612 alu.dst.write = 1; 1613 1614 r = r600_bc_add_alu(ctx->bc, &alu); 1615 if (r) 1616 return r; 1617 1618 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1619 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1620 alu.is_op3 = 1; 1621 1622 alu.src[0].sel = ctx->temp_reg; 1623 alu.src[0].chan = 1; 1624 alu.src[1].sel = ctx->temp_reg; 1625 alu.src[1].chan = 2; 1626 1627 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1628 alu.src[2].chan = 0; 1629 1630 alu.dst.sel = ctx->temp_reg; 1631 alu.dst.chan = 1; 1632 alu.dst.write = 1; 1633 1634 alu.last = 1; 1635 r = r600_bc_add_alu(ctx->bc, &alu); 1636 if (r) 1637 return r; 1638 1639 lit_vals[0] = fui(1.5f); 1640 1641 r = r600_bc_add_literal(ctx->bc, lit_vals); 1642 if (r) 1643 return r; 1644 src_not_temp = FALSE; 1645 src_gpr = ctx->temp_reg; 1646 } 1647 1648 if (src_not_temp) { 1649 for (i = 0; i < 4; i++) { 1650 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1651 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1652 alu.src[0].sel = src_gpr; 1653 alu.src[0].chan = i; 1654 alu.dst.sel = ctx->temp_reg; 1655 alu.dst.chan = i; 1656 if (i == 3) 1657 alu.last = 1; 1658 alu.dst.write = 1; 1659 r = r600_bc_add_alu(ctx->bc, &alu); 1660 if (r) 1661 return r; 1662 } 1663 src_gpr = ctx->temp_reg; 1664 } 1665 1666 opcode = ctx->inst_info->r600_opcode; 1667 if (opcode == SQ_TEX_INST_SAMPLE && 1668 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1669 opcode = SQ_TEX_INST_SAMPLE_C; 1670 1671 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1672 tex.inst = opcode; 1673 tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1674 tex.sampler_id = tex.resource_id; 1675 tex.src_gpr = src_gpr; 1676 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1677 tex.dst_sel_x = 0; 1678 tex.dst_sel_y = 1; 1679 tex.dst_sel_z = 2; 1680 tex.dst_sel_w = 3; 1681 tex.src_sel_x = 0; 1682 tex.src_sel_y = 1; 1683 tex.src_sel_z = 2; 1684 tex.src_sel_w = 3; 1685 1686 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1687 tex.src_sel_x = 1; 1688 tex.src_sel_y = 0; 1689 tex.src_sel_z = 3; 1690 tex.src_sel_w = 1; 1691 } 1692 1693 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1694 tex.coord_type_x = 1; 1695 tex.coord_type_y = 1; 1696 tex.coord_type_z = 1; 1697 tex.coord_type_w = 1; 1698 } 1699 1700 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1701 tex.src_sel_w = 2; 1702 1703 r = r600_bc_add_tex(ctx->bc, &tex); 1704 if (r) 1705 return r; 1706 1707 /* add shadow ambient support - gallium doesn't do it yet */ 1708 return 0; 1709 1710} 1711 1712static int tgsi_lrp(struct r600_shader_ctx *ctx) 1713{ 1714 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1715 struct r600_bc_alu_src r600_src[3]; 1716 struct r600_bc_alu alu; 1717 unsigned i; 1718 int r; 1719 1720 r = tgsi_split_constant(ctx, r600_src); 1721 if (r) 1722 return r; 1723 /* 1 - src0 */ 1724 for (i = 0; i < 4; i++) { 1725 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1726 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1727 alu.src[0].sel = V_SQ_ALU_SRC_1; 1728 alu.src[0].chan = 0; 1729 alu.src[1] = r600_src[0]; 1730 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1731 alu.src[1].neg = 1; 1732 alu.dst.sel = ctx->temp_reg; 1733 alu.dst.chan = i; 1734 if (i == 3) { 1735 alu.last = 1; 1736 } 1737 alu.dst.write = 1; 1738 r = r600_bc_add_alu(ctx->bc, &alu); 1739 if (r) 1740 return r; 1741 } 1742 r = r600_bc_add_literal(ctx->bc, ctx->value); 1743 if (r) 1744 return r; 1745 1746 /* (1 - src0) * src2 */ 1747 for (i = 0; i < 4; i++) { 1748 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1749 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1750 alu.src[0].sel = ctx->temp_reg; 1751 alu.src[0].chan = i; 1752 alu.src[1] = r600_src[2]; 1753 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1754 alu.dst.sel = ctx->temp_reg; 1755 alu.dst.chan = i; 1756 if (i == 3) { 1757 alu.last = 1; 1758 } 1759 alu.dst.write = 1; 1760 r = r600_bc_add_alu(ctx->bc, &alu); 1761 if (r) 1762 return r; 1763 } 1764 r = r600_bc_add_literal(ctx->bc, ctx->value); 1765 if (r) 1766 return r; 1767 1768 /* src0 * src1 + (1 - src0) * src2 */ 1769 for (i = 0; i < 4; i++) { 1770 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1771 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1772 alu.is_op3 = 1; 1773 alu.src[0] = r600_src[0]; 1774 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1775 alu.src[1] = r600_src[1]; 1776 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 1777 alu.src[2].sel = ctx->temp_reg; 1778 alu.src[2].chan = i; 1779 alu.dst.sel = ctx->temp_reg; 1780 alu.dst.chan = i; 1781 if (i == 3) { 1782 alu.last = 1; 1783 } 1784 r = r600_bc_add_alu(ctx->bc, &alu); 1785 if (r) 1786 return r; 1787 } 1788 return tgsi_helper_copy(ctx, inst); 1789} 1790 1791static int tgsi_cmp(struct r600_shader_ctx *ctx) 1792{ 1793 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1794 struct r600_bc_alu_src r600_src[3]; 1795 struct r600_bc_alu alu; 1796 int use_temp = 0; 1797 int i, r; 1798 1799 r = tgsi_split_constant(ctx, r600_src); 1800 if (r) 1801 return r; 1802 1803 if (inst->Dst[0].Register.WriteMask != 0xf) 1804 use_temp = 1; 1805 1806 for (i = 0; i < 4; i++) { 1807 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1808 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 1809 alu.src[0] = r600_src[0]; 1810 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1811 1812 alu.src[1] = r600_src[2]; 1813 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1814 1815 alu.src[2] = r600_src[1]; 1816 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 1817 1818 if (use_temp) 1819 alu.dst.sel = ctx->temp_reg; 1820 else { 1821 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1822 if (r) 1823 return r; 1824 } 1825 alu.dst.chan = i; 1826 alu.dst.write = 1; 1827 alu.is_op3 = 1; 1828 if (i == 3) 1829 alu.last = 1; 1830 r = r600_bc_add_alu(ctx->bc, &alu); 1831 if (r) 1832 return r; 1833 } 1834 if (use_temp) 1835 return tgsi_helper_copy(ctx, inst); 1836 return 0; 1837} 1838 1839static int tgsi_xpd(struct r600_shader_ctx *ctx) 1840{ 1841 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1842 struct r600_bc_alu_src r600_src[3]; 1843 struct r600_bc_alu alu; 1844 uint32_t use_temp = 0; 1845 int i, r; 1846 1847 if (inst->Dst[0].Register.WriteMask != 0xf) 1848 use_temp = 1; 1849 1850 r = tgsi_split_constant(ctx, r600_src); 1851 if (r) 1852 return r; 1853 1854 for (i = 0; i < 4; i++) { 1855 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1856 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1857 1858 alu.src[0] = r600_src[0]; 1859 switch (i) { 1860 case 0: 1861 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 1862 break; 1863 case 1: 1864 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1865 break; 1866 case 2: 1867 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1868 break; 1869 case 3: 1870 alu.src[0].sel = V_SQ_ALU_SRC_0; 1871 alu.src[0].chan = i; 1872 } 1873 1874 alu.src[1] = r600_src[1]; 1875 switch (i) { 1876 case 0: 1877 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 1878 break; 1879 case 1: 1880 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 1881 break; 1882 case 2: 1883 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 1884 break; 1885 case 3: 1886 alu.src[1].sel = V_SQ_ALU_SRC_0; 1887 alu.src[1].chan = i; 1888 } 1889 1890 alu.dst.sel = ctx->temp_reg; 1891 alu.dst.chan = i; 1892 alu.dst.write = 1; 1893 1894 if (i == 3) 1895 alu.last = 1; 1896 r = r600_bc_add_alu(ctx->bc, &alu); 1897 if (r) 1898 return r; 1899 1900 r = r600_bc_add_literal(ctx->bc, ctx->value); 1901 if (r) 1902 return r; 1903 } 1904 1905 for (i = 0; i < 4; i++) { 1906 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1907 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1908 1909 alu.src[0] = r600_src[0]; 1910 switch (i) { 1911 case 0: 1912 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1913 break; 1914 case 1: 1915 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 1916 break; 1917 case 2: 1918 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1919 break; 1920 case 3: 1921 alu.src[0].sel = V_SQ_ALU_SRC_0; 1922 alu.src[0].chan = i; 1923 } 1924 1925 alu.src[1] = r600_src[1]; 1926 switch (i) { 1927 case 0: 1928 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 1929 break; 1930 case 1: 1931 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 1932 break; 1933 case 2: 1934 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 1935 break; 1936 case 3: 1937 alu.src[1].sel = V_SQ_ALU_SRC_0; 1938 alu.src[1].chan = i; 1939 } 1940 1941 alu.src[2].sel = ctx->temp_reg; 1942 alu.src[2].neg = 1; 1943 alu.src[2].chan = i; 1944 1945 if (use_temp) 1946 alu.dst.sel = ctx->temp_reg; 1947 else { 1948 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1949 if (r) 1950 return r; 1951 } 1952 alu.dst.chan = i; 1953 alu.dst.write = 1; 1954 alu.is_op3 = 1; 1955 if (i == 3) 1956 alu.last = 1; 1957 r = r600_bc_add_alu(ctx->bc, &alu); 1958 if (r) 1959 return r; 1960 1961 r = r600_bc_add_literal(ctx->bc, ctx->value); 1962 if (r) 1963 return r; 1964 } 1965 if (use_temp) 1966 return tgsi_helper_copy(ctx, inst); 1967 return 0; 1968} 1969 1970static int tgsi_exp(struct r600_shader_ctx *ctx) 1971{ 1972 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1973 struct r600_bc_alu_src r600_src[3]; 1974 struct r600_bc_alu alu; 1975 int r; 1976 1977 /* result.x = 2^floor(src); */ 1978 if (inst->Dst[0].Register.WriteMask & 1) { 1979 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1980 1981 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 1982 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1983 if (r) 1984 return r; 1985 1986 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1987 1988 alu.dst.sel = ctx->temp_reg; 1989 alu.dst.chan = 0; 1990 alu.dst.write = 1; 1991 alu.last = 1; 1992 r = r600_bc_add_alu(ctx->bc, &alu); 1993 if (r) 1994 return r; 1995 1996 r = r600_bc_add_literal(ctx->bc, ctx->value); 1997 if (r) 1998 return r; 1999 2000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2001 alu.src[0].sel = ctx->temp_reg; 2002 alu.src[0].chan = 0; 2003 2004 alu.dst.sel = ctx->temp_reg; 2005 alu.dst.chan = 0; 2006 alu.dst.write = 1; 2007 alu.last = 1; 2008 r = r600_bc_add_alu(ctx->bc, &alu); 2009 if (r) 2010 return r; 2011 2012 r = r600_bc_add_literal(ctx->bc, ctx->value); 2013 if (r) 2014 return r; 2015 } 2016 2017 /* result.y = tmp - floor(tmp); */ 2018 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2019 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2020 2021 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2022 alu.src[0] = r600_src[0]; 2023 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2024 if (r) 2025 return r; 2026 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2027 2028 alu.dst.sel = ctx->temp_reg; 2029// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2030// if (r) 2031// return r; 2032 alu.dst.write = 1; 2033 alu.dst.chan = 1; 2034 2035 alu.last = 1; 2036 2037 r = r600_bc_add_alu(ctx->bc, &alu); 2038 if (r) 2039 return r; 2040 r = r600_bc_add_literal(ctx->bc, ctx->value); 2041 if (r) 2042 return r; 2043 } 2044 2045 /* result.z = RoughApprox2ToX(tmp);*/ 2046 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2047 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2048 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2049 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2050 if (r) 2051 return r; 2052 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2053 2054 alu.dst.sel = ctx->temp_reg; 2055 alu.dst.write = 1; 2056 alu.dst.chan = 2; 2057 2058 alu.last = 1; 2059 2060 r = r600_bc_add_alu(ctx->bc, &alu); 2061 if (r) 2062 return r; 2063 r = r600_bc_add_literal(ctx->bc, ctx->value); 2064 if (r) 2065 return r; 2066 } 2067 2068 /* result.w = 1.0;*/ 2069 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2070 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2071 2072 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2073 alu.src[0].sel = V_SQ_ALU_SRC_1; 2074 alu.src[0].chan = 0; 2075 2076 alu.dst.sel = ctx->temp_reg; 2077 alu.dst.chan = 3; 2078 alu.dst.write = 1; 2079 alu.last = 1; 2080 r = r600_bc_add_alu(ctx->bc, &alu); 2081 if (r) 2082 return r; 2083 r = r600_bc_add_literal(ctx->bc, ctx->value); 2084 if (r) 2085 return r; 2086 } 2087 return tgsi_helper_copy(ctx, inst); 2088} 2089 2090static int tgsi_log(struct r600_shader_ctx *ctx) 2091{ 2092 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2093 struct r600_bc_alu alu; 2094 int r; 2095 2096 /* result.x = floor(log2(src)); */ 2097 if (inst->Dst[0].Register.WriteMask & 1) { 2098 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2099 2100 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2101 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2102 if (r) 2103 return r; 2104 2105 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2106 2107 alu.dst.sel = ctx->temp_reg; 2108 alu.dst.chan = 0; 2109 alu.dst.write = 1; 2110 alu.last = 1; 2111 r = r600_bc_add_alu(ctx->bc, &alu); 2112 if (r) 2113 return r; 2114 2115 r = r600_bc_add_literal(ctx->bc, ctx->value); 2116 if (r) 2117 return r; 2118 2119 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2120 alu.src[0].sel = ctx->temp_reg; 2121 alu.src[0].chan = 0; 2122 2123 alu.dst.sel = ctx->temp_reg; 2124 alu.dst.chan = 0; 2125 alu.dst.write = 1; 2126 alu.last = 1; 2127 2128 r = r600_bc_add_alu(ctx->bc, &alu); 2129 if (r) 2130 return r; 2131 2132 r = r600_bc_add_literal(ctx->bc, ctx->value); 2133 if (r) 2134 return r; 2135 } 2136 2137 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2138 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2139 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2140 2141 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2142 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2143 if (r) 2144 return r; 2145 2146 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2147 2148 alu.dst.sel = ctx->temp_reg; 2149 alu.dst.chan = 1; 2150 alu.dst.write = 1; 2151 alu.last = 1; 2152 2153 r = r600_bc_add_alu(ctx->bc, &alu); 2154 if (r) 2155 return r; 2156 2157 r = r600_bc_add_literal(ctx->bc, ctx->value); 2158 if (r) 2159 return r; 2160 2161 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2162 2163 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2164 alu.src[0].sel = ctx->temp_reg; 2165 alu.src[0].chan = 1; 2166 2167 alu.dst.sel = ctx->temp_reg; 2168 alu.dst.chan = 1; 2169 alu.dst.write = 1; 2170 alu.last = 1; 2171 2172 r = r600_bc_add_alu(ctx->bc, &alu); 2173 if (r) 2174 return r; 2175 2176 r = r600_bc_add_literal(ctx->bc, ctx->value); 2177 if (r) 2178 return r; 2179 2180 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2181 2182 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2183 alu.src[0].sel = ctx->temp_reg; 2184 alu.src[0].chan = 1; 2185 2186 alu.dst.sel = ctx->temp_reg; 2187 alu.dst.chan = 1; 2188 alu.dst.write = 1; 2189 alu.last = 1; 2190 2191 r = r600_bc_add_alu(ctx->bc, &alu); 2192 if (r) 2193 return r; 2194 2195 r = r600_bc_add_literal(ctx->bc, ctx->value); 2196 if (r) 2197 return r; 2198 2199 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2200 2201 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2202 alu.src[0].sel = ctx->temp_reg; 2203 alu.src[0].chan = 1; 2204 2205 alu.dst.sel = ctx->temp_reg; 2206 alu.dst.chan = 1; 2207 alu.dst.write = 1; 2208 alu.last = 1; 2209 2210 r = r600_bc_add_alu(ctx->bc, &alu); 2211 if (r) 2212 return r; 2213 2214 r = r600_bc_add_literal(ctx->bc, ctx->value); 2215 if (r) 2216 return r; 2217 2218 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2219 2220 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2221 2222 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2223 if (r) 2224 return r; 2225 2226 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2227 2228 alu.src[1].sel = ctx->temp_reg; 2229 alu.src[1].chan = 1; 2230 2231 alu.dst.sel = ctx->temp_reg; 2232 alu.dst.chan = 1; 2233 alu.dst.write = 1; 2234 alu.last = 1; 2235 2236 r = r600_bc_add_alu(ctx->bc, &alu); 2237 if (r) 2238 return r; 2239 2240 r = r600_bc_add_literal(ctx->bc, ctx->value); 2241 if (r) 2242 return r; 2243 } 2244 2245 /* result.z = log2(src);*/ 2246 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2247 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2248 2249 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2250 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2251 if (r) 2252 return r; 2253 2254 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2255 2256 alu.dst.sel = ctx->temp_reg; 2257 alu.dst.write = 1; 2258 alu.dst.chan = 2; 2259 alu.last = 1; 2260 2261 r = r600_bc_add_alu(ctx->bc, &alu); 2262 if (r) 2263 return r; 2264 2265 r = r600_bc_add_literal(ctx->bc, ctx->value); 2266 if (r) 2267 return r; 2268 } 2269 2270 /* result.w = 1.0; */ 2271 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2272 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2273 2274 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2275 alu.src[0].sel = V_SQ_ALU_SRC_1; 2276 alu.src[0].chan = 0; 2277 2278 alu.dst.sel = ctx->temp_reg; 2279 alu.dst.chan = 3; 2280 alu.dst.write = 1; 2281 alu.last = 1; 2282 2283 r = r600_bc_add_alu(ctx->bc, &alu); 2284 if (r) 2285 return r; 2286 2287 r = r600_bc_add_literal(ctx->bc, ctx->value); 2288 if (r) 2289 return r; 2290 } 2291 2292 return tgsi_helper_copy(ctx, inst); 2293} 2294 2295/* r6/7 only for now */ 2296static int tgsi_arl(struct r600_shader_ctx *ctx) 2297{ 2298 /* TODO from r600c, ar values don't persist between clauses */ 2299 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2300 struct r600_bc_alu alu; 2301 int r; 2302 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2303 2304 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2305 2306 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2307 if (r) 2308 return r; 2309 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2310 2311 alu.last = 1; 2312 2313 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2314 if (r) 2315 return r; 2316 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2317 return 0; 2318} 2319 2320static int tgsi_opdst(struct r600_shader_ctx *ctx) 2321{ 2322 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2323 struct r600_bc_alu alu; 2324 int i, r = 0; 2325 2326 for (i = 0; i < 4; i++) { 2327 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2328 2329 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2330 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2331 if (r) 2332 return r; 2333 2334 if (i == 0 || i == 3) { 2335 alu.src[0].sel = V_SQ_ALU_SRC_1; 2336 } else { 2337 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2338 if (r) 2339 return r; 2340 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2341 } 2342 2343 if (i == 0 || i == 2) { 2344 alu.src[1].sel = V_SQ_ALU_SRC_1; 2345 } else { 2346 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2347 if (r) 2348 return r; 2349 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2350 } 2351 if (i == 3) 2352 alu.last = 1; 2353 r = r600_bc_add_alu(ctx->bc, &alu); 2354 if (r) 2355 return r; 2356 } 2357 return 0; 2358} 2359 2360static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2361{ 2362 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2363 struct r600_bc_alu alu; 2364 int r; 2365 2366 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2367 alu.inst = opcode; 2368 alu.predicate = 1; 2369 2370 alu.dst.sel = ctx->temp_reg; 2371 alu.dst.write = 1; 2372 alu.dst.chan = 0; 2373 2374 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2375 if (r) 2376 return r; 2377 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2378 alu.src[1].sel = V_SQ_ALU_SRC_0; 2379 alu.src[1].chan = 0; 2380 2381 alu.last = 1; 2382 2383 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2384 if (r) 2385 return r; 2386 return 0; 2387} 2388 2389static int pops(struct r600_shader_ctx *ctx, int pops) 2390{ 2391 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2392 ctx->bc->cf_last->pop_count = pops; 2393 return 0; 2394} 2395 2396static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2397{ 2398 switch(reason) { 2399 case FC_PUSH_VPM: 2400 ctx->bc->callstack[ctx->bc->call_sp].current--; 2401 break; 2402 case FC_PUSH_WQM: 2403 case FC_LOOP: 2404 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2405 break; 2406 case FC_REP: 2407 /* TOODO : for 16 vp asic should -= 2; */ 2408 ctx->bc->callstack[ctx->bc->call_sp].current --; 2409 break; 2410 } 2411} 2412 2413static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2414{ 2415 if (check_max_only) { 2416 int diff; 2417 switch (reason) { 2418 case FC_PUSH_VPM: 2419 diff = 1; 2420 break; 2421 case FC_PUSH_WQM: 2422 diff = 4; 2423 break; 2424 default: 2425 assert(0); 2426 diff = 0; 2427 } 2428 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2429 ctx->bc->callstack[ctx->bc->call_sp].max) { 2430 ctx->bc->callstack[ctx->bc->call_sp].max = 2431 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2432 } 2433 return; 2434 } 2435 switch (reason) { 2436 case FC_PUSH_VPM: 2437 ctx->bc->callstack[ctx->bc->call_sp].current++; 2438 break; 2439 case FC_PUSH_WQM: 2440 case FC_LOOP: 2441 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2442 break; 2443 case FC_REP: 2444 ctx->bc->callstack[ctx->bc->call_sp].current++; 2445 break; 2446 } 2447 2448 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2449 ctx->bc->callstack[ctx->bc->call_sp].max) { 2450 ctx->bc->callstack[ctx->bc->call_sp].max = 2451 ctx->bc->callstack[ctx->bc->call_sp].current; 2452 } 2453} 2454 2455static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2456{ 2457 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2458 2459 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2460 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2461 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2462 sp->num_mid++; 2463} 2464 2465static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2466{ 2467 ctx->bc->fc_sp++; 2468 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2469 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2470} 2471 2472static void fc_poplevel(struct r600_shader_ctx *ctx) 2473{ 2474 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2475 if (sp->mid) { 2476 free(sp->mid); 2477 sp->mid = NULL; 2478 } 2479 sp->num_mid = 0; 2480 sp->start = NULL; 2481 sp->type = 0; 2482 ctx->bc->fc_sp--; 2483} 2484 2485#if 0 2486static int emit_return(struct r600_shader_ctx *ctx) 2487{ 2488 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2489 return 0; 2490} 2491 2492static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2493{ 2494 2495 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2496 ctx->bc->cf_last->pop_count = pops; 2497 /* TODO work out offset */ 2498 return 0; 2499} 2500 2501static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2502{ 2503 return 0; 2504} 2505 2506static void emit_testflag(struct r600_shader_ctx *ctx) 2507{ 2508 2509} 2510 2511static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2512{ 2513 emit_testflag(ctx); 2514 emit_jump_to_offset(ctx, 1, 4); 2515 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2516 pops(ctx, ifidx + 1); 2517 emit_return(ctx); 2518} 2519 2520static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2521{ 2522 emit_testflag(ctx); 2523 2524 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2525 ctx->bc->cf_last->pop_count = 1; 2526 2527 fc_set_mid(ctx, fc_sp); 2528 2529 pops(ctx, 1); 2530} 2531#endif 2532 2533static int tgsi_if(struct r600_shader_ctx *ctx) 2534{ 2535 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2536 2537 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2538 2539 fc_pushlevel(ctx, FC_IF); 2540 2541 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2542 return 0; 2543} 2544 2545static int tgsi_else(struct r600_shader_ctx *ctx) 2546{ 2547 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2548 ctx->bc->cf_last->pop_count = 1; 2549 2550 fc_set_mid(ctx, ctx->bc->fc_sp); 2551 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2552 return 0; 2553} 2554 2555static int tgsi_endif(struct r600_shader_ctx *ctx) 2556{ 2557 pops(ctx, 1); 2558 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2559 R600_ERR("if/endif unbalanced in shader\n"); 2560 return -1; 2561 } 2562 2563 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2564 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2565 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2566 } else { 2567 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2568 } 2569 fc_poplevel(ctx); 2570 2571 callstack_decrease_current(ctx, FC_PUSH_VPM); 2572 return 0; 2573} 2574 2575static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2576{ 2577 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2578 2579 fc_pushlevel(ctx, FC_LOOP); 2580 2581 /* check stack depth */ 2582 callstack_check_depth(ctx, FC_LOOP, 0); 2583 return 0; 2584} 2585 2586static int tgsi_endloop(struct r600_shader_ctx *ctx) 2587{ 2588 int i; 2589 2590 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2591 2592 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2593 R600_ERR("loop/endloop in shader code are not paired.\n"); 2594 return -EINVAL; 2595 } 2596 2597 /* fixup loop pointers - from r600isa 2598 LOOP END points to CF after LOOP START, 2599 LOOP START point to CF after LOOP END 2600 BRK/CONT point to LOOP END CF 2601 */ 2602 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2603 2604 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2605 2606 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2607 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2608 } 2609 /* TODO add LOOPRET support */ 2610 fc_poplevel(ctx); 2611 callstack_decrease_current(ctx, FC_LOOP); 2612 return 0; 2613} 2614 2615static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2616{ 2617 unsigned int fscp; 2618 2619 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2620 { 2621 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2622 break; 2623 } 2624 2625 if (fscp == 0) { 2626 R600_ERR("Break not inside loop/endloop pair\n"); 2627 return -EINVAL; 2628 } 2629 2630 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2631 ctx->bc->cf_last->pop_count = 1; 2632 2633 fc_set_mid(ctx, fscp); 2634 2635 pops(ctx, 1); 2636 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2637 return 0; 2638} 2639 2640static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2641 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, 2642 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2643 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2644 2645 /* FIXME: 2646 * For state trackers other than OpenGL, we'll want to use 2647 * _RECIP_IEEE instead. 2648 */ 2649 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 2650 2651 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 2652 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2653 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 2654 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2655 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2656 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2657 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2658 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2659 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2660 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2661 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2662 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2663 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2664 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2665 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2666 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2667 /* gap */ 2668 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2669 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2670 /* gap */ 2671 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2672 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2673 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2674 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2675 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2676 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2677 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2678 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2679 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2680 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2681 /* gap */ 2682 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2683 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2684 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2685 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2686 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2687 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2688 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2689 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2690 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2691 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2692 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2693 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2694 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2695 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2696 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2697 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2698 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2699 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2700 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2701 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2702 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2703 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2704 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2705 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2706 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2707 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2708 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2709 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2710 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2711 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2712 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2713 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2714 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2715 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2716 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2717 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2718 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2719 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2720 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2721 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2722 {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2723 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2724 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2725 /* gap */ 2726 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2727 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2728 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2729 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2730 /* gap */ 2731 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2732 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2733 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2734 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2735 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2736 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2737 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2738 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 2739 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2740 /* gap */ 2741 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2742 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2743 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2744 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2745 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2746 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2747 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2748 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2749 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2750 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2751 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2752 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2753 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2754 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2755 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2756 /* gap */ 2757 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2758 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2759 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2760 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2761 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2762 /* gap */ 2763 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2764 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2765 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2766 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2767 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2768 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2769 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2770 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2771 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2772 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2773 /* gap */ 2774 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2775 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2776 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2777 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2778 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2779 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2780 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2781 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2782 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2783 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2784 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2785 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2786 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2787 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2788 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2789 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2790 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2791 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2792 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2793 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2794 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2795 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2796 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2797 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2798 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2799 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2800 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2801 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2802}; 2803 2804static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 2805 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2806 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2807 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2808 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 2809 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 2810 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2811 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2812 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2813 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2814 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2815 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2816 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2817 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2818 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2819 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2820 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2821 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2822 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2823 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2824 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2825 /* gap */ 2826 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2827 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2828 /* gap */ 2829 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2830 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2831 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2832 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2833 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2834 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2835 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2836 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2837 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2838 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2839 /* gap */ 2840 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2841 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2842 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2843 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2844 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2845 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2846 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2847 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2848 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2849 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2850 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2851 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2852 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2853 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2854 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2855 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2856 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2857 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2858 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2859 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2860 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2861 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2862 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2863 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2864 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2865 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2866 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2867 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2868 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2869 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2870 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2871 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2872 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2873 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2874 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2875 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2876 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2877 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2878 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2879 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2880 {TGSI_OPCODE_TXL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2881 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2882 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2883 /* gap */ 2884 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2885 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2886 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2887 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2888 /* gap */ 2889 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2890 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2891 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2892 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2893 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2894 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2895 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2896 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 2897 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2898 /* gap */ 2899 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2900 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2901 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2902 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2903 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2904 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2905 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2906 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2907 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2908 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2909 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2910 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2911 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2912 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2913 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2914 /* gap */ 2915 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2916 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2917 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2918 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2919 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2920 /* gap */ 2921 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2922 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2923 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2924 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2925 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2926 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2927 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2928 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2929 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2930 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2931 /* gap */ 2932 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2933 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2934 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2935 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2936 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2937 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2938 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2939 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2940 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2941 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2942 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2943 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2944 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2945 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2946 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2947 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2948 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2949 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2950 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2951 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2952 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2953 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2954 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2955 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2956 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2957 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2958 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2959 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2960}; 2961