r600_shader.c revision dffad730df17983cfaef0808555a8c26cad0aa15
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_opcodes.h" 32#include "r600d.h" 33#include <stdio.h> 34#include <errno.h> 35 36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) 37{ 38 struct r600_pipe_state *rstate = &shader->rstate; 39 struct r600_shader *rshader = &shader->shader; 40 unsigned spi_vs_out_id[10]; 41 unsigned i, tmp; 42 43 /* clear previous register */ 44 rstate->nregs = 0; 45 46 /* so far never got proper semantic id from tgsi */ 47 /* FIXME better to move this in config things so they get emited 48 * only one time per cs 49 */ 50 for (i = 0; i < 10; i++) { 51 spi_vs_out_id[i] = 0; 52 } 53 for (i = 0; i < 32; i++) { 54 tmp = i << ((i & 3) * 8); 55 spi_vs_out_id[i / 4] |= tmp; 56 } 57 for (i = 0; i < 10; i++) { 58 r600_pipe_state_add_reg(rstate, 59 R_028614_SPI_VS_OUT_ID_0 + i * 4, 60 spi_vs_out_id[i], 0xFFFFFFFF, NULL); 61 } 62 63 r600_pipe_state_add_reg(rstate, 64 R_0286C4_SPI_VS_OUT_CONFIG, 65 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 66 0xFFFFFFFF, NULL); 67 r600_pipe_state_add_reg(rstate, 68 R_028868_SQ_PGM_RESOURCES_VS, 69 S_028868_NUM_GPRS(rshader->bc.ngpr) | 70 S_028868_STACK_SIZE(rshader->bc.nstack), 71 0xFFFFFFFF, NULL); 72 r600_pipe_state_add_reg(rstate, 73 R_0288D0_SQ_PGM_CF_OFFSET_VS, 74 0x00000000, 0xFFFFFFFF, NULL); 75 r600_pipe_state_add_reg(rstate, 76 R_028858_SQ_PGM_START_VS, 77 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 78 79 r600_pipe_state_add_reg(rstate, 80 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, 81 0xFFFFFFFF, NULL); 82 83} 84 85int r600_find_vs_semantic_index(struct r600_shader *vs, 86 struct r600_shader *ps, int id) 87{ 88 struct r600_shader_io *input = &ps->input[id]; 89 90 for (int i = 0; i < vs->noutput; i++) { 91 if (input->name == vs->output[i].name && 92 input->sid == vs->output[i].sid) { 93 return i - 1; 94 } 95 } 96 return 0; 97} 98 99static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) 100{ 101 struct r600_pipe_state *rstate = &shader->rstate; 102 struct r600_shader *rshader = &shader->shader; 103 unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; 104 int pos_index = -1, face_index = -1; 105 106 rstate->nregs = 0; 107 108 for (i = 0; i < rshader->ninput; i++) { 109 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 110 pos_index = i; 111 if (rshader->input[i].name == TGSI_SEMANTIC_FACE) 112 face_index = i; 113 } 114 115 for (i = 0; i < rshader->noutput; i++) { 116 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 117 r600_pipe_state_add_reg(rstate, 118 R_02880C_DB_SHADER_CONTROL, 119 S_02880C_Z_EXPORT_ENABLE(1), 120 S_02880C_Z_EXPORT_ENABLE(1), NULL); 121 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 122 r600_pipe_state_add_reg(rstate, 123 R_02880C_DB_SHADER_CONTROL, 124 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), 125 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); 126 } 127 128 exports_ps = 0; 129 num_cout = 0; 130 for (i = 0; i < rshader->noutput; i++) { 131 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 132 exports_ps |= 1; 133 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 134 num_cout++; 135 } 136 } 137 exports_ps |= S_028854_EXPORT_COLORS(num_cout); 138 if (!exports_ps) { 139 /* always at least export 1 component per pixel */ 140 exports_ps = 2; 141 } 142 143 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 144 S_0286CC_PERSP_GRADIENT_ENA(1); 145 spi_input_z = 0; 146 if (pos_index != -1) { 147 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | 148 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | 149 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | 150 S_0286CC_BARYC_SAMPLE_CNTL(1)); 151 spi_input_z |= 1; 152 } 153 154 spi_ps_in_control_1 = 0; 155 if (face_index != -1) { 156 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 157 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 158 } 159 160 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); 161 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); 162 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); 163 r600_pipe_state_add_reg(rstate, 164 R_028840_SQ_PGM_START_PS, 165 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 166 r600_pipe_state_add_reg(rstate, 167 R_028850_SQ_PGM_RESOURCES_PS, 168 S_028868_NUM_GPRS(rshader->bc.ngpr) | 169 S_028868_STACK_SIZE(rshader->bc.nstack), 170 0xFFFFFFFF, NULL); 171 r600_pipe_state_add_reg(rstate, 172 R_028854_SQ_PGM_EXPORTS_PS, 173 exports_ps, 0xFFFFFFFF, NULL); 174 r600_pipe_state_add_reg(rstate, 175 R_0288CC_SQ_PGM_CF_OFFSET_PS, 176 0x00000000, 0xFFFFFFFF, NULL); 177 178 if (rshader->uses_kill) { 179 /* only set some bits here, the other bits are set in the dsa state */ 180 r600_pipe_state_add_reg(rstate, 181 R_02880C_DB_SHADER_CONTROL, 182 S_02880C_KILL_ENABLE(1), 183 S_02880C_KILL_ENABLE(1), NULL); 184 } 185 r600_pipe_state_add_reg(rstate, 186 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, 187 0xFFFFFFFF, NULL); 188} 189 190int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 191{ 192 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 193 struct r600_shader *rshader = &shader->shader; 194 void *ptr; 195 196 /* copy new shader */ 197 if (shader->bo == NULL) { 198 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); 199 if (shader->bo == NULL) { 200 return -ENOMEM; 201 } 202 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 203 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); 204 r600_bo_unmap(rctx->radeon, shader->bo); 205 } 206 /* build state */ 207 switch (rshader->processor_type) { 208 case TGSI_PROCESSOR_VERTEX: 209 if (rshader->family >= CHIP_CEDAR) { 210 evergreen_pipe_shader_vs(ctx, shader); 211 } else { 212 r600_pipe_shader_vs(ctx, shader); 213 } 214 break; 215 case TGSI_PROCESSOR_FRAGMENT: 216 if (rshader->family >= CHIP_CEDAR) { 217 evergreen_pipe_shader_ps(ctx, shader); 218 } else { 219 r600_pipe_shader_ps(ctx, shader); 220 } 221 break; 222 default: 223 return -EINVAL; 224 } 225 return 0; 226} 227 228int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 229int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 230{ 231 static int dump_shaders = -1; 232 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 233 int r; 234 235 /* Would like some magic "get_bool_option_once" routine. 236 */ 237 if (dump_shaders == -1) 238 dump_shaders = debug_get_bool_option("R600_DUMP_SHADERS", FALSE); 239 240 if (dump_shaders) { 241 fprintf(stderr, "--------------------------------------------------------------\n"); 242 tgsi_dump(tokens, 0); 243 } 244 shader->shader.family = r600_get_family(rctx->radeon); 245 r = r600_shader_from_tgsi(tokens, &shader->shader); 246 if (r) { 247 R600_ERR("translation from TGSI failed !\n"); 248 return r; 249 } 250 r = r600_bc_build(&shader->shader.bc); 251 if (r) { 252 R600_ERR("building bytecode failed !\n"); 253 return r; 254 } 255 if (dump_shaders) { 256 r600_bc_dump(&shader->shader.bc); 257 fprintf(stderr, "______________________________________________________________\n"); 258 } 259 return r600_pipe_shader(ctx, shader); 260} 261 262void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 263{ 264 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 265 266 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 267 r600_bc_clear(&shader->shader.bc); 268} 269 270/* 271 * tgsi -> r600 shader 272 */ 273struct r600_shader_tgsi_instruction; 274 275struct r600_shader_ctx { 276 struct tgsi_shader_info info; 277 struct tgsi_parse_context parse; 278 const struct tgsi_token *tokens; 279 unsigned type; 280 unsigned file_offset[TGSI_FILE_COUNT]; 281 unsigned temp_reg; 282 struct r600_shader_tgsi_instruction *inst_info; 283 struct r600_bc *bc; 284 struct r600_shader *shader; 285 u32 value[4]; 286 u32 *literals; 287 u32 nliterals; 288 u32 max_driver_temp_used; 289 /* needed for evergreen interpolation */ 290 boolean input_centroid; 291 boolean input_linear; 292 boolean input_perspective; 293 int num_interp_gpr; 294}; 295 296struct r600_shader_tgsi_instruction { 297 unsigned tgsi_opcode; 298 unsigned is_op3; 299 unsigned r600_opcode; 300 int (*process)(struct r600_shader_ctx *ctx); 301}; 302 303static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 304static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 305 306static int tgsi_is_supported(struct r600_shader_ctx *ctx) 307{ 308 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 309 int j; 310 311 if (i->Instruction.NumDstRegs > 1) { 312 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 313 return -EINVAL; 314 } 315 if (i->Instruction.Predicate) { 316 R600_ERR("predicate unsupported\n"); 317 return -EINVAL; 318 } 319#if 0 320 if (i->Instruction.Label) { 321 R600_ERR("label unsupported\n"); 322 return -EINVAL; 323 } 324#endif 325 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 326 if (i->Src[j].Register.Dimension) { 327 R600_ERR("unsupported src %d (dimension %d)\n", j, 328 i->Src[j].Register.Dimension); 329 return -EINVAL; 330 } 331 } 332 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 333 if (i->Dst[j].Register.Dimension) { 334 R600_ERR("unsupported dst (dimension)\n"); 335 return -EINVAL; 336 } 337 } 338 return 0; 339} 340 341static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 342{ 343 int i, r; 344 struct r600_bc_alu alu; 345 int gpr = 0, base_chan = 0; 346 int ij_index = 0; 347 348 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 349 ij_index = 0; 350 if (ctx->shader->input[input].centroid) 351 ij_index++; 352 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 353 ij_index = 0; 354 /* if we have perspective add one */ 355 if (ctx->input_perspective) { 356 ij_index++; 357 /* if we have perspective centroid */ 358 if (ctx->input_centroid) 359 ij_index++; 360 } 361 if (ctx->shader->input[input].centroid) 362 ij_index++; 363 } 364 365 /* work out gpr and base_chan from index */ 366 gpr = ij_index / 2; 367 base_chan = (2 * (ij_index % 2)) + 1; 368 369 for (i = 0; i < 8; i++) { 370 memset(&alu, 0, sizeof(struct r600_bc_alu)); 371 372 if (i < 4) 373 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 374 else 375 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 376 377 if ((i > 1) && (i < 6)) { 378 alu.dst.sel = ctx->shader->input[input].gpr; 379 alu.dst.write = 1; 380 } 381 382 alu.dst.chan = i % 4; 383 384 alu.src[0].sel = gpr; 385 alu.src[0].chan = (base_chan - (i % 2)); 386 387 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 388 389 alu.bank_swizzle_force = SQ_ALU_VEC_210; 390 if ((i % 4) == 3) 391 alu.last = 1; 392 r = r600_bc_add_alu(ctx->bc, &alu); 393 if (r) 394 return r; 395 } 396 return 0; 397} 398 399 400static int tgsi_declaration(struct r600_shader_ctx *ctx) 401{ 402 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 403 unsigned i; 404 405 switch (d->Declaration.File) { 406 case TGSI_FILE_INPUT: 407 i = ctx->shader->ninput++; 408 ctx->shader->input[i].name = d->Semantic.Name; 409 ctx->shader->input[i].sid = d->Semantic.Index; 410 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 411 ctx->shader->input[i].centroid = d->Declaration.Centroid; 412 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 413 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { 414 /* turn input into interpolate on EG */ 415 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 416 if (ctx->shader->input[i].interpolate > 0) { 417 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 418 evergreen_interp_alu(ctx, i); 419 } 420 } 421 } 422 break; 423 case TGSI_FILE_OUTPUT: 424 i = ctx->shader->noutput++; 425 ctx->shader->output[i].name = d->Semantic.Name; 426 ctx->shader->output[i].sid = d->Semantic.Index; 427 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 428 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 429 break; 430 case TGSI_FILE_CONSTANT: 431 case TGSI_FILE_TEMPORARY: 432 case TGSI_FILE_SAMPLER: 433 case TGSI_FILE_ADDRESS: 434 break; 435 default: 436 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 437 return -EINVAL; 438 } 439 return 0; 440} 441 442static int r600_get_temp(struct r600_shader_ctx *ctx) 443{ 444 return ctx->temp_reg + ctx->max_driver_temp_used++; 445} 446 447/* 448 * for evergreen we need to scan the shader to find the number of GPRs we need to 449 * reserve for interpolation. 450 * 451 * we need to know if we are going to emit 452 * any centroid inputs 453 * if perspective and linear are required 454*/ 455static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 456{ 457 int i; 458 int num_baryc; 459 460 ctx->input_linear = FALSE; 461 ctx->input_perspective = FALSE; 462 ctx->input_centroid = FALSE; 463 ctx->num_interp_gpr = 1; 464 465 /* any centroid inputs */ 466 for (i = 0; i < ctx->info.num_inputs; i++) { 467 /* skip position/face */ 468 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 469 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 470 continue; 471 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 472 ctx->input_linear = TRUE; 473 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 474 ctx->input_perspective = TRUE; 475 if (ctx->info.input_centroid[i]) 476 ctx->input_centroid = TRUE; 477 } 478 479 num_baryc = 0; 480 /* ignoring sample for now */ 481 if (ctx->input_perspective) 482 num_baryc++; 483 if (ctx->input_linear) 484 num_baryc++; 485 if (ctx->input_centroid) 486 num_baryc *= 2; 487 488 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 489 490 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 491 return ctx->num_interp_gpr; 492} 493 494int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 495{ 496 struct tgsi_full_immediate *immediate; 497 struct r600_shader_ctx ctx; 498 struct r600_bc_output output[32]; 499 unsigned output_done, noutput; 500 unsigned opcode; 501 int i, r = 0, pos0; 502 503 ctx.bc = &shader->bc; 504 ctx.shader = shader; 505 r = r600_bc_init(ctx.bc, shader->family); 506 if (r) 507 return r; 508 ctx.tokens = tokens; 509 tgsi_scan_shader(tokens, &ctx.info); 510 tgsi_parse_init(&ctx.parse, tokens); 511 ctx.type = ctx.parse.FullHeader.Processor.Processor; 512 shader->processor_type = ctx.type; 513 ctx.bc->type = shader->processor_type; 514 515 /* register allocations */ 516 /* Values [0,127] correspond to GPR[0..127]. 517 * Values [128,159] correspond to constant buffer bank 0 518 * Values [160,191] correspond to constant buffer bank 1 519 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG) 520 * Values [256,287] correspond to constant buffer bank 2 (EG) 521 * Values [288,319] correspond to constant buffer bank 3 (EG) 522 * Other special values are shown in the list below. 523 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 524 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 525 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 526 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 527 * 248 SQ_ALU_SRC_0: special constant 0.0. 528 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 529 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 530 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 531 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 532 * 253 SQ_ALU_SRC_LITERAL: literal constant. 533 * 254 SQ_ALU_SRC_PV: previous vector result. 534 * 255 SQ_ALU_SRC_PS: previous scalar result. 535 */ 536 for (i = 0; i < TGSI_FILE_COUNT; i++) { 537 ctx.file_offset[i] = 0; 538 } 539 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 540 ctx.file_offset[TGSI_FILE_INPUT] = 1; 541 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { 542 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 543 } else { 544 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 545 } 546 } 547 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { 548 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 549 } 550 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 551 ctx.info.file_count[TGSI_FILE_INPUT]; 552 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 553 ctx.info.file_count[TGSI_FILE_OUTPUT]; 554 555 /* Outside the GPR range. This will be translated to one of the 556 * kcache banks later. */ 557 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 558 559 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 560 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 561 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 562 563 ctx.nliterals = 0; 564 ctx.literals = NULL; 565 566 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 567 tgsi_parse_token(&ctx.parse); 568 switch (ctx.parse.FullToken.Token.Type) { 569 case TGSI_TOKEN_TYPE_IMMEDIATE: 570 immediate = &ctx.parse.FullToken.FullImmediate; 571 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 572 if(ctx.literals == NULL) { 573 r = -ENOMEM; 574 goto out_err; 575 } 576 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 577 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 578 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 579 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 580 ctx.nliterals++; 581 break; 582 case TGSI_TOKEN_TYPE_DECLARATION: 583 r = tgsi_declaration(&ctx); 584 if (r) 585 goto out_err; 586 break; 587 case TGSI_TOKEN_TYPE_INSTRUCTION: 588 r = tgsi_is_supported(&ctx); 589 if (r) 590 goto out_err; 591 ctx.max_driver_temp_used = 0; 592 /* reserve first tmp for everyone */ 593 r600_get_temp(&ctx); 594 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 595 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) 596 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 597 else 598 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 599 r = ctx.inst_info->process(&ctx); 600 if (r) 601 goto out_err; 602 r = r600_bc_add_literal(ctx.bc, ctx.value); 603 if (r) 604 goto out_err; 605 break; 606 case TGSI_TOKEN_TYPE_PROPERTY: 607 break; 608 default: 609 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 610 r = -EINVAL; 611 goto out_err; 612 } 613 } 614 /* export output */ 615 noutput = shader->noutput; 616 for (i = 0, pos0 = 0; i < noutput; i++) { 617 memset(&output[i], 0, sizeof(struct r600_bc_output)); 618 output[i].gpr = shader->output[i].gpr; 619 output[i].elem_size = 3; 620 output[i].swizzle_x = 0; 621 output[i].swizzle_y = 1; 622 output[i].swizzle_z = 2; 623 output[i].swizzle_w = 3; 624 output[i].barrier = 1; 625 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 626 output[i].array_base = i - pos0; 627 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 628 switch (ctx.type) { 629 case TGSI_PROCESSOR_VERTEX: 630 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 631 output[i].array_base = 60; 632 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 633 /* position doesn't count in array_base */ 634 pos0++; 635 } 636 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 637 output[i].array_base = 61; 638 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 639 /* position doesn't count in array_base */ 640 pos0++; 641 } 642 break; 643 case TGSI_PROCESSOR_FRAGMENT: 644 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 645 output[i].array_base = shader->output[i].sid; 646 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 647 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 648 output[i].array_base = 61; 649 output[i].swizzle_x = 2; 650 output[i].swizzle_y = 7; 651 output[i].swizzle_z = output[i].swizzle_w = 7; 652 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 653 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 654 output[i].array_base = 61; 655 output[i].swizzle_x = 7; 656 output[i].swizzle_y = 1; 657 output[i].swizzle_z = output[i].swizzle_w = 7; 658 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 659 } else { 660 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 661 r = -EINVAL; 662 goto out_err; 663 } 664 break; 665 default: 666 R600_ERR("unsupported processor type %d\n", ctx.type); 667 r = -EINVAL; 668 goto out_err; 669 } 670 } 671 /* add fake param output for vertex shader if no param is exported */ 672 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 673 for (i = 0, pos0 = 0; i < noutput; i++) { 674 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 675 pos0 = 1; 676 break; 677 } 678 } 679 if (!pos0) { 680 memset(&output[i], 0, sizeof(struct r600_bc_output)); 681 output[i].gpr = 0; 682 output[i].elem_size = 3; 683 output[i].swizzle_x = 0; 684 output[i].swizzle_y = 1; 685 output[i].swizzle_z = 2; 686 output[i].swizzle_w = 3; 687 output[i].barrier = 1; 688 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 689 output[i].array_base = 0; 690 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 691 noutput++; 692 } 693 } 694 /* add fake pixel export */ 695 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 696 memset(&output[0], 0, sizeof(struct r600_bc_output)); 697 output[0].gpr = 0; 698 output[0].elem_size = 3; 699 output[0].swizzle_x = 7; 700 output[0].swizzle_y = 7; 701 output[0].swizzle_z = 7; 702 output[0].swizzle_w = 7; 703 output[0].barrier = 1; 704 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 705 output[0].array_base = 0; 706 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 707 noutput++; 708 } 709 /* set export done on last export of each type */ 710 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 711 if (i == (noutput - 1)) { 712 output[i].end_of_program = 1; 713 } 714 if (!(output_done & (1 << output[i].type))) { 715 output_done |= (1 << output[i].type); 716 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 717 } 718 } 719 /* add output to bytecode */ 720 for (i = 0; i < noutput; i++) { 721 r = r600_bc_add_output(ctx.bc, &output[i]); 722 if (r) 723 goto out_err; 724 } 725 free(ctx.literals); 726 tgsi_parse_free(&ctx.parse); 727 return 0; 728out_err: 729 free(ctx.literals); 730 tgsi_parse_free(&ctx.parse); 731 return r; 732} 733 734static int tgsi_unsupported(struct r600_shader_ctx *ctx) 735{ 736 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 737 return -EINVAL; 738} 739 740static int tgsi_end(struct r600_shader_ctx *ctx) 741{ 742 return 0; 743} 744 745static int tgsi_src(struct r600_shader_ctx *ctx, 746 const struct tgsi_full_src_register *tgsi_src, 747 struct r600_bc_alu_src *r600_src) 748{ 749 int index; 750 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 751 r600_src->sel = tgsi_src->Register.Index; 752 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 753 r600_src->sel = 0; 754 index = tgsi_src->Register.Index; 755 ctx->value[0] = ctx->literals[index * 4 + 0]; 756 ctx->value[1] = ctx->literals[index * 4 + 1]; 757 ctx->value[2] = ctx->literals[index * 4 + 2]; 758 ctx->value[3] = ctx->literals[index * 4 + 3]; 759 } 760 if (tgsi_src->Register.Indirect) 761 r600_src->rel = V_SQ_REL_RELATIVE; 762 r600_src->neg = tgsi_src->Register.Negate; 763 r600_src->abs = tgsi_src->Register.Absolute; 764 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 765 return 0; 766} 767 768static int tgsi_dst(struct r600_shader_ctx *ctx, 769 const struct tgsi_full_dst_register *tgsi_dst, 770 unsigned swizzle, 771 struct r600_bc_alu_dst *r600_dst) 772{ 773 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 774 775 r600_dst->sel = tgsi_dst->Register.Index; 776 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 777 r600_dst->chan = swizzle; 778 r600_dst->write = 1; 779 if (tgsi_dst->Register.Indirect) 780 r600_dst->rel = V_SQ_REL_RELATIVE; 781 if (inst->Instruction.Saturate) { 782 r600_dst->clamp = 1; 783 } 784 return 0; 785} 786 787static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 788{ 789 switch (swizzle) { 790 case 0: 791 return tgsi_src->Register.SwizzleX; 792 case 1: 793 return tgsi_src->Register.SwizzleY; 794 case 2: 795 return tgsi_src->Register.SwizzleZ; 796 case 3: 797 return tgsi_src->Register.SwizzleW; 798 default: 799 return 0; 800 } 801} 802 803static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 804{ 805 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 806 struct r600_bc_alu alu; 807 int i, j, k, nconst, r; 808 809 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 810 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 811 nconst++; 812 } 813 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 814 if (r) { 815 return r; 816 } 817 } 818 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 819 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 820 int treg = r600_get_temp(ctx); 821 for (k = 0; k < 4; k++) { 822 memset(&alu, 0, sizeof(struct r600_bc_alu)); 823 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 824 alu.src[0].sel = r600_src[i].sel; 825 alu.src[0].chan = k; 826 alu.src[0].rel = r600_src[i].rel; 827 alu.dst.sel = treg; 828 alu.dst.chan = k; 829 alu.dst.write = 1; 830 if (k == 3) 831 alu.last = 1; 832 r = r600_bc_add_alu(ctx->bc, &alu); 833 if (r) 834 return r; 835 } 836 r600_src[i].sel = treg; 837 r600_src[i].rel =0; 838 j--; 839 } 840 } 841 return 0; 842} 843 844/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 845static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 846{ 847 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 848 struct r600_bc_alu alu; 849 int i, j, k, nliteral, r; 850 851 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 852 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 853 nliteral++; 854 } 855 } 856 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 857 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 858 int treg = r600_get_temp(ctx); 859 for (k = 0; k < 4; k++) { 860 memset(&alu, 0, sizeof(struct r600_bc_alu)); 861 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 862 alu.src[0].sel = r600_src[i].sel; 863 alu.src[0].chan = k; 864 alu.dst.sel = treg; 865 alu.dst.chan = k; 866 alu.dst.write = 1; 867 if (k == 3) 868 alu.last = 1; 869 r = r600_bc_add_alu(ctx->bc, &alu); 870 if (r) 871 return r; 872 } 873 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); 874 if (r) 875 return r; 876 r600_src[i].sel = treg; 877 j--; 878 } 879 } 880 return 0; 881} 882 883static int tgsi_last_instruction(unsigned writemask) 884{ 885 int i, lasti = 0; 886 887 for (i = 0; i < 4; i++) { 888 if (writemask & (1 << i)) { 889 lasti = i; 890 } 891 } 892 return lasti; 893} 894 895static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 896{ 897 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 898 struct r600_bc_alu_src r600_src[3]; 899 struct r600_bc_alu alu; 900 int i, j, r; 901 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 902 903 r = tgsi_split_constant(ctx, r600_src); 904 if (r) 905 return r; 906 r = tgsi_split_literal_constant(ctx, r600_src); 907 if (r) 908 return r; 909 for (i = 0; i < lasti + 1; i++) { 910 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 911 continue; 912 913 memset(&alu, 0, sizeof(struct r600_bc_alu)); 914 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 915 if (r) 916 return r; 917 918 alu.inst = ctx->inst_info->r600_opcode; 919 if (!swap) { 920 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 921 alu.src[j] = r600_src[j]; 922 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 923 } 924 } else { 925 alu.src[0] = r600_src[1]; 926 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 927 928 alu.src[1] = r600_src[0]; 929 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 930 } 931 /* handle some special cases */ 932 switch (ctx->inst_info->tgsi_opcode) { 933 case TGSI_OPCODE_SUB: 934 alu.src[1].neg = 1; 935 break; 936 case TGSI_OPCODE_ABS: 937 alu.src[0].abs = 1; 938 break; 939 default: 940 break; 941 } 942 if (i == lasti) { 943 alu.last = 1; 944 } 945 r = r600_bc_add_alu(ctx->bc, &alu); 946 if (r) 947 return r; 948 } 949 return 0; 950} 951 952static int tgsi_op2(struct r600_shader_ctx *ctx) 953{ 954 return tgsi_op2_s(ctx, 0); 955} 956 957static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 958{ 959 return tgsi_op2_s(ctx, 1); 960} 961 962/* 963 * r600 - trunc to -PI..PI range 964 * r700 - normalize by dividing by 2PI 965 * see fdo bug 27901 966 */ 967static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 968 struct r600_bc_alu_src r600_src[3]) 969{ 970 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 971 int r, src0_chan; 972 uint32_t lit_vals[4]; 973 struct r600_bc_alu alu; 974 975 memset(lit_vals, 0, 4*4); 976 r = tgsi_split_constant(ctx, r600_src); 977 if (r) 978 return r; 979 r = tgsi_split_literal_constant(ctx, r600_src); 980 if (r) 981 return r; 982 983 src0_chan = tgsi_chan(&inst->Src[0], 0); 984 985 /* We are going to feed two literals to the MAD below, 986 * which means that if the first operand is a literal as well, 987 * we need to copy its value manually. 988 */ 989 if (r600_src[0].sel == V_SQ_ALU_SRC_LITERAL) { 990 unsigned index = inst->Src[0].Register.Index; 991 992 lit_vals[2] = ctx->literals[index * 4 + src0_chan]; 993 src0_chan = 2; 994 } 995 996 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 997 lit_vals[1] = fui(0.5f); 998 999 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1000 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1001 alu.is_op3 = 1; 1002 1003 alu.dst.chan = 0; 1004 alu.dst.sel = ctx->temp_reg; 1005 alu.dst.write = 1; 1006 1007 alu.src[0] = r600_src[0]; 1008 alu.src[0].chan = src0_chan; 1009 1010 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1011 alu.src[1].chan = 0; 1012 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1013 alu.src[2].chan = 1; 1014 alu.last = 1; 1015 r = r600_bc_add_alu(ctx->bc, &alu); 1016 if (r) 1017 return r; 1018 r = r600_bc_add_literal(ctx->bc, lit_vals); 1019 if (r) 1020 return r; 1021 1022 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1023 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1024 1025 alu.dst.chan = 0; 1026 alu.dst.sel = ctx->temp_reg; 1027 alu.dst.write = 1; 1028 1029 alu.src[0].sel = ctx->temp_reg; 1030 alu.src[0].chan = 0; 1031 alu.last = 1; 1032 r = r600_bc_add_alu(ctx->bc, &alu); 1033 if (r) 1034 return r; 1035 1036 if (ctx->bc->chiprev == CHIPREV_R600) { 1037 lit_vals[0] = fui(3.1415926535897f * 2.0f); 1038 lit_vals[1] = fui(-3.1415926535897f); 1039 } else { 1040 lit_vals[0] = fui(1.0f); 1041 lit_vals[1] = fui(-0.5f); 1042 } 1043 1044 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1045 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1046 alu.is_op3 = 1; 1047 1048 alu.dst.chan = 0; 1049 alu.dst.sel = ctx->temp_reg; 1050 alu.dst.write = 1; 1051 1052 alu.src[0].sel = ctx->temp_reg; 1053 alu.src[0].chan = 0; 1054 1055 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1056 alu.src[1].chan = 0; 1057 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1058 alu.src[2].chan = 1; 1059 alu.last = 1; 1060 r = r600_bc_add_alu(ctx->bc, &alu); 1061 if (r) 1062 return r; 1063 r = r600_bc_add_literal(ctx->bc, lit_vals); 1064 if (r) 1065 return r; 1066 return 0; 1067} 1068 1069static int tgsi_trig(struct r600_shader_ctx *ctx) 1070{ 1071 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1072 struct r600_bc_alu_src r600_src[3]; 1073 struct r600_bc_alu alu; 1074 int i, r; 1075 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1076 1077 r = tgsi_setup_trig(ctx, r600_src); 1078 if (r) 1079 return r; 1080 1081 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1082 alu.inst = ctx->inst_info->r600_opcode; 1083 alu.dst.chan = 0; 1084 alu.dst.sel = ctx->temp_reg; 1085 alu.dst.write = 1; 1086 1087 alu.src[0].sel = ctx->temp_reg; 1088 alu.src[0].chan = 0; 1089 alu.last = 1; 1090 r = r600_bc_add_alu(ctx->bc, &alu); 1091 if (r) 1092 return r; 1093 1094 /* replicate result */ 1095 for (i = 0; i < lasti + 1; i++) { 1096 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1097 continue; 1098 1099 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1100 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1101 1102 alu.src[0].sel = ctx->temp_reg; 1103 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1104 if (r) 1105 return r; 1106 if (i == lasti) 1107 alu.last = 1; 1108 r = r600_bc_add_alu(ctx->bc, &alu); 1109 if (r) 1110 return r; 1111 } 1112 return 0; 1113} 1114 1115static int tgsi_scs(struct r600_shader_ctx *ctx) 1116{ 1117 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1118 struct r600_bc_alu_src r600_src[3]; 1119 struct r600_bc_alu alu; 1120 int r; 1121 1122 /* We'll only need the trig stuff if we are going to write to the 1123 * X or Y components of the destination vector. 1124 */ 1125 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1126 r = tgsi_setup_trig(ctx, r600_src); 1127 if (r) 1128 return r; 1129 } 1130 1131 /* dst.x = COS */ 1132 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1133 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1134 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1135 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1136 if (r) 1137 return r; 1138 1139 alu.src[0].sel = ctx->temp_reg; 1140 alu.src[0].chan = 0; 1141 alu.last = 1; 1142 r = r600_bc_add_alu(ctx->bc, &alu); 1143 if (r) 1144 return r; 1145 } 1146 1147 /* dst.y = SIN */ 1148 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1149 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1150 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1151 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1152 if (r) 1153 return r; 1154 1155 alu.src[0].sel = ctx->temp_reg; 1156 alu.src[0].chan = 0; 1157 alu.last = 1; 1158 r = r600_bc_add_alu(ctx->bc, &alu); 1159 if (r) 1160 return r; 1161 } 1162 1163 /* dst.z = 0.0; */ 1164 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1165 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1166 1167 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1168 1169 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1170 if (r) 1171 return r; 1172 1173 alu.src[0].sel = V_SQ_ALU_SRC_0; 1174 alu.src[0].chan = 0; 1175 1176 alu.last = 1; 1177 1178 r = r600_bc_add_alu(ctx->bc, &alu); 1179 if (r) 1180 return r; 1181 1182 r = r600_bc_add_literal(ctx->bc, ctx->value); 1183 if (r) 1184 return r; 1185 } 1186 1187 /* dst.w = 1.0; */ 1188 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1189 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1190 1191 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1192 1193 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1194 if (r) 1195 return r; 1196 1197 alu.src[0].sel = V_SQ_ALU_SRC_1; 1198 alu.src[0].chan = 0; 1199 1200 alu.last = 1; 1201 1202 r = r600_bc_add_alu(ctx->bc, &alu); 1203 if (r) 1204 return r; 1205 1206 r = r600_bc_add_literal(ctx->bc, ctx->value); 1207 if (r) 1208 return r; 1209 } 1210 1211 return 0; 1212} 1213 1214static int tgsi_kill(struct r600_shader_ctx *ctx) 1215{ 1216 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1217 struct r600_bc_alu alu; 1218 int i, r; 1219 1220 for (i = 0; i < 4; i++) { 1221 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1222 alu.inst = ctx->inst_info->r600_opcode; 1223 1224 alu.dst.chan = i; 1225 1226 alu.src[0].sel = V_SQ_ALU_SRC_0; 1227 1228 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1229 alu.src[1].sel = V_SQ_ALU_SRC_1; 1230 alu.src[1].neg = 1; 1231 } else { 1232 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1233 if (r) 1234 return r; 1235 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1236 } 1237 if (i == 3) { 1238 alu.last = 1; 1239 } 1240 r = r600_bc_add_alu(ctx->bc, &alu); 1241 if (r) 1242 return r; 1243 } 1244 r = r600_bc_add_literal(ctx->bc, ctx->value); 1245 if (r) 1246 return r; 1247 1248 /* kill must be last in ALU */ 1249 ctx->bc->force_add_cf = 1; 1250 ctx->shader->uses_kill = TRUE; 1251 return 0; 1252} 1253 1254static int tgsi_lit(struct r600_shader_ctx *ctx) 1255{ 1256 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1257 struct r600_bc_alu alu; 1258 struct r600_bc_alu_src r600_src[3]; 1259 int r; 1260 1261 r = tgsi_split_constant(ctx, r600_src); 1262 if (r) 1263 return r; 1264 r = tgsi_split_literal_constant(ctx, r600_src); 1265 if (r) 1266 return r; 1267 1268 /* dst.x, <- 1.0 */ 1269 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1270 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1271 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1272 alu.src[0].chan = 0; 1273 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1274 if (r) 1275 return r; 1276 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1277 r = r600_bc_add_alu(ctx->bc, &alu); 1278 if (r) 1279 return r; 1280 1281 /* dst.y = max(src.x, 0.0) */ 1282 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1283 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1284 alu.src[0] = r600_src[0]; 1285 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1286 alu.src[1].chan = 0; 1287 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1288 if (r) 1289 return r; 1290 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1291 r = r600_bc_add_alu(ctx->bc, &alu); 1292 if (r) 1293 return r; 1294 1295 /* dst.w, <- 1.0 */ 1296 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1297 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1298 alu.src[0].sel = V_SQ_ALU_SRC_1; 1299 alu.src[0].chan = 0; 1300 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1301 if (r) 1302 return r; 1303 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1304 alu.last = 1; 1305 r = r600_bc_add_alu(ctx->bc, &alu); 1306 if (r) 1307 return r; 1308 1309 r = r600_bc_add_literal(ctx->bc, ctx->value); 1310 if (r) 1311 return r; 1312 1313 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1314 { 1315 int chan; 1316 int sel; 1317 1318 /* dst.z = log(src.y) */ 1319 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1320 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1321 alu.src[0] = r600_src[0]; 1322 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1323 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1324 if (r) 1325 return r; 1326 alu.last = 1; 1327 r = r600_bc_add_alu(ctx->bc, &alu); 1328 if (r) 1329 return r; 1330 1331 r = r600_bc_add_literal(ctx->bc, ctx->value); 1332 if (r) 1333 return r; 1334 1335 chan = alu.dst.chan; 1336 sel = alu.dst.sel; 1337 1338 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1339 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1340 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1341 alu.src[0] = r600_src[0]; 1342 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1343 alu.src[1].sel = sel; 1344 alu.src[1].chan = chan; 1345 1346 alu.src[2] = r600_src[0]; 1347 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1348 alu.dst.sel = ctx->temp_reg; 1349 alu.dst.chan = 0; 1350 alu.dst.write = 1; 1351 alu.is_op3 = 1; 1352 alu.last = 1; 1353 r = r600_bc_add_alu(ctx->bc, &alu); 1354 if (r) 1355 return r; 1356 1357 r = r600_bc_add_literal(ctx->bc, ctx->value); 1358 if (r) 1359 return r; 1360 /* dst.z = exp(tmp.x) */ 1361 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1362 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1363 alu.src[0].sel = ctx->temp_reg; 1364 alu.src[0].chan = 0; 1365 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1366 if (r) 1367 return r; 1368 alu.last = 1; 1369 r = r600_bc_add_alu(ctx->bc, &alu); 1370 if (r) 1371 return r; 1372 } 1373 return 0; 1374} 1375 1376static int tgsi_rsq(struct r600_shader_ctx *ctx) 1377{ 1378 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1379 struct r600_bc_alu alu; 1380 int i, r; 1381 1382 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1383 1384 /* FIXME: 1385 * For state trackers other than OpenGL, we'll want to use 1386 * _RECIPSQRT_IEEE instead. 1387 */ 1388 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1389 1390 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1391 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1392 if (r) 1393 return r; 1394 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1395 alu.src[i].abs = 1; 1396 } 1397 alu.dst.sel = ctx->temp_reg; 1398 alu.dst.write = 1; 1399 alu.last = 1; 1400 r = r600_bc_add_alu(ctx->bc, &alu); 1401 if (r) 1402 return r; 1403 r = r600_bc_add_literal(ctx->bc, ctx->value); 1404 if (r) 1405 return r; 1406 /* replicate result */ 1407 return tgsi_helper_tempx_replicate(ctx); 1408} 1409 1410static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1411{ 1412 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1413 struct r600_bc_alu alu; 1414 int i, r; 1415 1416 for (i = 0; i < 4; i++) { 1417 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1418 alu.src[0].sel = ctx->temp_reg; 1419 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1420 alu.dst.chan = i; 1421 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1422 if (r) 1423 return r; 1424 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1425 if (i == 3) 1426 alu.last = 1; 1427 r = r600_bc_add_alu(ctx->bc, &alu); 1428 if (r) 1429 return r; 1430 } 1431 return 0; 1432} 1433 1434static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1435{ 1436 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1437 struct r600_bc_alu alu; 1438 int i, r; 1439 1440 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1441 alu.inst = ctx->inst_info->r600_opcode; 1442 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1443 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1444 if (r) 1445 return r; 1446 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1447 } 1448 alu.dst.sel = ctx->temp_reg; 1449 alu.dst.write = 1; 1450 alu.last = 1; 1451 r = r600_bc_add_alu(ctx->bc, &alu); 1452 if (r) 1453 return r; 1454 r = r600_bc_add_literal(ctx->bc, ctx->value); 1455 if (r) 1456 return r; 1457 /* replicate result */ 1458 return tgsi_helper_tempx_replicate(ctx); 1459} 1460 1461static int tgsi_pow(struct r600_shader_ctx *ctx) 1462{ 1463 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1464 struct r600_bc_alu alu; 1465 int r; 1466 1467 /* LOG2(a) */ 1468 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1469 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1470 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1471 if (r) 1472 return r; 1473 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1474 alu.dst.sel = ctx->temp_reg; 1475 alu.dst.write = 1; 1476 alu.last = 1; 1477 r = r600_bc_add_alu(ctx->bc, &alu); 1478 if (r) 1479 return r; 1480 r = r600_bc_add_literal(ctx->bc,ctx->value); 1481 if (r) 1482 return r; 1483 /* b * LOG2(a) */ 1484 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1485 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1486 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1487 if (r) 1488 return r; 1489 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1490 alu.src[1].sel = ctx->temp_reg; 1491 alu.dst.sel = ctx->temp_reg; 1492 alu.dst.write = 1; 1493 alu.last = 1; 1494 r = r600_bc_add_alu(ctx->bc, &alu); 1495 if (r) 1496 return r; 1497 r = r600_bc_add_literal(ctx->bc,ctx->value); 1498 if (r) 1499 return r; 1500 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1501 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1502 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1503 alu.src[0].sel = ctx->temp_reg; 1504 alu.dst.sel = ctx->temp_reg; 1505 alu.dst.write = 1; 1506 alu.last = 1; 1507 r = r600_bc_add_alu(ctx->bc, &alu); 1508 if (r) 1509 return r; 1510 r = r600_bc_add_literal(ctx->bc,ctx->value); 1511 if (r) 1512 return r; 1513 return tgsi_helper_tempx_replicate(ctx); 1514} 1515 1516static int tgsi_ssg(struct r600_shader_ctx *ctx) 1517{ 1518 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1519 struct r600_bc_alu alu; 1520 struct r600_bc_alu_src r600_src[3]; 1521 int i, r; 1522 1523 r = tgsi_split_constant(ctx, r600_src); 1524 if (r) 1525 return r; 1526 r = tgsi_split_literal_constant(ctx, r600_src); 1527 if (r) 1528 return r; 1529 1530 /* tmp = (src > 0 ? 1 : src) */ 1531 for (i = 0; i < 4; i++) { 1532 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1533 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1534 alu.is_op3 = 1; 1535 1536 alu.dst.sel = ctx->temp_reg; 1537 alu.dst.chan = i; 1538 1539 alu.src[0] = r600_src[0]; 1540 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1541 1542 alu.src[1].sel = V_SQ_ALU_SRC_1; 1543 1544 alu.src[2] = r600_src[0]; 1545 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1546 if (i == 3) 1547 alu.last = 1; 1548 r = r600_bc_add_alu(ctx->bc, &alu); 1549 if (r) 1550 return r; 1551 } 1552 r = r600_bc_add_literal(ctx->bc, ctx->value); 1553 if (r) 1554 return r; 1555 1556 /* dst = (-tmp > 0 ? -1 : tmp) */ 1557 for (i = 0; i < 4; i++) { 1558 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1559 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1560 alu.is_op3 = 1; 1561 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1562 if (r) 1563 return r; 1564 1565 alu.src[0].sel = ctx->temp_reg; 1566 alu.src[0].chan = i; 1567 alu.src[0].neg = 1; 1568 1569 alu.src[1].sel = V_SQ_ALU_SRC_1; 1570 alu.src[1].neg = 1; 1571 1572 alu.src[2].sel = ctx->temp_reg; 1573 alu.src[2].chan = i; 1574 1575 if (i == 3) 1576 alu.last = 1; 1577 r = r600_bc_add_alu(ctx->bc, &alu); 1578 if (r) 1579 return r; 1580 } 1581 return 0; 1582} 1583 1584static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1585{ 1586 struct r600_bc_alu alu; 1587 int i, r; 1588 1589 r = r600_bc_add_literal(ctx->bc, ctx->value); 1590 if (r) 1591 return r; 1592 for (i = 0; i < 4; i++) { 1593 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1594 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1595 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1596 alu.dst.chan = i; 1597 } else { 1598 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1599 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1600 if (r) 1601 return r; 1602 alu.src[0].sel = ctx->temp_reg; 1603 alu.src[0].chan = i; 1604 } 1605 if (i == 3) { 1606 alu.last = 1; 1607 } 1608 r = r600_bc_add_alu(ctx->bc, &alu); 1609 if (r) 1610 return r; 1611 } 1612 return 0; 1613} 1614 1615static int tgsi_op3(struct r600_shader_ctx *ctx) 1616{ 1617 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1618 struct r600_bc_alu_src r600_src[3]; 1619 struct r600_bc_alu alu; 1620 int i, j, r; 1621 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1622 1623 r = tgsi_split_constant(ctx, r600_src); 1624 if (r) 1625 return r; 1626 r = tgsi_split_literal_constant(ctx, r600_src); 1627 if (r) 1628 return r; 1629 for (i = 0; i < lasti + 1; i++) { 1630 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1631 continue; 1632 1633 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1634 alu.inst = ctx->inst_info->r600_opcode; 1635 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1636 alu.src[j] = r600_src[j]; 1637 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1638 } 1639 1640 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1641 if (r) 1642 return r; 1643 1644 alu.dst.chan = i; 1645 alu.dst.write = 1; 1646 alu.is_op3 = 1; 1647 if (i == lasti) { 1648 alu.last = 1; 1649 } 1650 r = r600_bc_add_alu(ctx->bc, &alu); 1651 if (r) 1652 return r; 1653 } 1654 return 0; 1655} 1656 1657static int tgsi_dp(struct r600_shader_ctx *ctx) 1658{ 1659 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1660 struct r600_bc_alu_src r600_src[3]; 1661 struct r600_bc_alu alu; 1662 int i, j, r; 1663 1664 r = tgsi_split_constant(ctx, r600_src); 1665 if (r) 1666 return r; 1667 r = tgsi_split_literal_constant(ctx, r600_src); 1668 if (r) 1669 return r; 1670 for (i = 0; i < 4; i++) { 1671 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1672 alu.inst = ctx->inst_info->r600_opcode; 1673 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1674 alu.src[j] = r600_src[j]; 1675 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1676 } 1677 1678 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1679 if (r) 1680 return r; 1681 1682 alu.dst.chan = i; 1683 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1684 /* handle some special cases */ 1685 switch (ctx->inst_info->tgsi_opcode) { 1686 case TGSI_OPCODE_DP2: 1687 if (i > 1) { 1688 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1689 alu.src[0].chan = alu.src[1].chan = 0; 1690 } 1691 break; 1692 case TGSI_OPCODE_DP3: 1693 if (i > 2) { 1694 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1695 alu.src[0].chan = alu.src[1].chan = 0; 1696 } 1697 break; 1698 case TGSI_OPCODE_DPH: 1699 if (i == 3) { 1700 alu.src[0].sel = V_SQ_ALU_SRC_1; 1701 alu.src[0].chan = 0; 1702 alu.src[0].neg = 0; 1703 } 1704 break; 1705 default: 1706 break; 1707 } 1708 if (i == 3) { 1709 alu.last = 1; 1710 } 1711 r = r600_bc_add_alu(ctx->bc, &alu); 1712 if (r) 1713 return r; 1714 } 1715 return 0; 1716} 1717 1718static int tgsi_tex(struct r600_shader_ctx *ctx) 1719{ 1720 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1721 struct r600_bc_tex tex; 1722 struct r600_bc_alu alu; 1723 unsigned src_gpr; 1724 int r, i; 1725 int opcode; 1726 boolean src_not_temp = 1727 inst->Src[0].Register.File != TGSI_FILE_TEMPORARY && 1728 inst->Src[0].Register.File != TGSI_FILE_INPUT; 1729 uint32_t lit_vals[4]; 1730 1731 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1732 1733 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1734 /* Add perspective divide */ 1735 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1736 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1737 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1738 if (r) 1739 return r; 1740 1741 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1742 alu.dst.sel = ctx->temp_reg; 1743 alu.dst.chan = 3; 1744 alu.last = 1; 1745 alu.dst.write = 1; 1746 r = r600_bc_add_alu(ctx->bc, &alu); 1747 if (r) 1748 return r; 1749 1750 for (i = 0; i < 3; i++) { 1751 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1752 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1753 alu.src[0].sel = ctx->temp_reg; 1754 alu.src[0].chan = 3; 1755 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1756 if (r) 1757 return r; 1758 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1759 alu.dst.sel = ctx->temp_reg; 1760 alu.dst.chan = i; 1761 alu.dst.write = 1; 1762 r = r600_bc_add_alu(ctx->bc, &alu); 1763 if (r) 1764 return r; 1765 } 1766 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1767 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1768 alu.src[0].sel = V_SQ_ALU_SRC_1; 1769 alu.src[0].chan = 0; 1770 alu.dst.sel = ctx->temp_reg; 1771 alu.dst.chan = 3; 1772 alu.last = 1; 1773 alu.dst.write = 1; 1774 r = r600_bc_add_alu(ctx->bc, &alu); 1775 if (r) 1776 return r; 1777 src_not_temp = FALSE; 1778 src_gpr = ctx->temp_reg; 1779 } 1780 1781 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1782 int src_chan, src2_chan; 1783 1784 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1785 for (i = 0; i < 4; i++) { 1786 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1787 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1788 switch (i) { 1789 case 0: 1790 src_chan = 2; 1791 src2_chan = 1; 1792 break; 1793 case 1: 1794 src_chan = 2; 1795 src2_chan = 0; 1796 break; 1797 case 2: 1798 src_chan = 0; 1799 src2_chan = 2; 1800 break; 1801 case 3: 1802 src_chan = 1; 1803 src2_chan = 2; 1804 break; 1805 default: 1806 assert(0); 1807 src_chan = 0; 1808 src2_chan = 0; 1809 break; 1810 } 1811 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1812 if (r) 1813 return r; 1814 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1815 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1816 if (r) 1817 return r; 1818 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1819 alu.dst.sel = ctx->temp_reg; 1820 alu.dst.chan = i; 1821 if (i == 3) 1822 alu.last = 1; 1823 alu.dst.write = 1; 1824 r = r600_bc_add_alu(ctx->bc, &alu); 1825 if (r) 1826 return r; 1827 } 1828 1829 /* tmp1.z = RCP_e(|tmp1.z|) */ 1830 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1831 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1832 alu.src[0].sel = ctx->temp_reg; 1833 alu.src[0].chan = 2; 1834 alu.src[0].abs = 1; 1835 alu.dst.sel = ctx->temp_reg; 1836 alu.dst.chan = 2; 1837 alu.dst.write = 1; 1838 alu.last = 1; 1839 r = r600_bc_add_alu(ctx->bc, &alu); 1840 if (r) 1841 return r; 1842 1843 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1844 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1845 * muladd has no writemask, have to use another temp 1846 */ 1847 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1848 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1849 alu.is_op3 = 1; 1850 1851 alu.src[0].sel = ctx->temp_reg; 1852 alu.src[0].chan = 0; 1853 alu.src[1].sel = ctx->temp_reg; 1854 alu.src[1].chan = 2; 1855 1856 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1857 alu.src[2].chan = 0; 1858 1859 alu.dst.sel = ctx->temp_reg; 1860 alu.dst.chan = 0; 1861 alu.dst.write = 1; 1862 1863 r = r600_bc_add_alu(ctx->bc, &alu); 1864 if (r) 1865 return r; 1866 1867 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1868 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1869 alu.is_op3 = 1; 1870 1871 alu.src[0].sel = ctx->temp_reg; 1872 alu.src[0].chan = 1; 1873 alu.src[1].sel = ctx->temp_reg; 1874 alu.src[1].chan = 2; 1875 1876 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1877 alu.src[2].chan = 0; 1878 1879 alu.dst.sel = ctx->temp_reg; 1880 alu.dst.chan = 1; 1881 alu.dst.write = 1; 1882 1883 alu.last = 1; 1884 r = r600_bc_add_alu(ctx->bc, &alu); 1885 if (r) 1886 return r; 1887 1888 lit_vals[0] = fui(1.5f); 1889 1890 r = r600_bc_add_literal(ctx->bc, lit_vals); 1891 if (r) 1892 return r; 1893 src_not_temp = FALSE; 1894 src_gpr = ctx->temp_reg; 1895 } 1896 1897 if (src_not_temp) { 1898 for (i = 0; i < 4; i++) { 1899 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1900 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1901 alu.src[0].sel = src_gpr; 1902 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1903 alu.dst.sel = ctx->temp_reg; 1904 alu.dst.chan = i; 1905 if (i == 3) 1906 alu.last = 1; 1907 alu.dst.write = 1; 1908 r = r600_bc_add_alu(ctx->bc, &alu); 1909 if (r) 1910 return r; 1911 } 1912 src_gpr = ctx->temp_reg; 1913 } 1914 1915 opcode = ctx->inst_info->r600_opcode; 1916 if (opcode == SQ_TEX_INST_SAMPLE && 1917 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1918 opcode = SQ_TEX_INST_SAMPLE_C; 1919 1920 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1921 tex.inst = opcode; 1922 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1923 tex.resource_id = tex.sampler_id; 1924 tex.src_gpr = src_gpr; 1925 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1926 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 1927 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 1928 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 1929 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 1930 tex.src_sel_x = 0; 1931 tex.src_sel_y = 1; 1932 tex.src_sel_z = 2; 1933 tex.src_sel_w = 3; 1934 1935 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1936 tex.src_sel_x = 1; 1937 tex.src_sel_y = 0; 1938 tex.src_sel_z = 3; 1939 tex.src_sel_w = 1; 1940 } 1941 1942 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1943 tex.coord_type_x = 1; 1944 tex.coord_type_y = 1; 1945 tex.coord_type_z = 1; 1946 tex.coord_type_w = 1; 1947 } 1948 1949 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1950 tex.src_sel_w = 2; 1951 1952 r = r600_bc_add_tex(ctx->bc, &tex); 1953 if (r) 1954 return r; 1955 1956 /* add shadow ambient support - gallium doesn't do it yet */ 1957 return 0; 1958} 1959 1960static int tgsi_lrp(struct r600_shader_ctx *ctx) 1961{ 1962 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1963 struct r600_bc_alu_src r600_src[3]; 1964 struct r600_bc_alu alu; 1965 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 1966 unsigned i; 1967 int r; 1968 1969 r = tgsi_split_constant(ctx, r600_src); 1970 if (r) 1971 return r; 1972 r = tgsi_split_literal_constant(ctx, r600_src); 1973 if (r) 1974 return r; 1975 /* 1 - src0 */ 1976 for (i = 0; i < lasti + 1; i++) { 1977 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1978 continue; 1979 1980 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1981 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1982 alu.src[0].sel = V_SQ_ALU_SRC_1; 1983 alu.src[0].chan = 0; 1984 alu.src[1] = r600_src[0]; 1985 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1986 alu.src[1].neg = 1; 1987 alu.dst.sel = ctx->temp_reg; 1988 alu.dst.chan = i; 1989 if (i == lasti) { 1990 alu.last = 1; 1991 } 1992 alu.dst.write = 1; 1993 r = r600_bc_add_alu(ctx->bc, &alu); 1994 if (r) 1995 return r; 1996 } 1997 r = r600_bc_add_literal(ctx->bc, ctx->value); 1998 if (r) 1999 return r; 2000 2001 /* (1 - src0) * src2 */ 2002 for (i = 0; i < lasti + 1; i++) { 2003 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2004 continue; 2005 2006 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2007 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2008 alu.src[0].sel = ctx->temp_reg; 2009 alu.src[0].chan = i; 2010 alu.src[1] = r600_src[2]; 2011 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2012 alu.dst.sel = ctx->temp_reg; 2013 alu.dst.chan = i; 2014 if (i == lasti) { 2015 alu.last = 1; 2016 } 2017 alu.dst.write = 1; 2018 r = r600_bc_add_alu(ctx->bc, &alu); 2019 if (r) 2020 return r; 2021 } 2022 r = r600_bc_add_literal(ctx->bc, ctx->value); 2023 if (r) 2024 return r; 2025 2026 /* src0 * src1 + (1 - src0) * src2 */ 2027 for (i = 0; i < lasti + 1; i++) { 2028 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2029 continue; 2030 2031 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2032 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2033 alu.is_op3 = 1; 2034 alu.src[0] = r600_src[0]; 2035 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2036 alu.src[1] = r600_src[1]; 2037 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2038 alu.src[2].sel = ctx->temp_reg; 2039 alu.src[2].chan = i; 2040 2041 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2042 if (r) 2043 return r; 2044 2045 alu.dst.chan = i; 2046 if (i == lasti) { 2047 alu.last = 1; 2048 } 2049 r = r600_bc_add_alu(ctx->bc, &alu); 2050 if (r) 2051 return r; 2052 } 2053 return 0; 2054} 2055 2056static int tgsi_cmp(struct r600_shader_ctx *ctx) 2057{ 2058 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2059 struct r600_bc_alu_src r600_src[3]; 2060 struct r600_bc_alu alu; 2061 int i, r; 2062 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 2063 2064 r = tgsi_split_constant(ctx, r600_src); 2065 if (r) 2066 return r; 2067 r = tgsi_split_literal_constant(ctx, r600_src); 2068 if (r) 2069 return r; 2070 2071 for (i = 0; i < lasti + 1; i++) { 2072 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 2073 continue; 2074 2075 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2076 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2077 alu.src[0] = r600_src[0]; 2078 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2079 2080 alu.src[1] = r600_src[2]; 2081 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2082 2083 alu.src[2] = r600_src[1]; 2084 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 2085 2086 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2087 if (r) 2088 return r; 2089 2090 alu.dst.chan = i; 2091 alu.dst.write = 1; 2092 alu.is_op3 = 1; 2093 if (i == lasti) 2094 alu.last = 1; 2095 r = r600_bc_add_alu(ctx->bc, &alu); 2096 if (r) 2097 return r; 2098 } 2099 return 0; 2100} 2101 2102static int tgsi_xpd(struct r600_shader_ctx *ctx) 2103{ 2104 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2105 struct r600_bc_alu_src r600_src[3]; 2106 struct r600_bc_alu alu; 2107 uint32_t use_temp = 0; 2108 int i, r; 2109 2110 if (inst->Dst[0].Register.WriteMask != 0xf) 2111 use_temp = 1; 2112 2113 r = tgsi_split_constant(ctx, r600_src); 2114 if (r) 2115 return r; 2116 r = tgsi_split_literal_constant(ctx, r600_src); 2117 if (r) 2118 return r; 2119 2120 for (i = 0; i < 4; i++) { 2121 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2122 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2123 2124 alu.src[0] = r600_src[0]; 2125 switch (i) { 2126 case 0: 2127 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2128 break; 2129 case 1: 2130 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2131 break; 2132 case 2: 2133 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2134 break; 2135 case 3: 2136 alu.src[0].sel = V_SQ_ALU_SRC_0; 2137 alu.src[0].chan = i; 2138 } 2139 2140 alu.src[1] = r600_src[1]; 2141 switch (i) { 2142 case 0: 2143 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2144 break; 2145 case 1: 2146 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2147 break; 2148 case 2: 2149 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2150 break; 2151 case 3: 2152 alu.src[1].sel = V_SQ_ALU_SRC_0; 2153 alu.src[1].chan = i; 2154 } 2155 2156 alu.dst.sel = ctx->temp_reg; 2157 alu.dst.chan = i; 2158 alu.dst.write = 1; 2159 2160 if (i == 3) 2161 alu.last = 1; 2162 r = r600_bc_add_alu(ctx->bc, &alu); 2163 if (r) 2164 return r; 2165 2166 r = r600_bc_add_literal(ctx->bc, ctx->value); 2167 if (r) 2168 return r; 2169 } 2170 2171 for (i = 0; i < 4; i++) { 2172 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2173 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2174 2175 alu.src[0] = r600_src[0]; 2176 switch (i) { 2177 case 0: 2178 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2179 break; 2180 case 1: 2181 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2182 break; 2183 case 2: 2184 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2185 break; 2186 case 3: 2187 alu.src[0].sel = V_SQ_ALU_SRC_0; 2188 alu.src[0].chan = i; 2189 } 2190 2191 alu.src[1] = r600_src[1]; 2192 switch (i) { 2193 case 0: 2194 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2195 break; 2196 case 1: 2197 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2198 break; 2199 case 2: 2200 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2201 break; 2202 case 3: 2203 alu.src[1].sel = V_SQ_ALU_SRC_0; 2204 alu.src[1].chan = i; 2205 } 2206 2207 alu.src[2].sel = ctx->temp_reg; 2208 alu.src[2].neg = 1; 2209 alu.src[2].chan = i; 2210 2211 if (use_temp) 2212 alu.dst.sel = ctx->temp_reg; 2213 else { 2214 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2215 if (r) 2216 return r; 2217 } 2218 alu.dst.chan = i; 2219 alu.dst.write = 1; 2220 alu.is_op3 = 1; 2221 if (i == 3) 2222 alu.last = 1; 2223 r = r600_bc_add_alu(ctx->bc, &alu); 2224 if (r) 2225 return r; 2226 2227 r = r600_bc_add_literal(ctx->bc, ctx->value); 2228 if (r) 2229 return r; 2230 } 2231 if (use_temp) 2232 return tgsi_helper_copy(ctx, inst); 2233 return 0; 2234} 2235 2236static int tgsi_exp(struct r600_shader_ctx *ctx) 2237{ 2238 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2239 struct r600_bc_alu_src r600_src[3] = { { 0 } }; 2240 struct r600_bc_alu alu; 2241 int r; 2242 2243 /* result.x = 2^floor(src); */ 2244 if (inst->Dst[0].Register.WriteMask & 1) { 2245 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2246 2247 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2248 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2249 if (r) 2250 return r; 2251 2252 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2253 2254 alu.dst.sel = ctx->temp_reg; 2255 alu.dst.chan = 0; 2256 alu.dst.write = 1; 2257 alu.last = 1; 2258 r = r600_bc_add_alu(ctx->bc, &alu); 2259 if (r) 2260 return r; 2261 2262 r = r600_bc_add_literal(ctx->bc, ctx->value); 2263 if (r) 2264 return r; 2265 2266 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2267 alu.src[0].sel = ctx->temp_reg; 2268 alu.src[0].chan = 0; 2269 2270 alu.dst.sel = ctx->temp_reg; 2271 alu.dst.chan = 0; 2272 alu.dst.write = 1; 2273 alu.last = 1; 2274 r = r600_bc_add_alu(ctx->bc, &alu); 2275 if (r) 2276 return r; 2277 2278 r = r600_bc_add_literal(ctx->bc, ctx->value); 2279 if (r) 2280 return r; 2281 } 2282 2283 /* result.y = tmp - floor(tmp); */ 2284 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2285 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2286 2287 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2288 alu.src[0] = r600_src[0]; 2289 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2290 if (r) 2291 return r; 2292 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2293 2294 alu.dst.sel = ctx->temp_reg; 2295// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2296// if (r) 2297// return r; 2298 alu.dst.write = 1; 2299 alu.dst.chan = 1; 2300 2301 alu.last = 1; 2302 2303 r = r600_bc_add_alu(ctx->bc, &alu); 2304 if (r) 2305 return r; 2306 r = r600_bc_add_literal(ctx->bc, ctx->value); 2307 if (r) 2308 return r; 2309 } 2310 2311 /* result.z = RoughApprox2ToX(tmp);*/ 2312 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2313 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2314 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2315 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2316 if (r) 2317 return r; 2318 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2319 2320 alu.dst.sel = ctx->temp_reg; 2321 alu.dst.write = 1; 2322 alu.dst.chan = 2; 2323 2324 alu.last = 1; 2325 2326 r = r600_bc_add_alu(ctx->bc, &alu); 2327 if (r) 2328 return r; 2329 r = r600_bc_add_literal(ctx->bc, ctx->value); 2330 if (r) 2331 return r; 2332 } 2333 2334 /* result.w = 1.0;*/ 2335 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2336 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2337 2338 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2339 alu.src[0].sel = V_SQ_ALU_SRC_1; 2340 alu.src[0].chan = 0; 2341 2342 alu.dst.sel = ctx->temp_reg; 2343 alu.dst.chan = 3; 2344 alu.dst.write = 1; 2345 alu.last = 1; 2346 r = r600_bc_add_alu(ctx->bc, &alu); 2347 if (r) 2348 return r; 2349 r = r600_bc_add_literal(ctx->bc, ctx->value); 2350 if (r) 2351 return r; 2352 } 2353 return tgsi_helper_copy(ctx, inst); 2354} 2355 2356static int tgsi_log(struct r600_shader_ctx *ctx) 2357{ 2358 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2359 struct r600_bc_alu alu; 2360 int r; 2361 2362 /* result.x = floor(log2(src)); */ 2363 if (inst->Dst[0].Register.WriteMask & 1) { 2364 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2365 2366 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2367 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2368 if (r) 2369 return r; 2370 2371 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2372 2373 alu.dst.sel = ctx->temp_reg; 2374 alu.dst.chan = 0; 2375 alu.dst.write = 1; 2376 alu.last = 1; 2377 r = r600_bc_add_alu(ctx->bc, &alu); 2378 if (r) 2379 return r; 2380 2381 r = r600_bc_add_literal(ctx->bc, ctx->value); 2382 if (r) 2383 return r; 2384 2385 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2386 alu.src[0].sel = ctx->temp_reg; 2387 alu.src[0].chan = 0; 2388 2389 alu.dst.sel = ctx->temp_reg; 2390 alu.dst.chan = 0; 2391 alu.dst.write = 1; 2392 alu.last = 1; 2393 2394 r = r600_bc_add_alu(ctx->bc, &alu); 2395 if (r) 2396 return r; 2397 2398 r = r600_bc_add_literal(ctx->bc, ctx->value); 2399 if (r) 2400 return r; 2401 } 2402 2403 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2404 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2405 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2406 2407 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2408 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2409 if (r) 2410 return r; 2411 2412 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2413 2414 alu.dst.sel = ctx->temp_reg; 2415 alu.dst.chan = 1; 2416 alu.dst.write = 1; 2417 alu.last = 1; 2418 2419 r = r600_bc_add_alu(ctx->bc, &alu); 2420 if (r) 2421 return r; 2422 2423 r = r600_bc_add_literal(ctx->bc, ctx->value); 2424 if (r) 2425 return r; 2426 2427 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2428 2429 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2430 alu.src[0].sel = ctx->temp_reg; 2431 alu.src[0].chan = 1; 2432 2433 alu.dst.sel = ctx->temp_reg; 2434 alu.dst.chan = 1; 2435 alu.dst.write = 1; 2436 alu.last = 1; 2437 2438 r = r600_bc_add_alu(ctx->bc, &alu); 2439 if (r) 2440 return r; 2441 2442 r = r600_bc_add_literal(ctx->bc, ctx->value); 2443 if (r) 2444 return r; 2445 2446 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2447 2448 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2449 alu.src[0].sel = ctx->temp_reg; 2450 alu.src[0].chan = 1; 2451 2452 alu.dst.sel = ctx->temp_reg; 2453 alu.dst.chan = 1; 2454 alu.dst.write = 1; 2455 alu.last = 1; 2456 2457 r = r600_bc_add_alu(ctx->bc, &alu); 2458 if (r) 2459 return r; 2460 2461 r = r600_bc_add_literal(ctx->bc, ctx->value); 2462 if (r) 2463 return r; 2464 2465 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2466 2467 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2468 alu.src[0].sel = ctx->temp_reg; 2469 alu.src[0].chan = 1; 2470 2471 alu.dst.sel = ctx->temp_reg; 2472 alu.dst.chan = 1; 2473 alu.dst.write = 1; 2474 alu.last = 1; 2475 2476 r = r600_bc_add_alu(ctx->bc, &alu); 2477 if (r) 2478 return r; 2479 2480 r = r600_bc_add_literal(ctx->bc, ctx->value); 2481 if (r) 2482 return r; 2483 2484 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2485 2486 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2487 2488 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2489 if (r) 2490 return r; 2491 2492 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2493 2494 alu.src[1].sel = ctx->temp_reg; 2495 alu.src[1].chan = 1; 2496 2497 alu.dst.sel = ctx->temp_reg; 2498 alu.dst.chan = 1; 2499 alu.dst.write = 1; 2500 alu.last = 1; 2501 2502 r = r600_bc_add_alu(ctx->bc, &alu); 2503 if (r) 2504 return r; 2505 2506 r = r600_bc_add_literal(ctx->bc, ctx->value); 2507 if (r) 2508 return r; 2509 } 2510 2511 /* result.z = log2(src);*/ 2512 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2513 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2514 2515 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2516 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2517 if (r) 2518 return r; 2519 2520 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2521 2522 alu.dst.sel = ctx->temp_reg; 2523 alu.dst.write = 1; 2524 alu.dst.chan = 2; 2525 alu.last = 1; 2526 2527 r = r600_bc_add_alu(ctx->bc, &alu); 2528 if (r) 2529 return r; 2530 2531 r = r600_bc_add_literal(ctx->bc, ctx->value); 2532 if (r) 2533 return r; 2534 } 2535 2536 /* result.w = 1.0; */ 2537 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2538 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2539 2540 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2541 alu.src[0].sel = V_SQ_ALU_SRC_1; 2542 alu.src[0].chan = 0; 2543 2544 alu.dst.sel = ctx->temp_reg; 2545 alu.dst.chan = 3; 2546 alu.dst.write = 1; 2547 alu.last = 1; 2548 2549 r = r600_bc_add_alu(ctx->bc, &alu); 2550 if (r) 2551 return r; 2552 2553 r = r600_bc_add_literal(ctx->bc, ctx->value); 2554 if (r) 2555 return r; 2556 } 2557 2558 return tgsi_helper_copy(ctx, inst); 2559} 2560 2561static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2562{ 2563 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2564 struct r600_bc_alu alu; 2565 int r; 2566 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2567 2568 switch (inst->Instruction.Opcode) { 2569 case TGSI_OPCODE_ARL: 2570 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2571 break; 2572 case TGSI_OPCODE_ARR: 2573 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2574 break; 2575 default: 2576 assert(0); 2577 return -1; 2578 } 2579 2580 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2581 if (r) 2582 return r; 2583 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2584 alu.last = 1; 2585 alu.dst.chan = 0; 2586 alu.dst.sel = ctx->temp_reg; 2587 alu.dst.write = 1; 2588 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2589 if (r) 2590 return r; 2591 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2592 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2593 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2594 if (r) 2595 return r; 2596 alu.src[0].sel = ctx->temp_reg; 2597 alu.src[0].chan = 0; 2598 alu.last = 1; 2599 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2600 if (r) 2601 return r; 2602 return 0; 2603} 2604static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2605{ 2606 /* TODO from r600c, ar values don't persist between clauses */ 2607 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2608 struct r600_bc_alu alu; 2609 int r; 2610 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2611 2612 switch (inst->Instruction.Opcode) { 2613 case TGSI_OPCODE_ARL: 2614 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2615 break; 2616 case TGSI_OPCODE_ARR: 2617 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; 2618 break; 2619 default: 2620 assert(0); 2621 return -1; 2622 } 2623 2624 2625 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2626 if (r) 2627 return r; 2628 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2629 2630 alu.last = 1; 2631 2632 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2633 if (r) 2634 return r; 2635 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2636 return 0; 2637} 2638 2639static int tgsi_opdst(struct r600_shader_ctx *ctx) 2640{ 2641 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2642 struct r600_bc_alu alu; 2643 int i, r = 0; 2644 2645 for (i = 0; i < 4; i++) { 2646 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2647 2648 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2649 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2650 if (r) 2651 return r; 2652 2653 if (i == 0 || i == 3) { 2654 alu.src[0].sel = V_SQ_ALU_SRC_1; 2655 } else { 2656 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2657 if (r) 2658 return r; 2659 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2660 } 2661 2662 if (i == 0 || i == 2) { 2663 alu.src[1].sel = V_SQ_ALU_SRC_1; 2664 } else { 2665 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2666 if (r) 2667 return r; 2668 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2669 } 2670 if (i == 3) 2671 alu.last = 1; 2672 r = r600_bc_add_alu(ctx->bc, &alu); 2673 if (r) 2674 return r; 2675 } 2676 return 0; 2677} 2678 2679static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2680{ 2681 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2682 struct r600_bc_alu alu; 2683 int r; 2684 2685 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2686 alu.inst = opcode; 2687 alu.predicate = 1; 2688 2689 alu.dst.sel = ctx->temp_reg; 2690 alu.dst.write = 1; 2691 alu.dst.chan = 0; 2692 2693 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2694 if (r) 2695 return r; 2696 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2697 alu.src[1].sel = V_SQ_ALU_SRC_0; 2698 alu.src[1].chan = 0; 2699 2700 alu.last = 1; 2701 2702 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2703 if (r) 2704 return r; 2705 return 0; 2706} 2707 2708static int pops(struct r600_shader_ctx *ctx, int pops) 2709{ 2710 int alu_pop = 3; 2711 if (ctx->bc->cf_last) { 2712 if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3) 2713 alu_pop = 0; 2714 else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3) 2715 alu_pop = 1; 2716 } 2717 alu_pop += pops; 2718 if (alu_pop == 1) { 2719 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3; 2720 ctx->bc->force_add_cf = 1; 2721 } else if (alu_pop == 2) { 2722 ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3; 2723 ctx->bc->force_add_cf = 1; 2724 } else { 2725 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2726 ctx->bc->cf_last->pop_count = pops; 2727 ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2; 2728 } 2729 return 0; 2730} 2731 2732static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2733{ 2734 switch(reason) { 2735 case FC_PUSH_VPM: 2736 ctx->bc->callstack[ctx->bc->call_sp].current--; 2737 break; 2738 case FC_PUSH_WQM: 2739 case FC_LOOP: 2740 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2741 break; 2742 case FC_REP: 2743 /* TOODO : for 16 vp asic should -= 2; */ 2744 ctx->bc->callstack[ctx->bc->call_sp].current --; 2745 break; 2746 } 2747} 2748 2749static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2750{ 2751 if (check_max_only) { 2752 int diff; 2753 switch (reason) { 2754 case FC_PUSH_VPM: 2755 diff = 1; 2756 break; 2757 case FC_PUSH_WQM: 2758 diff = 4; 2759 break; 2760 default: 2761 assert(0); 2762 diff = 0; 2763 } 2764 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2765 ctx->bc->callstack[ctx->bc->call_sp].max) { 2766 ctx->bc->callstack[ctx->bc->call_sp].max = 2767 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2768 } 2769 return; 2770 } 2771 switch (reason) { 2772 case FC_PUSH_VPM: 2773 ctx->bc->callstack[ctx->bc->call_sp].current++; 2774 break; 2775 case FC_PUSH_WQM: 2776 case FC_LOOP: 2777 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2778 break; 2779 case FC_REP: 2780 ctx->bc->callstack[ctx->bc->call_sp].current++; 2781 break; 2782 } 2783 2784 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2785 ctx->bc->callstack[ctx->bc->call_sp].max) { 2786 ctx->bc->callstack[ctx->bc->call_sp].max = 2787 ctx->bc->callstack[ctx->bc->call_sp].current; 2788 } 2789} 2790 2791static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2792{ 2793 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2794 2795 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2796 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2797 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2798 sp->num_mid++; 2799} 2800 2801static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2802{ 2803 ctx->bc->fc_sp++; 2804 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2805 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2806} 2807 2808static void fc_poplevel(struct r600_shader_ctx *ctx) 2809{ 2810 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2811 if (sp->mid) { 2812 free(sp->mid); 2813 sp->mid = NULL; 2814 } 2815 sp->num_mid = 0; 2816 sp->start = NULL; 2817 sp->type = 0; 2818 ctx->bc->fc_sp--; 2819} 2820 2821#if 0 2822static int emit_return(struct r600_shader_ctx *ctx) 2823{ 2824 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2825 return 0; 2826} 2827 2828static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2829{ 2830 2831 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2832 ctx->bc->cf_last->pop_count = pops; 2833 /* TODO work out offset */ 2834 return 0; 2835} 2836 2837static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2838{ 2839 return 0; 2840} 2841 2842static void emit_testflag(struct r600_shader_ctx *ctx) 2843{ 2844 2845} 2846 2847static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2848{ 2849 emit_testflag(ctx); 2850 emit_jump_to_offset(ctx, 1, 4); 2851 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2852 pops(ctx, ifidx + 1); 2853 emit_return(ctx); 2854} 2855 2856static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2857{ 2858 emit_testflag(ctx); 2859 2860 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2861 ctx->bc->cf_last->pop_count = 1; 2862 2863 fc_set_mid(ctx, fc_sp); 2864 2865 pops(ctx, 1); 2866} 2867#endif 2868 2869static int tgsi_if(struct r600_shader_ctx *ctx) 2870{ 2871 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2872 2873 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2874 2875 fc_pushlevel(ctx, FC_IF); 2876 2877 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2878 return 0; 2879} 2880 2881static int tgsi_else(struct r600_shader_ctx *ctx) 2882{ 2883 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2884 ctx->bc->cf_last->pop_count = 1; 2885 2886 fc_set_mid(ctx, ctx->bc->fc_sp); 2887 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2888 return 0; 2889} 2890 2891static int tgsi_endif(struct r600_shader_ctx *ctx) 2892{ 2893 pops(ctx, 1); 2894 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2895 R600_ERR("if/endif unbalanced in shader\n"); 2896 return -1; 2897 } 2898 2899 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2900 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2901 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2902 } else { 2903 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2904 } 2905 fc_poplevel(ctx); 2906 2907 callstack_decrease_current(ctx, FC_PUSH_VPM); 2908 return 0; 2909} 2910 2911static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2912{ 2913 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2914 2915 fc_pushlevel(ctx, FC_LOOP); 2916 2917 /* check stack depth */ 2918 callstack_check_depth(ctx, FC_LOOP, 0); 2919 return 0; 2920} 2921 2922static int tgsi_endloop(struct r600_shader_ctx *ctx) 2923{ 2924 int i; 2925 2926 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2927 2928 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2929 R600_ERR("loop/endloop in shader code are not paired.\n"); 2930 return -EINVAL; 2931 } 2932 2933 /* fixup loop pointers - from r600isa 2934 LOOP END points to CF after LOOP START, 2935 LOOP START point to CF after LOOP END 2936 BRK/CONT point to LOOP END CF 2937 */ 2938 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2939 2940 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2941 2942 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2943 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2944 } 2945 /* TODO add LOOPRET support */ 2946 fc_poplevel(ctx); 2947 callstack_decrease_current(ctx, FC_LOOP); 2948 return 0; 2949} 2950 2951static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2952{ 2953 unsigned int fscp; 2954 2955 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2956 { 2957 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2958 break; 2959 } 2960 2961 if (fscp == 0) { 2962 R600_ERR("Break not inside loop/endloop pair\n"); 2963 return -EINVAL; 2964 } 2965 2966 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2967 ctx->bc->cf_last->pop_count = 1; 2968 2969 fc_set_mid(ctx, fscp); 2970 2971 pops(ctx, 1); 2972 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2973 return 0; 2974} 2975 2976static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2977 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 2978 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2979 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2980 2981 /* FIXME: 2982 * For state trackers other than OpenGL, we'll want to use 2983 * _RECIP_IEEE instead. 2984 */ 2985 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 2986 2987 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 2988 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2989 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 2990 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2991 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2992 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2993 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2994 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2995 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2996 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2997 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2998 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2999 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3000 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3001 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3002 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3003 /* gap */ 3004 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3005 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3006 /* gap */ 3007 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3008 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3009 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3010 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3011 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3012 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3013 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3014 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3015 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3016 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3017 /* gap */ 3018 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3019 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3020 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3021 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3022 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3023 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3024 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3025 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3026 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3027 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3028 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3029 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3030 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3031 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3032 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3033 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3034 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3035 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3036 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3037 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3038 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3039 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3040 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3041 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3042 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3043 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3044 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3045 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3046 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3047 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3048 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3049 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3050 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3051 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3052 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3053 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3054 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3055 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3056 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3057 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3058 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3059 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3060 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3061 /* gap */ 3062 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3063 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3064 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3065 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3066 /* gap */ 3067 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3068 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3069 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3070 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3071 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3072 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3073 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3074 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3075 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3076 /* gap */ 3077 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3078 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3079 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3080 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3081 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3082 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3083 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3084 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3085 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3086 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3087 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3088 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3089 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3090 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3091 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3092 /* gap */ 3093 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3094 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3095 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3096 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3097 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3098 /* gap */ 3099 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3100 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3101 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3102 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3103 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3104 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3105 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3106 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3107 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3108 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3109 /* gap */ 3110 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3111 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3112 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3113 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3114 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3115 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3116 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3117 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3118 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3119 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3120 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3121 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3122 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3123 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3124 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3125 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3126 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3127 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3128 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3129 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3130 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3131 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3132 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3133 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3134 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3135 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3136 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3137 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3138}; 3139 3140static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3141 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3142 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3143 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3144 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3145 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 3146 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3147 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3148 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3149 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3150 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3151 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3152 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3153 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3154 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3155 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3156 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3157 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3158 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3159 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3160 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3161 /* gap */ 3162 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3163 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3164 /* gap */ 3165 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3166 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3167 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3168 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3169 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3170 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3171 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3172 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3173 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3174 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3175 /* gap */ 3176 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3177 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3178 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3179 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3180 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3181 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3182 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3183 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3184 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3185 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3186 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3187 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3188 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3189 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3190 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3191 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3192 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3193 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3194 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3195 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3196 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3197 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3198 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3199 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3200 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3201 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3202 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3203 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3204 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3205 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3206 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3207 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3208 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3209 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3210 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3211 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3212 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3213 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3214 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3215 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3216 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3217 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3218 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3219 /* gap */ 3220 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3221 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3222 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3223 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3224 /* gap */ 3225 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3226 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3227 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3228 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3229 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3230 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3231 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3232 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3233 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3234 /* gap */ 3235 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3236 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3237 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3238 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3239 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3240 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3241 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3242 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3243 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3244 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3245 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3246 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3247 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3248 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3249 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3250 /* gap */ 3251 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3252 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3253 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3254 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3255 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3256 /* gap */ 3257 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3258 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3259 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3260 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3261 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3262 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3263 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3264 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3265 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3266 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3267 /* gap */ 3268 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3269 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3270 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3271 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3272 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3273 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3274 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3275 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3276 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3277 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3278 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3279 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3280 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3281 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3282 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3283 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3284 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3285 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3286 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3287 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3288 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3289 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3290 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3291 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3292 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3293 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3294 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3295 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3296}; 3297