r600_shader.c revision ccb9be105602edaaff196046e324c8cb4a12fe0a
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_opcodes.h" 32#include "r600d.h" 33#include <stdio.h> 34#include <errno.h> 35 36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) 37{ 38 struct r600_pipe_state *rstate = &shader->rstate; 39 struct r600_shader *rshader = &shader->shader; 40 unsigned spi_vs_out_id[10]; 41 unsigned i, tmp; 42 43 /* clear previous register */ 44 rstate->nregs = 0; 45 46 /* so far never got proper semantic id from tgsi */ 47 for (i = 0; i < 10; i++) { 48 spi_vs_out_id[i] = 0; 49 } 50 for (i = 0; i < 32; i++) { 51 tmp = i << ((i & 3) * 8); 52 spi_vs_out_id[i / 4] |= tmp; 53 } 54 for (i = 0; i < 10; i++) { 55 r600_pipe_state_add_reg(rstate, 56 R_028614_SPI_VS_OUT_ID_0 + i * 4, 57 spi_vs_out_id[i], 0xFFFFFFFF, NULL); 58 } 59 60 r600_pipe_state_add_reg(rstate, 61 R_0286C4_SPI_VS_OUT_CONFIG, 62 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 63 0xFFFFFFFF, NULL); 64 r600_pipe_state_add_reg(rstate, 65 R_028868_SQ_PGM_RESOURCES_VS, 66 S_028868_NUM_GPRS(rshader->bc.ngpr) | 67 S_028868_STACK_SIZE(rshader->bc.nstack), 68 0xFFFFFFFF, NULL); 69 r600_pipe_state_add_reg(rstate, 70 R_0288A4_SQ_PGM_RESOURCES_FS, 71 0x00000000, 0xFFFFFFFF, NULL); 72 r600_pipe_state_add_reg(rstate, 73 R_0288D0_SQ_PGM_CF_OFFSET_VS, 74 0x00000000, 0xFFFFFFFF, NULL); 75 r600_pipe_state_add_reg(rstate, 76 R_0288DC_SQ_PGM_CF_OFFSET_FS, 77 0x00000000, 0xFFFFFFFF, NULL); 78 r600_pipe_state_add_reg(rstate, 79 R_028858_SQ_PGM_START_VS, 80 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 81 r600_pipe_state_add_reg(rstate, 82 R_028894_SQ_PGM_START_FS, 83 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 84 85 r600_pipe_state_add_reg(rstate, 86 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, 87 0xFFFFFFFF, NULL); 88 89} 90 91int r600_find_vs_semantic_index(struct r600_shader *vs, 92 struct r600_shader *ps, int id) 93{ 94 struct r600_shader_io *input = &ps->input[id]; 95 96 for (int i = 0; i < vs->noutput; i++) { 97 if (input->name == vs->output[i].name && 98 input->sid == vs->output[i].sid) { 99 return i - 1; 100 } 101 } 102 return 0; 103} 104 105static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) 106{ 107 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 108 struct r600_pipe_state *rstate = &shader->rstate; 109 struct r600_shader *rshader = &shader->shader; 110 unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; 111 int pos_index = -1, face_index = -1; 112 113 /* clear previous register */ 114 rstate->nregs = 0; 115 116 for (i = 0; i < rshader->ninput; i++) { 117 tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); 118 if (rshader->input[i].centroid) 119 tmp |= S_028644_SEL_CENTROID(1); 120 if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) 121 tmp |= S_028644_SEL_LINEAR(1); 122 123 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 124 pos_index = i; 125 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || 126 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || 127 rshader->input[i].name == TGSI_SEMANTIC_POSITION) { 128 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); 129 } 130 if (rshader->input[i].name == TGSI_SEMANTIC_FACE) 131 face_index = i; 132 if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && 133 rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { 134 tmp |= S_028644_PT_SPRITE_TEX(1); 135 } 136 r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); 137 } 138 for (i = 0; i < rshader->noutput; i++) { 139 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 140 r600_pipe_state_add_reg(rstate, 141 R_02880C_DB_SHADER_CONTROL, 142 S_02880C_Z_EXPORT_ENABLE(1), 143 S_02880C_Z_EXPORT_ENABLE(1), NULL); 144 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 145 r600_pipe_state_add_reg(rstate, 146 R_02880C_DB_SHADER_CONTROL, 147 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), 148 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); 149 } 150 151 exports_ps = 0; 152 num_cout = 0; 153 for (i = 0; i < rshader->noutput; i++) { 154 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 155 exports_ps |= 1; 156 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 157 num_cout++; 158 } 159 } 160 exports_ps |= S_028854_EXPORT_COLORS(num_cout); 161 if (!exports_ps) { 162 /* always at least export 1 component per pixel */ 163 exports_ps = 2; 164 } 165 166 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 167 S_0286CC_PERSP_GRADIENT_ENA(1); 168 spi_input_z = 0; 169 if (pos_index != -1) { 170 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | 171 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | 172 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | 173 S_0286CC_BARYC_SAMPLE_CNTL(1)); 174 spi_input_z |= 1; 175 } 176 177 spi_ps_in_control_1 = 0; 178 if (face_index != -1) { 179 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 180 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 181 } 182 183 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); 184 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); 185 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); 186 r600_pipe_state_add_reg(rstate, 187 R_028840_SQ_PGM_START_PS, 188 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 189 r600_pipe_state_add_reg(rstate, 190 R_028850_SQ_PGM_RESOURCES_PS, 191 S_028868_NUM_GPRS(rshader->bc.ngpr) | 192 S_028868_STACK_SIZE(rshader->bc.nstack), 193 0xFFFFFFFF, NULL); 194 r600_pipe_state_add_reg(rstate, 195 R_028854_SQ_PGM_EXPORTS_PS, 196 exports_ps, 0xFFFFFFFF, NULL); 197 r600_pipe_state_add_reg(rstate, 198 R_0288CC_SQ_PGM_CF_OFFSET_PS, 199 0x00000000, 0xFFFFFFFF, NULL); 200 201 if (rshader->uses_kill) { 202 /* only set some bits here, the other bits are set in the dsa state */ 203 r600_pipe_state_add_reg(rstate, 204 R_02880C_DB_SHADER_CONTROL, 205 S_02880C_KILL_ENABLE(1), 206 S_02880C_KILL_ENABLE(1), NULL); 207 } 208 r600_pipe_state_add_reg(rstate, 209 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, 210 0xFFFFFFFF, NULL); 211} 212 213static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 214{ 215 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 216 struct r600_shader *rshader = &shader->shader; 217 void *ptr; 218 219 /* copy new shader */ 220 if (shader->bo == NULL) { 221 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0); 222 if (shader->bo == NULL) { 223 return -ENOMEM; 224 } 225 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 226 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); 227 r600_bo_unmap(rctx->radeon, shader->bo); 228 } 229 /* build state */ 230 rshader->flat_shade = rctx->flatshade; 231 switch (rshader->processor_type) { 232 case TGSI_PROCESSOR_VERTEX: 233 if (rshader->family >= CHIP_CEDAR) { 234 evergreen_pipe_shader_vs(ctx, shader); 235 } else { 236 r600_pipe_shader_vs(ctx, shader); 237 } 238 break; 239 case TGSI_PROCESSOR_FRAGMENT: 240 if (rshader->family >= CHIP_CEDAR) { 241 evergreen_pipe_shader_ps(ctx, shader); 242 } else { 243 r600_pipe_shader_ps(ctx, shader); 244 } 245 break; 246 default: 247 return -EINVAL; 248 } 249 r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); 250 return 0; 251} 252 253static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) 254{ 255 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 256 struct r600_shader *shader = &rshader->shader; 257 const struct util_format_description *desc; 258 enum pipe_format resource_format[160]; 259 unsigned i, nresources = 0; 260 struct r600_bc *bc = &shader->bc; 261 struct r600_bc_cf *cf; 262 struct r600_bc_vtx *vtx; 263 264 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 265 return 0; 266 /* doing a full memcmp fell over the refcount */ 267 if ((rshader->vertex_elements.count == rctx->vertex_elements->count) && 268 (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) { 269 return 0; 270 } 271 rshader->vertex_elements = *rctx->vertex_elements; 272 for (i = 0; i < rctx->vertex_elements->count; i++) { 273 resource_format[nresources++] = rctx->vertex_elements->hw_format[i]; 274 } 275 r600_bo_reference(rctx->radeon, &rshader->bo, NULL); 276 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 277 switch (cf->inst) { 278 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 279 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 280 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 281 desc = util_format_description(resource_format[vtx->buffer_id]); 282 if (desc == NULL) { 283 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 284 return -EINVAL; 285 } 286 vtx->dst_sel_x = desc->swizzle[0]; 287 vtx->dst_sel_y = desc->swizzle[1]; 288 vtx->dst_sel_z = desc->swizzle[2]; 289 vtx->dst_sel_w = desc->swizzle[3]; 290 } 291 break; 292 default: 293 break; 294 } 295 } 296 return r600_bc_build(&shader->bc); 297} 298 299int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) 300{ 301 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 302 int r; 303 304 if (shader == NULL) 305 return -EINVAL; 306 /* there should be enough input */ 307 if (rctx->vertex_elements->count < shader->shader.bc.nresource) { 308 R600_ERR("%d resources provided, expecting %d\n", 309 rctx->vertex_elements->count, shader->shader.bc.nresource); 310 return -EINVAL; 311 } 312 r = r600_shader_update(ctx, shader); 313 if (r) 314 return r; 315 return r600_pipe_shader(ctx, shader); 316} 317 318int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 319int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 320{ 321 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 322 int r; 323 324//fprintf(stderr, "--------------------------------------------------------------\n"); 325//tgsi_dump(tokens, 0); 326 shader->shader.family = r600_get_family(rctx->radeon); 327 r = r600_shader_from_tgsi(tokens, &shader->shader); 328 if (r) { 329 R600_ERR("translation from TGSI failed !\n"); 330 return r; 331 } 332 r = r600_bc_build(&shader->shader.bc); 333 if (r) { 334 R600_ERR("building bytecode failed !\n"); 335 return r; 336 } 337//fprintf(stderr, "______________________________________________________________\n"); 338 return 0; 339} 340 341void 342r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 343{ 344 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 345 struct r600_bc_cf *cf, *next_cf; 346 347 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 348 349 /* FIXME: is there more stuff to free? */ 350} 351 352/* 353 * tgsi -> r600 shader 354 */ 355struct r600_shader_tgsi_instruction; 356 357struct r600_shader_ctx { 358 struct tgsi_shader_info info; 359 struct tgsi_parse_context parse; 360 const struct tgsi_token *tokens; 361 unsigned type; 362 unsigned file_offset[TGSI_FILE_COUNT]; 363 unsigned temp_reg; 364 struct r600_shader_tgsi_instruction *inst_info; 365 struct r600_bc *bc; 366 struct r600_shader *shader; 367 u32 value[4]; 368 u32 *literals; 369 u32 nliterals; 370 u32 max_driver_temp_used; 371 /* needed for evergreen interpolation */ 372 boolean input_centroid; 373 boolean input_linear; 374 boolean input_perspective; 375 int num_interp_gpr; 376}; 377 378struct r600_shader_tgsi_instruction { 379 unsigned tgsi_opcode; 380 unsigned is_op3; 381 unsigned r600_opcode; 382 int (*process)(struct r600_shader_ctx *ctx); 383}; 384 385static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 386static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 387 388static int tgsi_is_supported(struct r600_shader_ctx *ctx) 389{ 390 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 391 int j; 392 393 if (i->Instruction.NumDstRegs > 1) { 394 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 395 return -EINVAL; 396 } 397 if (i->Instruction.Predicate) { 398 R600_ERR("predicate unsupported\n"); 399 return -EINVAL; 400 } 401#if 0 402 if (i->Instruction.Label) { 403 R600_ERR("label unsupported\n"); 404 return -EINVAL; 405 } 406#endif 407 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 408 if (i->Src[j].Register.Dimension) { 409 R600_ERR("unsupported src %d (dimension %d)\n", j, 410 i->Src[j].Register.Dimension); 411 return -EINVAL; 412 } 413 } 414 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 415 if (i->Dst[j].Register.Dimension) { 416 R600_ERR("unsupported dst (dimension)\n"); 417 return -EINVAL; 418 } 419 } 420 return 0; 421} 422 423static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 424{ 425 int i, r; 426 struct r600_bc_alu alu; 427 int gpr = 0, base_chan = 0; 428 int ij_index = 0; 429 430 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 431 ij_index = 0; 432 if (ctx->shader->input[input].centroid) 433 ij_index++; 434 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 435 ij_index = 0; 436 /* if we have perspective add one */ 437 if (ctx->input_perspective) { 438 ij_index++; 439 /* if we have perspective centroid */ 440 if (ctx->input_centroid) 441 ij_index++; 442 } 443 if (ctx->shader->input[input].centroid) 444 ij_index++; 445 } 446 447 /* work out gpr and base_chan from index */ 448 gpr = ij_index / 2; 449 base_chan = (2 * (ij_index % 2)) + 1; 450 451 for (i = 0; i < 8; i++) { 452 memset(&alu, 0, sizeof(struct r600_bc_alu)); 453 454 if (i < 4) 455 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 456 else 457 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 458 459 if ((i > 1) && (i < 6)) { 460 alu.dst.sel = ctx->shader->input[input].gpr; 461 alu.dst.write = 1; 462 } 463 464 alu.dst.chan = i % 4; 465 466 alu.src[0].sel = gpr; 467 alu.src[0].chan = (base_chan - (i % 2)); 468 469 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 470 471 alu.bank_swizzle_force = SQ_ALU_VEC_210; 472 if ((i % 4) == 3) 473 alu.last = 1; 474 r = r600_bc_add_alu(ctx->bc, &alu); 475 if (r) 476 return r; 477 } 478 return 0; 479} 480 481 482static int tgsi_declaration(struct r600_shader_ctx *ctx) 483{ 484 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 485 struct r600_bc_vtx vtx; 486 unsigned i; 487 int r; 488 489 switch (d->Declaration.File) { 490 case TGSI_FILE_INPUT: 491 i = ctx->shader->ninput++; 492 ctx->shader->input[i].name = d->Semantic.Name; 493 ctx->shader->input[i].sid = d->Semantic.Index; 494 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 495 ctx->shader->input[i].centroid = d->Declaration.Centroid; 496 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 497 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 498 /* turn input into fetch */ 499 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 500 vtx.inst = 0; 501 vtx.fetch_type = 0; 502 vtx.buffer_id = i; 503 /* register containing the index into the buffer */ 504 vtx.src_gpr = 0; 505 vtx.src_sel_x = 0; 506 vtx.mega_fetch_count = 0x1F; 507 vtx.dst_gpr = ctx->shader->input[i].gpr; 508 vtx.dst_sel_x = 0; 509 vtx.dst_sel_y = 1; 510 vtx.dst_sel_z = 2; 511 vtx.dst_sel_w = 3; 512 vtx.use_const_fields = 1; 513 r = r600_bc_add_vtx(ctx->bc, &vtx); 514 if (r) 515 return r; 516 } 517 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { 518 /* turn input into interpolate on EG */ 519 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 520 if (ctx->shader->input[i].interpolate > 0) { 521 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 522 evergreen_interp_alu(ctx, i); 523 } 524 } 525 } 526 break; 527 case TGSI_FILE_OUTPUT: 528 i = ctx->shader->noutput++; 529 ctx->shader->output[i].name = d->Semantic.Name; 530 ctx->shader->output[i].sid = d->Semantic.Index; 531 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 532 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 533 break; 534 case TGSI_FILE_CONSTANT: 535 case TGSI_FILE_TEMPORARY: 536 case TGSI_FILE_SAMPLER: 537 case TGSI_FILE_ADDRESS: 538 break; 539 default: 540 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 541 return -EINVAL; 542 } 543 return 0; 544} 545 546static int r600_get_temp(struct r600_shader_ctx *ctx) 547{ 548 return ctx->temp_reg + ctx->max_driver_temp_used++; 549} 550 551/* 552 * for evergreen we need to scan the shader to find the number of GPRs we need to 553 * reserve for interpolation. 554 * 555 * we need to know if we are going to emit 556 * any centroid inputs 557 * if perspective and linear are required 558*/ 559static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 560{ 561 int i; 562 int num_baryc; 563 564 ctx->input_linear = FALSE; 565 ctx->input_perspective = FALSE; 566 ctx->input_centroid = FALSE; 567 ctx->num_interp_gpr = 1; 568 569 /* any centroid inputs */ 570 for (i = 0; i < ctx->info.num_inputs; i++) { 571 /* skip position/face */ 572 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 573 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 574 continue; 575 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 576 ctx->input_linear = TRUE; 577 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 578 ctx->input_perspective = TRUE; 579 if (ctx->info.input_centroid[i]) 580 ctx->input_centroid = TRUE; 581 } 582 583 num_baryc = 0; 584 /* ignoring sample for now */ 585 if (ctx->input_perspective) 586 num_baryc++; 587 if (ctx->input_linear) 588 num_baryc++; 589 if (ctx->input_centroid) 590 num_baryc *= 2; 591 592 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 593 594 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 595 return ctx->num_interp_gpr; 596} 597 598int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 599{ 600 struct tgsi_full_immediate *immediate; 601 struct r600_shader_ctx ctx; 602 struct r600_bc_output output[32]; 603 unsigned output_done, noutput; 604 unsigned opcode; 605 int i, r = 0, pos0; 606 607 ctx.bc = &shader->bc; 608 ctx.shader = shader; 609 r = r600_bc_init(ctx.bc, shader->family); 610 if (r) 611 return r; 612 ctx.tokens = tokens; 613 tgsi_scan_shader(tokens, &ctx.info); 614 tgsi_parse_init(&ctx.parse, tokens); 615 ctx.type = ctx.parse.FullHeader.Processor.Processor; 616 shader->processor_type = ctx.type; 617 618 /* register allocations */ 619 /* Values [0,127] correspond to GPR[0..127]. 620 * Values [128,159] correspond to constant buffer bank 0 621 * Values [160,191] correspond to constant buffer bank 1 622 * Values [256,511] correspond to cfile constants c[0..255]. 623 * Other special values are shown in the list below. 624 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 625 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 626 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 627 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 628 * 248 SQ_ALU_SRC_0: special constant 0.0. 629 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 630 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 631 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 632 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 633 * 253 SQ_ALU_SRC_LITERAL: literal constant. 634 * 254 SQ_ALU_SRC_PV: previous vector result. 635 * 255 SQ_ALU_SRC_PS: previous scalar result. 636 */ 637 for (i = 0; i < TGSI_FILE_COUNT; i++) { 638 ctx.file_offset[i] = 0; 639 } 640 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 641 ctx.file_offset[TGSI_FILE_INPUT] = 1; 642 } 643 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { 644 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 645 } 646 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 647 ctx.info.file_count[TGSI_FILE_INPUT]; 648 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 649 ctx.info.file_count[TGSI_FILE_OUTPUT]; 650 651 ctx.file_offset[TGSI_FILE_CONSTANT] = 128; 652 653 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 654 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 655 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 656 657 ctx.nliterals = 0; 658 ctx.literals = NULL; 659 660 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 661 tgsi_parse_token(&ctx.parse); 662 switch (ctx.parse.FullToken.Token.Type) { 663 case TGSI_TOKEN_TYPE_IMMEDIATE: 664 immediate = &ctx.parse.FullToken.FullImmediate; 665 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 666 if(ctx.literals == NULL) { 667 r = -ENOMEM; 668 goto out_err; 669 } 670 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 671 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 672 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 673 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 674 ctx.nliterals++; 675 break; 676 case TGSI_TOKEN_TYPE_DECLARATION: 677 r = tgsi_declaration(&ctx); 678 if (r) 679 goto out_err; 680 break; 681 case TGSI_TOKEN_TYPE_INSTRUCTION: 682 r = tgsi_is_supported(&ctx); 683 if (r) 684 goto out_err; 685 ctx.max_driver_temp_used = 0; 686 /* reserve first tmp for everyone */ 687 r600_get_temp(&ctx); 688 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 689 if (ctx.bc->chiprev == 2) 690 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 691 else 692 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 693 r = ctx.inst_info->process(&ctx); 694 if (r) 695 goto out_err; 696 r = r600_bc_add_literal(ctx.bc, ctx.value); 697 if (r) 698 goto out_err; 699 break; 700 default: 701 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 702 r = -EINVAL; 703 goto out_err; 704 } 705 } 706 /* export output */ 707 noutput = shader->noutput; 708 for (i = 0, pos0 = 0; i < noutput; i++) { 709 memset(&output[i], 0, sizeof(struct r600_bc_output)); 710 output[i].gpr = shader->output[i].gpr; 711 output[i].elem_size = 3; 712 output[i].swizzle_x = 0; 713 output[i].swizzle_y = 1; 714 output[i].swizzle_z = 2; 715 output[i].swizzle_w = 3; 716 output[i].barrier = 1; 717 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 718 output[i].array_base = i - pos0; 719 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 720 switch (ctx.type) { 721 case TGSI_PROCESSOR_VERTEX: 722 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 723 output[i].array_base = 60; 724 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 725 /* position doesn't count in array_base */ 726 pos0++; 727 } 728 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 729 output[i].array_base = 61; 730 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 731 /* position doesn't count in array_base */ 732 pos0++; 733 } 734 break; 735 case TGSI_PROCESSOR_FRAGMENT: 736 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 737 output[i].array_base = shader->output[i].sid; 738 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 739 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 740 output[i].array_base = 61; 741 output[i].swizzle_x = 2; 742 output[i].swizzle_y = 7; 743 output[i].swizzle_z = output[i].swizzle_w = 7; 744 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 745 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 746 output[i].array_base = 61; 747 output[i].swizzle_x = 7; 748 output[i].swizzle_y = 1; 749 output[i].swizzle_z = output[i].swizzle_w = 7; 750 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 751 } else { 752 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 753 r = -EINVAL; 754 goto out_err; 755 } 756 break; 757 default: 758 R600_ERR("unsupported processor type %d\n", ctx.type); 759 r = -EINVAL; 760 goto out_err; 761 } 762 } 763 /* add fake param output for vertex shader if no param is exported */ 764 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 765 for (i = 0, pos0 = 0; i < noutput; i++) { 766 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 767 pos0 = 1; 768 break; 769 } 770 } 771 if (!pos0) { 772 memset(&output[i], 0, sizeof(struct r600_bc_output)); 773 output[i].gpr = 0; 774 output[i].elem_size = 3; 775 output[i].swizzle_x = 0; 776 output[i].swizzle_y = 1; 777 output[i].swizzle_z = 2; 778 output[i].swizzle_w = 3; 779 output[i].barrier = 1; 780 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 781 output[i].array_base = 0; 782 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 783 noutput++; 784 } 785 } 786 /* add fake pixel export */ 787 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 788 memset(&output[0], 0, sizeof(struct r600_bc_output)); 789 output[0].gpr = 0; 790 output[0].elem_size = 3; 791 output[0].swizzle_x = 7; 792 output[0].swizzle_y = 7; 793 output[0].swizzle_z = 7; 794 output[0].swizzle_w = 7; 795 output[0].barrier = 1; 796 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 797 output[0].array_base = 0; 798 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 799 noutput++; 800 } 801 /* set export done on last export of each type */ 802 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 803 if (i == (noutput - 1)) { 804 output[i].end_of_program = 1; 805 } 806 if (!(output_done & (1 << output[i].type))) { 807 output_done |= (1 << output[i].type); 808 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 809 } 810 } 811 /* add output to bytecode */ 812 for (i = 0; i < noutput; i++) { 813 r = r600_bc_add_output(ctx.bc, &output[i]); 814 if (r) 815 goto out_err; 816 } 817 free(ctx.literals); 818 tgsi_parse_free(&ctx.parse); 819 return 0; 820out_err: 821 free(ctx.literals); 822 tgsi_parse_free(&ctx.parse); 823 return r; 824} 825 826static int tgsi_unsupported(struct r600_shader_ctx *ctx) 827{ 828 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 829 return -EINVAL; 830} 831 832static int tgsi_end(struct r600_shader_ctx *ctx) 833{ 834 return 0; 835} 836 837static int tgsi_src(struct r600_shader_ctx *ctx, 838 const struct tgsi_full_src_register *tgsi_src, 839 struct r600_bc_alu_src *r600_src) 840{ 841 int index; 842 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 843 r600_src->sel = tgsi_src->Register.Index; 844 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 845 r600_src->sel = 0; 846 index = tgsi_src->Register.Index; 847 ctx->value[0] = ctx->literals[index * 4 + 0]; 848 ctx->value[1] = ctx->literals[index * 4 + 1]; 849 ctx->value[2] = ctx->literals[index * 4 + 2]; 850 ctx->value[3] = ctx->literals[index * 4 + 3]; 851 } 852 if (tgsi_src->Register.Indirect) 853 r600_src->rel = V_SQ_REL_RELATIVE; 854 r600_src->neg = tgsi_src->Register.Negate; 855 r600_src->abs = tgsi_src->Register.Absolute; 856 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 857 return 0; 858} 859 860static int tgsi_dst(struct r600_shader_ctx *ctx, 861 const struct tgsi_full_dst_register *tgsi_dst, 862 unsigned swizzle, 863 struct r600_bc_alu_dst *r600_dst) 864{ 865 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 866 867 r600_dst->sel = tgsi_dst->Register.Index; 868 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 869 r600_dst->chan = swizzle; 870 r600_dst->write = 1; 871 if (tgsi_dst->Register.Indirect) 872 r600_dst->rel = V_SQ_REL_RELATIVE; 873 if (inst->Instruction.Saturate) { 874 r600_dst->clamp = 1; 875 } 876 return 0; 877} 878 879static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 880{ 881 switch (swizzle) { 882 case 0: 883 return tgsi_src->Register.SwizzleX; 884 case 1: 885 return tgsi_src->Register.SwizzleY; 886 case 2: 887 return tgsi_src->Register.SwizzleZ; 888 case 3: 889 return tgsi_src->Register.SwizzleW; 890 default: 891 return 0; 892 } 893} 894 895static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 896{ 897 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 898 struct r600_bc_alu alu; 899 int i, j, k, nconst, r; 900 901 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 902 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 903 nconst++; 904 } 905 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 906 if (r) { 907 return r; 908 } 909 } 910 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 911 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 912 int treg = r600_get_temp(ctx); 913 for (k = 0; k < 4; k++) { 914 memset(&alu, 0, sizeof(struct r600_bc_alu)); 915 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 916 alu.src[0].sel = r600_src[i].sel; 917 alu.src[0].chan = k; 918 alu.src[0].rel = r600_src[i].rel; 919 alu.dst.sel = treg; 920 alu.dst.chan = k; 921 alu.dst.write = 1; 922 if (k == 3) 923 alu.last = 1; 924 r = r600_bc_add_alu(ctx->bc, &alu); 925 if (r) 926 return r; 927 } 928 r600_src[i].sel = treg; 929 r600_src[i].rel =0; 930 j--; 931 } 932 } 933 return 0; 934} 935 936/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 937static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 938{ 939 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 940 struct r600_bc_alu alu; 941 int i, j, k, nliteral, r; 942 943 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 944 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 945 nliteral++; 946 } 947 } 948 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 949 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 950 int treg = r600_get_temp(ctx); 951 for (k = 0; k < 4; k++) { 952 memset(&alu, 0, sizeof(struct r600_bc_alu)); 953 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 954 alu.src[0].sel = r600_src[i].sel; 955 alu.src[0].chan = k; 956 alu.dst.sel = treg; 957 alu.dst.chan = k; 958 alu.dst.write = 1; 959 if (k == 3) 960 alu.last = 1; 961 r = r600_bc_add_alu(ctx->bc, &alu); 962 if (r) 963 return r; 964 } 965 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); 966 if (r) 967 return r; 968 r600_src[i].sel = treg; 969 j--; 970 } 971 } 972 return 0; 973} 974 975static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 976{ 977 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 978 struct r600_bc_alu_src r600_src[3]; 979 struct r600_bc_alu alu; 980 int i, j, r; 981 int lasti = 0; 982 983 for (i = 0; i < 4; i++) { 984 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 985 lasti = i; 986 } 987 } 988 989 r = tgsi_split_constant(ctx, r600_src); 990 if (r) 991 return r; 992 r = tgsi_split_literal_constant(ctx, r600_src); 993 if (r) 994 return r; 995 for (i = 0; i < lasti + 1; i++) { 996 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 997 continue; 998 999 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1000 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1001 if (r) 1002 return r; 1003 1004 alu.inst = ctx->inst_info->r600_opcode; 1005 if (!swap) { 1006 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1007 alu.src[j] = r600_src[j]; 1008 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1009 } 1010 } else { 1011 alu.src[0] = r600_src[1]; 1012 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 1013 1014 alu.src[1] = r600_src[0]; 1015 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1016 } 1017 /* handle some special cases */ 1018 switch (ctx->inst_info->tgsi_opcode) { 1019 case TGSI_OPCODE_SUB: 1020 alu.src[1].neg = 1; 1021 break; 1022 case TGSI_OPCODE_ABS: 1023 alu.src[0].abs = 1; 1024 break; 1025 default: 1026 break; 1027 } 1028 if (i == lasti) { 1029 alu.last = 1; 1030 } 1031 r = r600_bc_add_alu(ctx->bc, &alu); 1032 if (r) 1033 return r; 1034 } 1035 return 0; 1036} 1037 1038static int tgsi_op2(struct r600_shader_ctx *ctx) 1039{ 1040 return tgsi_op2_s(ctx, 0); 1041} 1042 1043static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1044{ 1045 return tgsi_op2_s(ctx, 1); 1046} 1047 1048/* 1049 * r600 - trunc to -PI..PI range 1050 * r700 - normalize by dividing by 2PI 1051 * see fdo bug 27901 1052 */ 1053static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 1054 struct r600_bc_alu_src r600_src[3]) 1055{ 1056 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1057 int r; 1058 uint32_t lit_vals[4]; 1059 struct r600_bc_alu alu; 1060 1061 memset(lit_vals, 0, 4*4); 1062 r = tgsi_split_constant(ctx, r600_src); 1063 if (r) 1064 return r; 1065 r = tgsi_split_literal_constant(ctx, r600_src); 1066 if (r) 1067 return r; 1068 1069 r = tgsi_split_literal_constant(ctx, r600_src); 1070 if (r) 1071 return r; 1072 1073 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 1074 lit_vals[1] = fui(0.5f); 1075 1076 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1077 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1078 alu.is_op3 = 1; 1079 1080 alu.dst.chan = 0; 1081 alu.dst.sel = ctx->temp_reg; 1082 alu.dst.write = 1; 1083 1084 alu.src[0] = r600_src[0]; 1085 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1086 1087 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1088 alu.src[1].chan = 0; 1089 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1090 alu.src[2].chan = 1; 1091 alu.last = 1; 1092 r = r600_bc_add_alu(ctx->bc, &alu); 1093 if (r) 1094 return r; 1095 r = r600_bc_add_literal(ctx->bc, lit_vals); 1096 if (r) 1097 return r; 1098 1099 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1100 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1101 1102 alu.dst.chan = 0; 1103 alu.dst.sel = ctx->temp_reg; 1104 alu.dst.write = 1; 1105 1106 alu.src[0].sel = ctx->temp_reg; 1107 alu.src[0].chan = 0; 1108 alu.last = 1; 1109 r = r600_bc_add_alu(ctx->bc, &alu); 1110 if (r) 1111 return r; 1112 1113 if (ctx->bc->chiprev == 0) { 1114 lit_vals[0] = fui(3.1415926535897f * 2.0f); 1115 lit_vals[1] = fui(-3.1415926535897f); 1116 } else { 1117 lit_vals[0] = fui(1.0f); 1118 lit_vals[1] = fui(-0.5f); 1119 } 1120 1121 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1122 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1123 alu.is_op3 = 1; 1124 1125 alu.dst.chan = 0; 1126 alu.dst.sel = ctx->temp_reg; 1127 alu.dst.write = 1; 1128 1129 alu.src[0].sel = ctx->temp_reg; 1130 alu.src[0].chan = 0; 1131 1132 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1133 alu.src[1].chan = 0; 1134 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1135 alu.src[2].chan = 1; 1136 alu.last = 1; 1137 r = r600_bc_add_alu(ctx->bc, &alu); 1138 if (r) 1139 return r; 1140 r = r600_bc_add_literal(ctx->bc, lit_vals); 1141 if (r) 1142 return r; 1143 return 0; 1144} 1145 1146static int tgsi_trig(struct r600_shader_ctx *ctx) 1147{ 1148 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1149 struct r600_bc_alu_src r600_src[3]; 1150 struct r600_bc_alu alu; 1151 int i, r; 1152 int lasti = 0; 1153 1154 r = tgsi_setup_trig(ctx, r600_src); 1155 if (r) 1156 return r; 1157 1158 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1159 alu.inst = ctx->inst_info->r600_opcode; 1160 alu.dst.chan = 0; 1161 alu.dst.sel = ctx->temp_reg; 1162 alu.dst.write = 1; 1163 1164 alu.src[0].sel = ctx->temp_reg; 1165 alu.src[0].chan = 0; 1166 alu.last = 1; 1167 r = r600_bc_add_alu(ctx->bc, &alu); 1168 if (r) 1169 return r; 1170 1171 /* replicate result */ 1172 for (i = 0; i < 4; i++) { 1173 if (inst->Dst[0].Register.WriteMask & (1 << i)) 1174 lasti = i; 1175 } 1176 for (i = 0; i < lasti + 1; i++) { 1177 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1178 continue; 1179 1180 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1181 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1182 1183 alu.src[0].sel = ctx->temp_reg; 1184 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1185 if (r) 1186 return r; 1187 if (i == lasti) 1188 alu.last = 1; 1189 r = r600_bc_add_alu(ctx->bc, &alu); 1190 if (r) 1191 return r; 1192 } 1193 return 0; 1194} 1195 1196static int tgsi_scs(struct r600_shader_ctx *ctx) 1197{ 1198 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1199 struct r600_bc_alu_src r600_src[3]; 1200 struct r600_bc_alu alu; 1201 int r; 1202 1203 /* We'll only need the trig stuff if we are going to write to the 1204 * X or Y components of the destination vector. 1205 */ 1206 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1207 r = tgsi_setup_trig(ctx, r600_src); 1208 if (r) 1209 return r; 1210 } 1211 1212 /* dst.x = COS */ 1213 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1214 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1215 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1216 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1217 if (r) 1218 return r; 1219 1220 alu.src[0].sel = ctx->temp_reg; 1221 alu.src[0].chan = 0; 1222 alu.last = 1; 1223 r = r600_bc_add_alu(ctx->bc, &alu); 1224 if (r) 1225 return r; 1226 } 1227 1228 /* dst.y = SIN */ 1229 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1230 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1231 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1232 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1233 if (r) 1234 return r; 1235 1236 alu.src[0].sel = ctx->temp_reg; 1237 alu.src[0].chan = 0; 1238 alu.last = 1; 1239 r = r600_bc_add_alu(ctx->bc, &alu); 1240 if (r) 1241 return r; 1242 } 1243 1244 /* dst.z = 0.0; */ 1245 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1246 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1247 1248 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1249 1250 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1251 if (r) 1252 return r; 1253 1254 alu.src[0].sel = V_SQ_ALU_SRC_0; 1255 alu.src[0].chan = 0; 1256 1257 alu.last = 1; 1258 1259 r = r600_bc_add_alu(ctx->bc, &alu); 1260 if (r) 1261 return r; 1262 1263 r = r600_bc_add_literal(ctx->bc, ctx->value); 1264 if (r) 1265 return r; 1266 } 1267 1268 /* dst.w = 1.0; */ 1269 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1270 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1271 1272 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1273 1274 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1275 if (r) 1276 return r; 1277 1278 alu.src[0].sel = V_SQ_ALU_SRC_1; 1279 alu.src[0].chan = 0; 1280 1281 alu.last = 1; 1282 1283 r = r600_bc_add_alu(ctx->bc, &alu); 1284 if (r) 1285 return r; 1286 1287 r = r600_bc_add_literal(ctx->bc, ctx->value); 1288 if (r) 1289 return r; 1290 } 1291 1292 return 0; 1293} 1294 1295static int tgsi_kill(struct r600_shader_ctx *ctx) 1296{ 1297 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1298 struct r600_bc_alu alu; 1299 int i, r; 1300 1301 for (i = 0; i < 4; i++) { 1302 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1303 alu.inst = ctx->inst_info->r600_opcode; 1304 1305 alu.dst.chan = i; 1306 1307 alu.src[0].sel = V_SQ_ALU_SRC_0; 1308 1309 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1310 alu.src[1].sel = V_SQ_ALU_SRC_1; 1311 alu.src[1].neg = 1; 1312 } else { 1313 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1314 if (r) 1315 return r; 1316 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1317 } 1318 if (i == 3) { 1319 alu.last = 1; 1320 } 1321 r = r600_bc_add_alu(ctx->bc, &alu); 1322 if (r) 1323 return r; 1324 } 1325 r = r600_bc_add_literal(ctx->bc, ctx->value); 1326 if (r) 1327 return r; 1328 1329 /* kill must be last in ALU */ 1330 ctx->bc->force_add_cf = 1; 1331 ctx->shader->uses_kill = TRUE; 1332 return 0; 1333} 1334 1335static int tgsi_lit(struct r600_shader_ctx *ctx) 1336{ 1337 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1338 struct r600_bc_alu alu; 1339 struct r600_bc_alu_src r600_src[3]; 1340 int r; 1341 1342 r = tgsi_split_constant(ctx, r600_src); 1343 if (r) 1344 return r; 1345 r = tgsi_split_literal_constant(ctx, r600_src); 1346 if (r) 1347 return r; 1348 1349 /* dst.x, <- 1.0 */ 1350 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1351 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1352 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1353 alu.src[0].chan = 0; 1354 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1355 if (r) 1356 return r; 1357 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1358 r = r600_bc_add_alu(ctx->bc, &alu); 1359 if (r) 1360 return r; 1361 1362 /* dst.y = max(src.x, 0.0) */ 1363 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1364 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1365 alu.src[0] = r600_src[0]; 1366 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1367 alu.src[1].chan = 0; 1368 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1369 if (r) 1370 return r; 1371 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1372 r = r600_bc_add_alu(ctx->bc, &alu); 1373 if (r) 1374 return r; 1375 1376 /* dst.w, <- 1.0 */ 1377 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1378 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1379 alu.src[0].sel = V_SQ_ALU_SRC_1; 1380 alu.src[0].chan = 0; 1381 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1382 if (r) 1383 return r; 1384 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1385 alu.last = 1; 1386 r = r600_bc_add_alu(ctx->bc, &alu); 1387 if (r) 1388 return r; 1389 1390 r = r600_bc_add_literal(ctx->bc, ctx->value); 1391 if (r) 1392 return r; 1393 1394 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1395 { 1396 int chan; 1397 int sel; 1398 1399 /* dst.z = log(src.y) */ 1400 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1402 alu.src[0] = r600_src[0]; 1403 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1404 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1405 if (r) 1406 return r; 1407 alu.last = 1; 1408 r = r600_bc_add_alu(ctx->bc, &alu); 1409 if (r) 1410 return r; 1411 1412 r = r600_bc_add_literal(ctx->bc, ctx->value); 1413 if (r) 1414 return r; 1415 1416 chan = alu.dst.chan; 1417 sel = alu.dst.sel; 1418 1419 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1420 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1421 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1422 alu.src[0] = r600_src[0]; 1423 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1424 alu.src[1].sel = sel; 1425 alu.src[1].chan = chan; 1426 1427 alu.src[2] = r600_src[0]; 1428 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1429 alu.dst.sel = ctx->temp_reg; 1430 alu.dst.chan = 0; 1431 alu.dst.write = 1; 1432 alu.is_op3 = 1; 1433 alu.last = 1; 1434 r = r600_bc_add_alu(ctx->bc, &alu); 1435 if (r) 1436 return r; 1437 1438 r = r600_bc_add_literal(ctx->bc, ctx->value); 1439 if (r) 1440 return r; 1441 /* dst.z = exp(tmp.x) */ 1442 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1443 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1444 alu.src[0].sel = ctx->temp_reg; 1445 alu.src[0].chan = 0; 1446 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1447 if (r) 1448 return r; 1449 alu.last = 1; 1450 r = r600_bc_add_alu(ctx->bc, &alu); 1451 if (r) 1452 return r; 1453 } 1454 return 0; 1455} 1456 1457static int tgsi_rsq(struct r600_shader_ctx *ctx) 1458{ 1459 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1460 struct r600_bc_alu alu; 1461 int i, r; 1462 1463 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1464 1465 /* FIXME: 1466 * For state trackers other than OpenGL, we'll want to use 1467 * _RECIPSQRT_IEEE instead. 1468 */ 1469 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1470 1471 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1472 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1473 if (r) 1474 return r; 1475 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1476 alu.src[i].abs = 1; 1477 } 1478 alu.dst.sel = ctx->temp_reg; 1479 alu.dst.write = 1; 1480 alu.last = 1; 1481 r = r600_bc_add_alu(ctx->bc, &alu); 1482 if (r) 1483 return r; 1484 r = r600_bc_add_literal(ctx->bc, ctx->value); 1485 if (r) 1486 return r; 1487 /* replicate result */ 1488 return tgsi_helper_tempx_replicate(ctx); 1489} 1490 1491static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1492{ 1493 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1494 struct r600_bc_alu alu; 1495 int i, r; 1496 1497 for (i = 0; i < 4; i++) { 1498 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1499 alu.src[0].sel = ctx->temp_reg; 1500 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1501 alu.dst.chan = i; 1502 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1503 if (r) 1504 return r; 1505 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1506 if (i == 3) 1507 alu.last = 1; 1508 r = r600_bc_add_alu(ctx->bc, &alu); 1509 if (r) 1510 return r; 1511 } 1512 return 0; 1513} 1514 1515static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1516{ 1517 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1518 struct r600_bc_alu alu; 1519 int i, r; 1520 1521 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1522 alu.inst = ctx->inst_info->r600_opcode; 1523 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1524 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1525 if (r) 1526 return r; 1527 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1528 } 1529 alu.dst.sel = ctx->temp_reg; 1530 alu.dst.write = 1; 1531 alu.last = 1; 1532 r = r600_bc_add_alu(ctx->bc, &alu); 1533 if (r) 1534 return r; 1535 r = r600_bc_add_literal(ctx->bc, ctx->value); 1536 if (r) 1537 return r; 1538 /* replicate result */ 1539 return tgsi_helper_tempx_replicate(ctx); 1540} 1541 1542static int tgsi_pow(struct r600_shader_ctx *ctx) 1543{ 1544 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1545 struct r600_bc_alu alu; 1546 int r; 1547 1548 /* LOG2(a) */ 1549 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1550 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1551 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1552 if (r) 1553 return r; 1554 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1555 alu.dst.sel = ctx->temp_reg; 1556 alu.dst.write = 1; 1557 alu.last = 1; 1558 r = r600_bc_add_alu(ctx->bc, &alu); 1559 if (r) 1560 return r; 1561 r = r600_bc_add_literal(ctx->bc,ctx->value); 1562 if (r) 1563 return r; 1564 /* b * LOG2(a) */ 1565 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1566 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); 1567 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1568 if (r) 1569 return r; 1570 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1571 alu.src[1].sel = ctx->temp_reg; 1572 alu.dst.sel = ctx->temp_reg; 1573 alu.dst.write = 1; 1574 alu.last = 1; 1575 r = r600_bc_add_alu(ctx->bc, &alu); 1576 if (r) 1577 return r; 1578 r = r600_bc_add_literal(ctx->bc,ctx->value); 1579 if (r) 1580 return r; 1581 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1582 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1583 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1584 alu.src[0].sel = ctx->temp_reg; 1585 alu.dst.sel = ctx->temp_reg; 1586 alu.dst.write = 1; 1587 alu.last = 1; 1588 r = r600_bc_add_alu(ctx->bc, &alu); 1589 if (r) 1590 return r; 1591 r = r600_bc_add_literal(ctx->bc,ctx->value); 1592 if (r) 1593 return r; 1594 return tgsi_helper_tempx_replicate(ctx); 1595} 1596 1597static int tgsi_ssg(struct r600_shader_ctx *ctx) 1598{ 1599 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1600 struct r600_bc_alu alu; 1601 struct r600_bc_alu_src r600_src[3]; 1602 int i, r; 1603 1604 r = tgsi_split_constant(ctx, r600_src); 1605 if (r) 1606 return r; 1607 r = tgsi_split_literal_constant(ctx, r600_src); 1608 if (r) 1609 return r; 1610 1611 /* tmp = (src > 0 ? 1 : src) */ 1612 for (i = 0; i < 4; i++) { 1613 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1614 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1615 alu.is_op3 = 1; 1616 1617 alu.dst.sel = ctx->temp_reg; 1618 alu.dst.chan = i; 1619 1620 alu.src[0] = r600_src[0]; 1621 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1622 1623 alu.src[1].sel = V_SQ_ALU_SRC_1; 1624 1625 alu.src[2] = r600_src[0]; 1626 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1627 if (i == 3) 1628 alu.last = 1; 1629 r = r600_bc_add_alu(ctx->bc, &alu); 1630 if (r) 1631 return r; 1632 } 1633 r = r600_bc_add_literal(ctx->bc, ctx->value); 1634 if (r) 1635 return r; 1636 1637 /* dst = (-tmp > 0 ? -1 : tmp) */ 1638 for (i = 0; i < 4; i++) { 1639 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1640 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1641 alu.is_op3 = 1; 1642 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1643 if (r) 1644 return r; 1645 1646 alu.src[0].sel = ctx->temp_reg; 1647 alu.src[0].chan = i; 1648 alu.src[0].neg = 1; 1649 1650 alu.src[1].sel = V_SQ_ALU_SRC_1; 1651 alu.src[1].neg = 1; 1652 1653 alu.src[2].sel = ctx->temp_reg; 1654 alu.src[2].chan = i; 1655 1656 if (i == 3) 1657 alu.last = 1; 1658 r = r600_bc_add_alu(ctx->bc, &alu); 1659 if (r) 1660 return r; 1661 } 1662 return 0; 1663} 1664 1665static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1666{ 1667 struct r600_bc_alu alu; 1668 int i, r; 1669 1670 r = r600_bc_add_literal(ctx->bc, ctx->value); 1671 if (r) 1672 return r; 1673 for (i = 0; i < 4; i++) { 1674 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1675 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1676 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1677 alu.dst.chan = i; 1678 } else { 1679 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1680 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1681 if (r) 1682 return r; 1683 alu.src[0].sel = ctx->temp_reg; 1684 alu.src[0].chan = i; 1685 } 1686 if (i == 3) { 1687 alu.last = 1; 1688 } 1689 r = r600_bc_add_alu(ctx->bc, &alu); 1690 if (r) 1691 return r; 1692 } 1693 return 0; 1694} 1695 1696static int tgsi_op3(struct r600_shader_ctx *ctx) 1697{ 1698 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1699 struct r600_bc_alu_src r600_src[3]; 1700 struct r600_bc_alu alu; 1701 int i, j, r; 1702 1703 r = tgsi_split_constant(ctx, r600_src); 1704 if (r) 1705 return r; 1706 r = tgsi_split_literal_constant(ctx, r600_src); 1707 if (r) 1708 return r; 1709 /* do it in 2 step as op3 doesn't support writemask */ 1710 for (i = 0; i < 4; i++) { 1711 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1712 alu.inst = ctx->inst_info->r600_opcode; 1713 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1714 alu.src[j] = r600_src[j]; 1715 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1716 } 1717 alu.dst.sel = ctx->temp_reg; 1718 alu.dst.chan = i; 1719 alu.dst.write = 1; 1720 alu.is_op3 = 1; 1721 if (i == 3) { 1722 alu.last = 1; 1723 } 1724 r = r600_bc_add_alu(ctx->bc, &alu); 1725 if (r) 1726 return r; 1727 } 1728 return tgsi_helper_copy(ctx, inst); 1729} 1730 1731static int tgsi_dp(struct r600_shader_ctx *ctx) 1732{ 1733 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1734 struct r600_bc_alu_src r600_src[3]; 1735 struct r600_bc_alu alu; 1736 int i, j, r; 1737 1738 r = tgsi_split_constant(ctx, r600_src); 1739 if (r) 1740 return r; 1741 r = tgsi_split_literal_constant(ctx, r600_src); 1742 if (r) 1743 return r; 1744 for (i = 0; i < 4; i++) { 1745 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1746 alu.inst = ctx->inst_info->r600_opcode; 1747 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1748 alu.src[j] = r600_src[j]; 1749 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1750 } 1751 alu.dst.sel = ctx->temp_reg; 1752 alu.dst.chan = i; 1753 alu.dst.write = 1; 1754 /* handle some special cases */ 1755 switch (ctx->inst_info->tgsi_opcode) { 1756 case TGSI_OPCODE_DP2: 1757 if (i > 1) { 1758 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1759 alu.src[0].chan = alu.src[1].chan = 0; 1760 } 1761 break; 1762 case TGSI_OPCODE_DP3: 1763 if (i > 2) { 1764 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1765 alu.src[0].chan = alu.src[1].chan = 0; 1766 } 1767 break; 1768 case TGSI_OPCODE_DPH: 1769 if (i == 3) { 1770 alu.src[0].sel = V_SQ_ALU_SRC_1; 1771 alu.src[0].chan = 0; 1772 alu.src[0].neg = 0; 1773 } 1774 break; 1775 default: 1776 break; 1777 } 1778 if (i == 3) { 1779 alu.last = 1; 1780 } 1781 r = r600_bc_add_alu(ctx->bc, &alu); 1782 if (r) 1783 return r; 1784 } 1785 return tgsi_helper_copy(ctx, inst); 1786} 1787 1788static int tgsi_tex(struct r600_shader_ctx *ctx) 1789{ 1790 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1791 struct r600_bc_tex tex; 1792 struct r600_bc_alu alu; 1793 unsigned src_gpr; 1794 int r, i; 1795 int opcode; 1796 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; 1797 uint32_t lit_vals[4]; 1798 1799 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1800 1801 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1802 /* Add perspective divide */ 1803 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1804 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1805 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1806 if (r) 1807 return r; 1808 1809 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1810 alu.dst.sel = ctx->temp_reg; 1811 alu.dst.chan = 3; 1812 alu.last = 1; 1813 alu.dst.write = 1; 1814 r = r600_bc_add_alu(ctx->bc, &alu); 1815 if (r) 1816 return r; 1817 1818 for (i = 0; i < 3; i++) { 1819 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1820 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1821 alu.src[0].sel = ctx->temp_reg; 1822 alu.src[0].chan = 3; 1823 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1824 if (r) 1825 return r; 1826 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1827 alu.dst.sel = ctx->temp_reg; 1828 alu.dst.chan = i; 1829 alu.dst.write = 1; 1830 r = r600_bc_add_alu(ctx->bc, &alu); 1831 if (r) 1832 return r; 1833 } 1834 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1835 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1836 alu.src[0].sel = V_SQ_ALU_SRC_1; 1837 alu.src[0].chan = 0; 1838 alu.dst.sel = ctx->temp_reg; 1839 alu.dst.chan = 3; 1840 alu.last = 1; 1841 alu.dst.write = 1; 1842 r = r600_bc_add_alu(ctx->bc, &alu); 1843 if (r) 1844 return r; 1845 src_not_temp = FALSE; 1846 src_gpr = ctx->temp_reg; 1847 } 1848 1849 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1850 int src_chan, src2_chan; 1851 1852 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1853 for (i = 0; i < 4; i++) { 1854 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1855 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1856 switch (i) { 1857 case 0: 1858 src_chan = 2; 1859 src2_chan = 1; 1860 break; 1861 case 1: 1862 src_chan = 2; 1863 src2_chan = 0; 1864 break; 1865 case 2: 1866 src_chan = 0; 1867 src2_chan = 2; 1868 break; 1869 case 3: 1870 src_chan = 1; 1871 src2_chan = 2; 1872 break; 1873 default: 1874 assert(0); 1875 src_chan = 0; 1876 src2_chan = 0; 1877 break; 1878 } 1879 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1880 if (r) 1881 return r; 1882 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1883 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1884 if (r) 1885 return r; 1886 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1887 alu.dst.sel = ctx->temp_reg; 1888 alu.dst.chan = i; 1889 if (i == 3) 1890 alu.last = 1; 1891 alu.dst.write = 1; 1892 r = r600_bc_add_alu(ctx->bc, &alu); 1893 if (r) 1894 return r; 1895 } 1896 1897 /* tmp1.z = RCP_e(|tmp1.z|) */ 1898 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1899 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1900 alu.src[0].sel = ctx->temp_reg; 1901 alu.src[0].chan = 2; 1902 alu.src[0].abs = 1; 1903 alu.dst.sel = ctx->temp_reg; 1904 alu.dst.chan = 2; 1905 alu.dst.write = 1; 1906 alu.last = 1; 1907 r = r600_bc_add_alu(ctx->bc, &alu); 1908 if (r) 1909 return r; 1910 1911 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1912 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1913 * muladd has no writemask, have to use another temp 1914 */ 1915 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1916 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1917 alu.is_op3 = 1; 1918 1919 alu.src[0].sel = ctx->temp_reg; 1920 alu.src[0].chan = 0; 1921 alu.src[1].sel = ctx->temp_reg; 1922 alu.src[1].chan = 2; 1923 1924 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1925 alu.src[2].chan = 0; 1926 1927 alu.dst.sel = ctx->temp_reg; 1928 alu.dst.chan = 0; 1929 alu.dst.write = 1; 1930 1931 r = r600_bc_add_alu(ctx->bc, &alu); 1932 if (r) 1933 return r; 1934 1935 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1936 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1937 alu.is_op3 = 1; 1938 1939 alu.src[0].sel = ctx->temp_reg; 1940 alu.src[0].chan = 1; 1941 alu.src[1].sel = ctx->temp_reg; 1942 alu.src[1].chan = 2; 1943 1944 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1945 alu.src[2].chan = 0; 1946 1947 alu.dst.sel = ctx->temp_reg; 1948 alu.dst.chan = 1; 1949 alu.dst.write = 1; 1950 1951 alu.last = 1; 1952 r = r600_bc_add_alu(ctx->bc, &alu); 1953 if (r) 1954 return r; 1955 1956 lit_vals[0] = fui(1.5f); 1957 1958 r = r600_bc_add_literal(ctx->bc, lit_vals); 1959 if (r) 1960 return r; 1961 src_not_temp = FALSE; 1962 src_gpr = ctx->temp_reg; 1963 } 1964 1965 if (src_not_temp) { 1966 for (i = 0; i < 4; i++) { 1967 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1968 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1969 alu.src[0].sel = src_gpr; 1970 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1971 alu.dst.sel = ctx->temp_reg; 1972 alu.dst.chan = i; 1973 if (i == 3) 1974 alu.last = 1; 1975 alu.dst.write = 1; 1976 r = r600_bc_add_alu(ctx->bc, &alu); 1977 if (r) 1978 return r; 1979 } 1980 src_gpr = ctx->temp_reg; 1981 } 1982 1983 opcode = ctx->inst_info->r600_opcode; 1984 if (opcode == SQ_TEX_INST_SAMPLE && 1985 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1986 opcode = SQ_TEX_INST_SAMPLE_C; 1987 1988 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1989 tex.inst = opcode; 1990 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1991 tex.resource_id = tex.sampler_id; 1992 if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX) 1993 tex.resource_id += PIPE_MAX_ATTRIBS; 1994 tex.src_gpr = src_gpr; 1995 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1996 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 1997 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 1998 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 1999 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 2000 tex.src_sel_x = 0; 2001 tex.src_sel_y = 1; 2002 tex.src_sel_z = 2; 2003 tex.src_sel_w = 3; 2004 2005 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2006 tex.src_sel_x = 1; 2007 tex.src_sel_y = 0; 2008 tex.src_sel_z = 3; 2009 tex.src_sel_w = 1; 2010 } 2011 2012 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 2013 tex.coord_type_x = 1; 2014 tex.coord_type_y = 1; 2015 tex.coord_type_z = 1; 2016 tex.coord_type_w = 1; 2017 } 2018 2019 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 2020 tex.src_sel_w = 2; 2021 2022 r = r600_bc_add_tex(ctx->bc, &tex); 2023 if (r) 2024 return r; 2025 2026 /* add shadow ambient support - gallium doesn't do it yet */ 2027 return 0; 2028 2029} 2030 2031static int tgsi_lrp(struct r600_shader_ctx *ctx) 2032{ 2033 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2034 struct r600_bc_alu_src r600_src[3]; 2035 struct r600_bc_alu alu; 2036 unsigned i; 2037 int r; 2038 2039 r = tgsi_split_constant(ctx, r600_src); 2040 if (r) 2041 return r; 2042 r = tgsi_split_literal_constant(ctx, r600_src); 2043 if (r) 2044 return r; 2045 /* 1 - src0 */ 2046 for (i = 0; i < 4; i++) { 2047 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2048 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2049 alu.src[0].sel = V_SQ_ALU_SRC_1; 2050 alu.src[0].chan = 0; 2051 alu.src[1] = r600_src[0]; 2052 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 2053 alu.src[1].neg = 1; 2054 alu.dst.sel = ctx->temp_reg; 2055 alu.dst.chan = i; 2056 if (i == 3) { 2057 alu.last = 1; 2058 } 2059 alu.dst.write = 1; 2060 r = r600_bc_add_alu(ctx->bc, &alu); 2061 if (r) 2062 return r; 2063 } 2064 r = r600_bc_add_literal(ctx->bc, ctx->value); 2065 if (r) 2066 return r; 2067 2068 /* (1 - src0) * src2 */ 2069 for (i = 0; i < 4; i++) { 2070 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2071 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2072 alu.src[0].sel = ctx->temp_reg; 2073 alu.src[0].chan = i; 2074 alu.src[1] = r600_src[2]; 2075 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2076 alu.dst.sel = ctx->temp_reg; 2077 alu.dst.chan = i; 2078 if (i == 3) { 2079 alu.last = 1; 2080 } 2081 alu.dst.write = 1; 2082 r = r600_bc_add_alu(ctx->bc, &alu); 2083 if (r) 2084 return r; 2085 } 2086 r = r600_bc_add_literal(ctx->bc, ctx->value); 2087 if (r) 2088 return r; 2089 2090 /* src0 * src1 + (1 - src0) * src2 */ 2091 for (i = 0; i < 4; i++) { 2092 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2093 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2094 alu.is_op3 = 1; 2095 alu.src[0] = r600_src[0]; 2096 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2097 alu.src[1] = r600_src[1]; 2098 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2099 alu.src[2].sel = ctx->temp_reg; 2100 alu.src[2].chan = i; 2101 alu.dst.sel = ctx->temp_reg; 2102 alu.dst.chan = i; 2103 if (i == 3) { 2104 alu.last = 1; 2105 } 2106 r = r600_bc_add_alu(ctx->bc, &alu); 2107 if (r) 2108 return r; 2109 } 2110 return tgsi_helper_copy(ctx, inst); 2111} 2112 2113static int tgsi_cmp(struct r600_shader_ctx *ctx) 2114{ 2115 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2116 struct r600_bc_alu_src r600_src[3]; 2117 struct r600_bc_alu alu; 2118 int use_temp = 0; 2119 int i, r; 2120 2121 r = tgsi_split_constant(ctx, r600_src); 2122 if (r) 2123 return r; 2124 r = tgsi_split_literal_constant(ctx, r600_src); 2125 if (r) 2126 return r; 2127 2128 if (inst->Dst[0].Register.WriteMask != 0xf) 2129 use_temp = 1; 2130 2131 for (i = 0; i < 4; i++) { 2132 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2133 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2134 alu.src[0] = r600_src[0]; 2135 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2136 2137 alu.src[1] = r600_src[2]; 2138 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2139 2140 alu.src[2] = r600_src[1]; 2141 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 2142 2143 if (use_temp) 2144 alu.dst.sel = ctx->temp_reg; 2145 else { 2146 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2147 if (r) 2148 return r; 2149 } 2150 alu.dst.chan = i; 2151 alu.dst.write = 1; 2152 alu.is_op3 = 1; 2153 if (i == 3) 2154 alu.last = 1; 2155 r = r600_bc_add_alu(ctx->bc, &alu); 2156 if (r) 2157 return r; 2158 } 2159 if (use_temp) 2160 return tgsi_helper_copy(ctx, inst); 2161 return 0; 2162} 2163 2164static int tgsi_xpd(struct r600_shader_ctx *ctx) 2165{ 2166 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2167 struct r600_bc_alu_src r600_src[3]; 2168 struct r600_bc_alu alu; 2169 uint32_t use_temp = 0; 2170 int i, r; 2171 2172 if (inst->Dst[0].Register.WriteMask != 0xf) 2173 use_temp = 1; 2174 2175 r = tgsi_split_constant(ctx, r600_src); 2176 if (r) 2177 return r; 2178 r = tgsi_split_literal_constant(ctx, r600_src); 2179 if (r) 2180 return r; 2181 2182 for (i = 0; i < 4; i++) { 2183 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2184 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2185 2186 alu.src[0] = r600_src[0]; 2187 switch (i) { 2188 case 0: 2189 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2190 break; 2191 case 1: 2192 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2193 break; 2194 case 2: 2195 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2196 break; 2197 case 3: 2198 alu.src[0].sel = V_SQ_ALU_SRC_0; 2199 alu.src[0].chan = i; 2200 } 2201 2202 alu.src[1] = r600_src[1]; 2203 switch (i) { 2204 case 0: 2205 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2206 break; 2207 case 1: 2208 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2209 break; 2210 case 2: 2211 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2212 break; 2213 case 3: 2214 alu.src[1].sel = V_SQ_ALU_SRC_0; 2215 alu.src[1].chan = i; 2216 } 2217 2218 alu.dst.sel = ctx->temp_reg; 2219 alu.dst.chan = i; 2220 alu.dst.write = 1; 2221 2222 if (i == 3) 2223 alu.last = 1; 2224 r = r600_bc_add_alu(ctx->bc, &alu); 2225 if (r) 2226 return r; 2227 2228 r = r600_bc_add_literal(ctx->bc, ctx->value); 2229 if (r) 2230 return r; 2231 } 2232 2233 for (i = 0; i < 4; i++) { 2234 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2235 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2236 2237 alu.src[0] = r600_src[0]; 2238 switch (i) { 2239 case 0: 2240 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2241 break; 2242 case 1: 2243 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2244 break; 2245 case 2: 2246 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2247 break; 2248 case 3: 2249 alu.src[0].sel = V_SQ_ALU_SRC_0; 2250 alu.src[0].chan = i; 2251 } 2252 2253 alu.src[1] = r600_src[1]; 2254 switch (i) { 2255 case 0: 2256 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2257 break; 2258 case 1: 2259 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2260 break; 2261 case 2: 2262 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2263 break; 2264 case 3: 2265 alu.src[1].sel = V_SQ_ALU_SRC_0; 2266 alu.src[1].chan = i; 2267 } 2268 2269 alu.src[2].sel = ctx->temp_reg; 2270 alu.src[2].neg = 1; 2271 alu.src[2].chan = i; 2272 2273 if (use_temp) 2274 alu.dst.sel = ctx->temp_reg; 2275 else { 2276 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2277 if (r) 2278 return r; 2279 } 2280 alu.dst.chan = i; 2281 alu.dst.write = 1; 2282 alu.is_op3 = 1; 2283 if (i == 3) 2284 alu.last = 1; 2285 r = r600_bc_add_alu(ctx->bc, &alu); 2286 if (r) 2287 return r; 2288 2289 r = r600_bc_add_literal(ctx->bc, ctx->value); 2290 if (r) 2291 return r; 2292 } 2293 if (use_temp) 2294 return tgsi_helper_copy(ctx, inst); 2295 return 0; 2296} 2297 2298static int tgsi_exp(struct r600_shader_ctx *ctx) 2299{ 2300 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2301 struct r600_bc_alu_src r600_src[3] = { { 0 } }; 2302 struct r600_bc_alu alu; 2303 int r; 2304 2305 /* result.x = 2^floor(src); */ 2306 if (inst->Dst[0].Register.WriteMask & 1) { 2307 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2308 2309 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2310 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2311 if (r) 2312 return r; 2313 2314 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2315 2316 alu.dst.sel = ctx->temp_reg; 2317 alu.dst.chan = 0; 2318 alu.dst.write = 1; 2319 alu.last = 1; 2320 r = r600_bc_add_alu(ctx->bc, &alu); 2321 if (r) 2322 return r; 2323 2324 r = r600_bc_add_literal(ctx->bc, ctx->value); 2325 if (r) 2326 return r; 2327 2328 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2329 alu.src[0].sel = ctx->temp_reg; 2330 alu.src[0].chan = 0; 2331 2332 alu.dst.sel = ctx->temp_reg; 2333 alu.dst.chan = 0; 2334 alu.dst.write = 1; 2335 alu.last = 1; 2336 r = r600_bc_add_alu(ctx->bc, &alu); 2337 if (r) 2338 return r; 2339 2340 r = r600_bc_add_literal(ctx->bc, ctx->value); 2341 if (r) 2342 return r; 2343 } 2344 2345 /* result.y = tmp - floor(tmp); */ 2346 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2347 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2348 2349 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2350 alu.src[0] = r600_src[0]; 2351 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2352 if (r) 2353 return r; 2354 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2355 2356 alu.dst.sel = ctx->temp_reg; 2357// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2358// if (r) 2359// return r; 2360 alu.dst.write = 1; 2361 alu.dst.chan = 1; 2362 2363 alu.last = 1; 2364 2365 r = r600_bc_add_alu(ctx->bc, &alu); 2366 if (r) 2367 return r; 2368 r = r600_bc_add_literal(ctx->bc, ctx->value); 2369 if (r) 2370 return r; 2371 } 2372 2373 /* result.z = RoughApprox2ToX(tmp);*/ 2374 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2375 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2376 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2377 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2378 if (r) 2379 return r; 2380 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2381 2382 alu.dst.sel = ctx->temp_reg; 2383 alu.dst.write = 1; 2384 alu.dst.chan = 2; 2385 2386 alu.last = 1; 2387 2388 r = r600_bc_add_alu(ctx->bc, &alu); 2389 if (r) 2390 return r; 2391 r = r600_bc_add_literal(ctx->bc, ctx->value); 2392 if (r) 2393 return r; 2394 } 2395 2396 /* result.w = 1.0;*/ 2397 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2398 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2399 2400 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2401 alu.src[0].sel = V_SQ_ALU_SRC_1; 2402 alu.src[0].chan = 0; 2403 2404 alu.dst.sel = ctx->temp_reg; 2405 alu.dst.chan = 3; 2406 alu.dst.write = 1; 2407 alu.last = 1; 2408 r = r600_bc_add_alu(ctx->bc, &alu); 2409 if (r) 2410 return r; 2411 r = r600_bc_add_literal(ctx->bc, ctx->value); 2412 if (r) 2413 return r; 2414 } 2415 return tgsi_helper_copy(ctx, inst); 2416} 2417 2418static int tgsi_log(struct r600_shader_ctx *ctx) 2419{ 2420 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2421 struct r600_bc_alu alu; 2422 int r; 2423 2424 /* result.x = floor(log2(src)); */ 2425 if (inst->Dst[0].Register.WriteMask & 1) { 2426 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2427 2428 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2429 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2430 if (r) 2431 return r; 2432 2433 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2434 2435 alu.dst.sel = ctx->temp_reg; 2436 alu.dst.chan = 0; 2437 alu.dst.write = 1; 2438 alu.last = 1; 2439 r = r600_bc_add_alu(ctx->bc, &alu); 2440 if (r) 2441 return r; 2442 2443 r = r600_bc_add_literal(ctx->bc, ctx->value); 2444 if (r) 2445 return r; 2446 2447 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2448 alu.src[0].sel = ctx->temp_reg; 2449 alu.src[0].chan = 0; 2450 2451 alu.dst.sel = ctx->temp_reg; 2452 alu.dst.chan = 0; 2453 alu.dst.write = 1; 2454 alu.last = 1; 2455 2456 r = r600_bc_add_alu(ctx->bc, &alu); 2457 if (r) 2458 return r; 2459 2460 r = r600_bc_add_literal(ctx->bc, ctx->value); 2461 if (r) 2462 return r; 2463 } 2464 2465 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2466 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2467 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2468 2469 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2470 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2471 if (r) 2472 return r; 2473 2474 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2475 2476 alu.dst.sel = ctx->temp_reg; 2477 alu.dst.chan = 1; 2478 alu.dst.write = 1; 2479 alu.last = 1; 2480 2481 r = r600_bc_add_alu(ctx->bc, &alu); 2482 if (r) 2483 return r; 2484 2485 r = r600_bc_add_literal(ctx->bc, ctx->value); 2486 if (r) 2487 return r; 2488 2489 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2490 2491 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2492 alu.src[0].sel = ctx->temp_reg; 2493 alu.src[0].chan = 1; 2494 2495 alu.dst.sel = ctx->temp_reg; 2496 alu.dst.chan = 1; 2497 alu.dst.write = 1; 2498 alu.last = 1; 2499 2500 r = r600_bc_add_alu(ctx->bc, &alu); 2501 if (r) 2502 return r; 2503 2504 r = r600_bc_add_literal(ctx->bc, ctx->value); 2505 if (r) 2506 return r; 2507 2508 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2509 2510 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2511 alu.src[0].sel = ctx->temp_reg; 2512 alu.src[0].chan = 1; 2513 2514 alu.dst.sel = ctx->temp_reg; 2515 alu.dst.chan = 1; 2516 alu.dst.write = 1; 2517 alu.last = 1; 2518 2519 r = r600_bc_add_alu(ctx->bc, &alu); 2520 if (r) 2521 return r; 2522 2523 r = r600_bc_add_literal(ctx->bc, ctx->value); 2524 if (r) 2525 return r; 2526 2527 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2528 2529 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2530 alu.src[0].sel = ctx->temp_reg; 2531 alu.src[0].chan = 1; 2532 2533 alu.dst.sel = ctx->temp_reg; 2534 alu.dst.chan = 1; 2535 alu.dst.write = 1; 2536 alu.last = 1; 2537 2538 r = r600_bc_add_alu(ctx->bc, &alu); 2539 if (r) 2540 return r; 2541 2542 r = r600_bc_add_literal(ctx->bc, ctx->value); 2543 if (r) 2544 return r; 2545 2546 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2547 2548 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2549 2550 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2551 if (r) 2552 return r; 2553 2554 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2555 2556 alu.src[1].sel = ctx->temp_reg; 2557 alu.src[1].chan = 1; 2558 2559 alu.dst.sel = ctx->temp_reg; 2560 alu.dst.chan = 1; 2561 alu.dst.write = 1; 2562 alu.last = 1; 2563 2564 r = r600_bc_add_alu(ctx->bc, &alu); 2565 if (r) 2566 return r; 2567 2568 r = r600_bc_add_literal(ctx->bc, ctx->value); 2569 if (r) 2570 return r; 2571 } 2572 2573 /* result.z = log2(src);*/ 2574 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2575 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2576 2577 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2578 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2579 if (r) 2580 return r; 2581 2582 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2583 2584 alu.dst.sel = ctx->temp_reg; 2585 alu.dst.write = 1; 2586 alu.dst.chan = 2; 2587 alu.last = 1; 2588 2589 r = r600_bc_add_alu(ctx->bc, &alu); 2590 if (r) 2591 return r; 2592 2593 r = r600_bc_add_literal(ctx->bc, ctx->value); 2594 if (r) 2595 return r; 2596 } 2597 2598 /* result.w = 1.0; */ 2599 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2600 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2601 2602 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2603 alu.src[0].sel = V_SQ_ALU_SRC_1; 2604 alu.src[0].chan = 0; 2605 2606 alu.dst.sel = ctx->temp_reg; 2607 alu.dst.chan = 3; 2608 alu.dst.write = 1; 2609 alu.last = 1; 2610 2611 r = r600_bc_add_alu(ctx->bc, &alu); 2612 if (r) 2613 return r; 2614 2615 r = r600_bc_add_literal(ctx->bc, ctx->value); 2616 if (r) 2617 return r; 2618 } 2619 2620 return tgsi_helper_copy(ctx, inst); 2621} 2622 2623/* r6/7 only for now */ 2624static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2625{ 2626 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2627 struct r600_bc_alu alu; 2628 int r; 2629 2630 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2631 2632 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2633 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2634 if (r) 2635 return r; 2636 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2637 alu.last = 1; 2638 alu.dst.chan = 0; 2639 alu.dst.sel = ctx->temp_reg; 2640 alu.dst.write = 1; 2641 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2642 if (r) 2643 return r; 2644 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2645 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2646 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2647 if (r) 2648 return r; 2649 alu.src[0].sel = ctx->temp_reg; 2650 alu.src[0].chan = 0; 2651 alu.last = 1; 2652 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2653 if (r) 2654 return r; 2655 return 0; 2656} 2657static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2658{ 2659 /* TODO from r600c, ar values don't persist between clauses */ 2660 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2661 struct r600_bc_alu alu; 2662 int r; 2663 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2664 2665 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2666 2667 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2668 if (r) 2669 return r; 2670 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2671 2672 alu.last = 1; 2673 2674 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2675 if (r) 2676 return r; 2677 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2678 return 0; 2679} 2680 2681static int tgsi_opdst(struct r600_shader_ctx *ctx) 2682{ 2683 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2684 struct r600_bc_alu alu; 2685 int i, r = 0; 2686 2687 for (i = 0; i < 4; i++) { 2688 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2689 2690 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2691 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2692 if (r) 2693 return r; 2694 2695 if (i == 0 || i == 3) { 2696 alu.src[0].sel = V_SQ_ALU_SRC_1; 2697 } else { 2698 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2699 if (r) 2700 return r; 2701 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2702 } 2703 2704 if (i == 0 || i == 2) { 2705 alu.src[1].sel = V_SQ_ALU_SRC_1; 2706 } else { 2707 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2708 if (r) 2709 return r; 2710 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2711 } 2712 if (i == 3) 2713 alu.last = 1; 2714 r = r600_bc_add_alu(ctx->bc, &alu); 2715 if (r) 2716 return r; 2717 } 2718 return 0; 2719} 2720 2721static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2722{ 2723 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2724 struct r600_bc_alu alu; 2725 int r; 2726 2727 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2728 alu.inst = opcode; 2729 alu.predicate = 1; 2730 2731 alu.dst.sel = ctx->temp_reg; 2732 alu.dst.write = 1; 2733 alu.dst.chan = 0; 2734 2735 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2736 if (r) 2737 return r; 2738 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2739 alu.src[1].sel = V_SQ_ALU_SRC_0; 2740 alu.src[1].chan = 0; 2741 2742 alu.last = 1; 2743 2744 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2745 if (r) 2746 return r; 2747 return 0; 2748} 2749 2750static int pops(struct r600_shader_ctx *ctx, int pops) 2751{ 2752 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2753 ctx->bc->cf_last->pop_count = pops; 2754 return 0; 2755} 2756 2757static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2758{ 2759 switch(reason) { 2760 case FC_PUSH_VPM: 2761 ctx->bc->callstack[ctx->bc->call_sp].current--; 2762 break; 2763 case FC_PUSH_WQM: 2764 case FC_LOOP: 2765 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2766 break; 2767 case FC_REP: 2768 /* TOODO : for 16 vp asic should -= 2; */ 2769 ctx->bc->callstack[ctx->bc->call_sp].current --; 2770 break; 2771 } 2772} 2773 2774static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2775{ 2776 if (check_max_only) { 2777 int diff; 2778 switch (reason) { 2779 case FC_PUSH_VPM: 2780 diff = 1; 2781 break; 2782 case FC_PUSH_WQM: 2783 diff = 4; 2784 break; 2785 default: 2786 assert(0); 2787 diff = 0; 2788 } 2789 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2790 ctx->bc->callstack[ctx->bc->call_sp].max) { 2791 ctx->bc->callstack[ctx->bc->call_sp].max = 2792 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2793 } 2794 return; 2795 } 2796 switch (reason) { 2797 case FC_PUSH_VPM: 2798 ctx->bc->callstack[ctx->bc->call_sp].current++; 2799 break; 2800 case FC_PUSH_WQM: 2801 case FC_LOOP: 2802 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2803 break; 2804 case FC_REP: 2805 ctx->bc->callstack[ctx->bc->call_sp].current++; 2806 break; 2807 } 2808 2809 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2810 ctx->bc->callstack[ctx->bc->call_sp].max) { 2811 ctx->bc->callstack[ctx->bc->call_sp].max = 2812 ctx->bc->callstack[ctx->bc->call_sp].current; 2813 } 2814} 2815 2816static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2817{ 2818 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2819 2820 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2821 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2822 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2823 sp->num_mid++; 2824} 2825 2826static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2827{ 2828 ctx->bc->fc_sp++; 2829 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2830 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2831} 2832 2833static void fc_poplevel(struct r600_shader_ctx *ctx) 2834{ 2835 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2836 if (sp->mid) { 2837 free(sp->mid); 2838 sp->mid = NULL; 2839 } 2840 sp->num_mid = 0; 2841 sp->start = NULL; 2842 sp->type = 0; 2843 ctx->bc->fc_sp--; 2844} 2845 2846#if 0 2847static int emit_return(struct r600_shader_ctx *ctx) 2848{ 2849 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2850 return 0; 2851} 2852 2853static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2854{ 2855 2856 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2857 ctx->bc->cf_last->pop_count = pops; 2858 /* TODO work out offset */ 2859 return 0; 2860} 2861 2862static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2863{ 2864 return 0; 2865} 2866 2867static void emit_testflag(struct r600_shader_ctx *ctx) 2868{ 2869 2870} 2871 2872static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2873{ 2874 emit_testflag(ctx); 2875 emit_jump_to_offset(ctx, 1, 4); 2876 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2877 pops(ctx, ifidx + 1); 2878 emit_return(ctx); 2879} 2880 2881static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2882{ 2883 emit_testflag(ctx); 2884 2885 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2886 ctx->bc->cf_last->pop_count = 1; 2887 2888 fc_set_mid(ctx, fc_sp); 2889 2890 pops(ctx, 1); 2891} 2892#endif 2893 2894static int tgsi_if(struct r600_shader_ctx *ctx) 2895{ 2896 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2897 2898 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2899 2900 fc_pushlevel(ctx, FC_IF); 2901 2902 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2903 return 0; 2904} 2905 2906static int tgsi_else(struct r600_shader_ctx *ctx) 2907{ 2908 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2909 ctx->bc->cf_last->pop_count = 1; 2910 2911 fc_set_mid(ctx, ctx->bc->fc_sp); 2912 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2913 return 0; 2914} 2915 2916static int tgsi_endif(struct r600_shader_ctx *ctx) 2917{ 2918 pops(ctx, 1); 2919 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2920 R600_ERR("if/endif unbalanced in shader\n"); 2921 return -1; 2922 } 2923 2924 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2925 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2926 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2927 } else { 2928 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2929 } 2930 fc_poplevel(ctx); 2931 2932 callstack_decrease_current(ctx, FC_PUSH_VPM); 2933 return 0; 2934} 2935 2936static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2937{ 2938 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2939 2940 fc_pushlevel(ctx, FC_LOOP); 2941 2942 /* check stack depth */ 2943 callstack_check_depth(ctx, FC_LOOP, 0); 2944 return 0; 2945} 2946 2947static int tgsi_endloop(struct r600_shader_ctx *ctx) 2948{ 2949 int i; 2950 2951 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2952 2953 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2954 R600_ERR("loop/endloop in shader code are not paired.\n"); 2955 return -EINVAL; 2956 } 2957 2958 /* fixup loop pointers - from r600isa 2959 LOOP END points to CF after LOOP START, 2960 LOOP START point to CF after LOOP END 2961 BRK/CONT point to LOOP END CF 2962 */ 2963 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2964 2965 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2966 2967 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2968 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2969 } 2970 /* TODO add LOOPRET support */ 2971 fc_poplevel(ctx); 2972 callstack_decrease_current(ctx, FC_LOOP); 2973 return 0; 2974} 2975 2976static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2977{ 2978 unsigned int fscp; 2979 2980 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2981 { 2982 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2983 break; 2984 } 2985 2986 if (fscp == 0) { 2987 R600_ERR("Break not inside loop/endloop pair\n"); 2988 return -EINVAL; 2989 } 2990 2991 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2992 ctx->bc->cf_last->pop_count = 1; 2993 2994 fc_set_mid(ctx, fscp); 2995 2996 pops(ctx, 1); 2997 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2998 return 0; 2999} 3000 3001static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 3002 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3003 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3004 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3005 3006 /* FIXME: 3007 * For state trackers other than OpenGL, we'll want to use 3008 * _RECIP_IEEE instead. 3009 */ 3010 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 3011 3012 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 3013 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3014 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3015 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3016 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3017 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3018 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3019 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3020 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3021 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3022 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3023 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3024 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3025 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3026 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3027 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3028 /* gap */ 3029 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3030 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3031 /* gap */ 3032 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3033 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3034 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3035 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3036 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3037 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3038 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3039 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3040 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3041 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3042 /* gap */ 3043 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3044 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3045 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3046 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3047 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3048 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3049 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3050 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3051 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3052 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3053 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3054 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3055 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3056 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3057 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3058 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3059 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3060 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3061 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3062 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3063 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3064 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3065 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3066 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3067 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3068 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3069 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3070 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3071 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3072 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3073 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3074 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3075 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3076 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3077 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3078 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3079 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3080 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3081 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3082 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3083 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3084 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3085 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3086 /* gap */ 3087 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3088 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3089 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3090 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3091 /* gap */ 3092 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3093 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3094 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3095 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3096 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3097 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3098 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3099 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3100 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3101 /* gap */ 3102 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3103 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3104 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3105 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3106 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3107 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3108 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3109 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3110 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3111 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3112 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3113 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3114 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3115 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3116 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3117 /* gap */ 3118 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3119 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3120 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3121 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3122 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3123 /* gap */ 3124 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3125 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3126 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3127 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3128 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3129 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3130 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3131 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3132 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3133 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3134 /* gap */ 3135 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3136 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3137 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3138 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3139 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3140 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3141 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3142 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3143 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3144 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3145 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3146 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3147 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3148 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3149 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3150 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3151 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3152 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3153 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3154 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3155 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3156 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3157 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3158 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3159 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3160 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3161 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3162 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3163}; 3164 3165static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3166 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3167 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3168 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3169 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3170 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 3171 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3172 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3173 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3174 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3175 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3176 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3177 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3178 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3179 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3180 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3181 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3182 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3183 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3184 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3185 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3186 /* gap */ 3187 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3188 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3189 /* gap */ 3190 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3191 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3192 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3193 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3194 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3195 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3196 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3197 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3198 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3199 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3200 /* gap */ 3201 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3202 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3203 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3204 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3205 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3206 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3207 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3208 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3209 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3210 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3211 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3212 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3213 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3214 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3215 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3216 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3217 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3218 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3219 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3220 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3221 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3222 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3223 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3224 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3225 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3226 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3227 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3228 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3229 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3230 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3231 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3232 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3233 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3234 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3235 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3236 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3237 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3238 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3239 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3240 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3241 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3242 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3243 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3244 /* gap */ 3245 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3246 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3247 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3248 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3249 /* gap */ 3250 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3251 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3252 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3253 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3254 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3255 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3256 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3257 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3258 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3259 /* gap */ 3260 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3261 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3262 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3263 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3264 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3265 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3266 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3267 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3268 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3269 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3270 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3271 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3272 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3273 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3274 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3275 /* gap */ 3276 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3277 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3278 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3279 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3280 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3281 /* gap */ 3282 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3283 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3284 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3285 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3286 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3287 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3288 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3289 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3290 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3291 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3292 /* gap */ 3293 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3294 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3295 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3296 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3297 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3298 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3299 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3300 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3301 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3302 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3303 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3304 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3305 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3306 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3307 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3308 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3309 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3310 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3311 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3312 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3313 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3314 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3315 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3316 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3317 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3318 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3319 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3320 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3321}; 3322