r600_shader.c revision 29c4a15bf61a76cd71ffa5b8f09706d0eab84281
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_opcodes.h" 32#include "r600d.h" 33#include <stdio.h> 34#include <errno.h> 35 36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) 37{ 38 struct r600_pipe_state *rstate = &shader->rstate; 39 struct r600_shader *rshader = &shader->shader; 40 unsigned spi_vs_out_id[10]; 41 unsigned i, tmp; 42 43 /* clear previous register */ 44 rstate->nregs = 0; 45 46 /* so far never got proper semantic id from tgsi */ 47 for (i = 0; i < 10; i++) { 48 spi_vs_out_id[i] = 0; 49 } 50 for (i = 0; i < 32; i++) { 51 tmp = i << ((i & 3) * 8); 52 spi_vs_out_id[i / 4] |= tmp; 53 } 54 for (i = 0; i < 10; i++) { 55 r600_pipe_state_add_reg(rstate, 56 R_028614_SPI_VS_OUT_ID_0 + i * 4, 57 spi_vs_out_id[i], 0xFFFFFFFF, NULL); 58 } 59 60 r600_pipe_state_add_reg(rstate, 61 R_0286C4_SPI_VS_OUT_CONFIG, 62 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 63 0xFFFFFFFF, NULL); 64 r600_pipe_state_add_reg(rstate, 65 R_028868_SQ_PGM_RESOURCES_VS, 66 S_028868_NUM_GPRS(rshader->bc.ngpr) | 67 S_028868_STACK_SIZE(rshader->bc.nstack), 68 0xFFFFFFFF, NULL); 69 r600_pipe_state_add_reg(rstate, 70 R_0288A4_SQ_PGM_RESOURCES_FS, 71 0x00000000, 0xFFFFFFFF, NULL); 72 r600_pipe_state_add_reg(rstate, 73 R_0288D0_SQ_PGM_CF_OFFSET_VS, 74 0x00000000, 0xFFFFFFFF, NULL); 75 r600_pipe_state_add_reg(rstate, 76 R_0288DC_SQ_PGM_CF_OFFSET_FS, 77 0x00000000, 0xFFFFFFFF, NULL); 78 r600_pipe_state_add_reg(rstate, 79 R_028858_SQ_PGM_START_VS, 80 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 81 r600_pipe_state_add_reg(rstate, 82 R_028894_SQ_PGM_START_FS, 83 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 84 85 r600_pipe_state_add_reg(rstate, 86 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, 87 0xFFFFFFFF, NULL); 88 89} 90 91int r600_find_vs_semantic_index(struct r600_shader *vs, 92 struct r600_shader *ps, int id) 93{ 94 struct r600_shader_io *input = &ps->input[id]; 95 96 for (int i = 0; i < vs->noutput; i++) { 97 if (input->name == vs->output[i].name && 98 input->sid == vs->output[i].sid) { 99 return i - 1; 100 } 101 } 102 return 0; 103} 104 105static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) 106{ 107 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 108 struct r600_pipe_state *rstate = &shader->rstate; 109 struct r600_shader *rshader = &shader->shader; 110 unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; 111 int pos_index = -1, face_index = -1; 112 113 /* clear previous register */ 114 rstate->nregs = 0; 115 116 for (i = 0; i < rshader->ninput; i++) { 117 tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); 118 if (rshader->input[i].centroid) 119 tmp |= S_028644_SEL_CENTROID(1); 120 if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) 121 tmp |= S_028644_SEL_LINEAR(1); 122 123 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 124 pos_index = i; 125 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || 126 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || 127 rshader->input[i].name == TGSI_SEMANTIC_POSITION) { 128 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); 129 } 130 if (rshader->input[i].name == TGSI_SEMANTIC_FACE) 131 face_index = i; 132 if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && 133 rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { 134 tmp |= S_028644_PT_SPRITE_TEX(1); 135 } 136 r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); 137 } 138 for (i = 0; i < rshader->noutput; i++) { 139 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 140 r600_pipe_state_add_reg(rstate, 141 R_02880C_DB_SHADER_CONTROL, 142 S_02880C_Z_EXPORT_ENABLE(1), 143 S_02880C_Z_EXPORT_ENABLE(1), NULL); 144 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 145 r600_pipe_state_add_reg(rstate, 146 R_02880C_DB_SHADER_CONTROL, 147 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), 148 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); 149 } 150 151 exports_ps = 0; 152 num_cout = 0; 153 for (i = 0; i < rshader->noutput; i++) { 154 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 155 exports_ps |= 1; 156 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 157 num_cout++; 158 } 159 } 160 exports_ps |= S_028854_EXPORT_COLORS(num_cout); 161 if (!exports_ps) { 162 /* always at least export 1 component per pixel */ 163 exports_ps = 2; 164 } 165 166 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 167 S_0286CC_PERSP_GRADIENT_ENA(1); 168 spi_input_z = 0; 169 if (pos_index != -1) { 170 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | 171 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | 172 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | 173 S_0286CC_BARYC_SAMPLE_CNTL(1)); 174 spi_input_z |= 1; 175 } 176 177 spi_ps_in_control_1 = 0; 178 if (face_index != -1) { 179 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 180 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 181 } 182 183 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); 184 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); 185 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); 186 r600_pipe_state_add_reg(rstate, 187 R_028840_SQ_PGM_START_PS, 188 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 189 r600_pipe_state_add_reg(rstate, 190 R_028850_SQ_PGM_RESOURCES_PS, 191 S_028868_NUM_GPRS(rshader->bc.ngpr) | 192 S_028868_STACK_SIZE(rshader->bc.nstack), 193 0xFFFFFFFF, NULL); 194 r600_pipe_state_add_reg(rstate, 195 R_028854_SQ_PGM_EXPORTS_PS, 196 exports_ps, 0xFFFFFFFF, NULL); 197 r600_pipe_state_add_reg(rstate, 198 R_0288CC_SQ_PGM_CF_OFFSET_PS, 199 0x00000000, 0xFFFFFFFF, NULL); 200 201 if (rshader->uses_kill) { 202 /* only set some bits here, the other bits are set in the dsa state */ 203 r600_pipe_state_add_reg(rstate, 204 R_02880C_DB_SHADER_CONTROL, 205 S_02880C_KILL_ENABLE(1), 206 S_02880C_KILL_ENABLE(1), NULL); 207 } 208 r600_pipe_state_add_reg(rstate, 209 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, 210 0xFFFFFFFF, NULL); 211} 212 213static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 214{ 215 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 216 struct r600_shader *rshader = &shader->shader; 217 void *ptr; 218 219 /* copy new shader */ 220 if (shader->bo == NULL) { 221 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); 222 if (shader->bo == NULL) { 223 return -ENOMEM; 224 } 225 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 226 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); 227 r600_bo_unmap(rctx->radeon, shader->bo); 228 } 229 /* build state */ 230 rshader->flat_shade = rctx->flatshade; 231 switch (rshader->processor_type) { 232 case TGSI_PROCESSOR_VERTEX: 233 if (rshader->family >= CHIP_CEDAR) { 234 evergreen_pipe_shader_vs(ctx, shader); 235 } else { 236 r600_pipe_shader_vs(ctx, shader); 237 } 238 break; 239 case TGSI_PROCESSOR_FRAGMENT: 240 if (rshader->family >= CHIP_CEDAR) { 241 evergreen_pipe_shader_ps(ctx, shader); 242 } else { 243 r600_pipe_shader_ps(ctx, shader); 244 } 245 break; 246 default: 247 return -EINVAL; 248 } 249 r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); 250 return 0; 251} 252 253static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) 254{ 255 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 256 struct r600_shader *shader = &rshader->shader; 257 const struct util_format_description *desc; 258 enum pipe_format resource_format[160]; 259 unsigned i, nresources = 0; 260 struct r600_bc *bc = &shader->bc; 261 struct r600_bc_cf *cf; 262 struct r600_bc_vtx *vtx; 263 264 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 265 return 0; 266 /* doing a full memcmp fell over the refcount */ 267 if ((rshader->vertex_elements.count == rctx->vertex_elements->count) && 268 (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) { 269 return 0; 270 } 271 rshader->vertex_elements = *rctx->vertex_elements; 272 for (i = 0; i < rctx->vertex_elements->count; i++) { 273 resource_format[nresources++] = rctx->vertex_elements->hw_format[i]; 274 } 275 r600_bo_reference(rctx->radeon, &rshader->bo, NULL); 276 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 277 switch (cf->inst) { 278 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 279 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 280 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 281 desc = util_format_description(resource_format[vtx->buffer_id]); 282 if (desc == NULL) { 283 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 284 return -EINVAL; 285 } 286 vtx->dst_sel_x = desc->swizzle[0]; 287 vtx->dst_sel_y = desc->swizzle[1]; 288 vtx->dst_sel_z = desc->swizzle[2]; 289 vtx->dst_sel_w = desc->swizzle[3]; 290 } 291 break; 292 default: 293 break; 294 } 295 } 296 return r600_bc_build(&shader->bc); 297} 298 299int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) 300{ 301 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 302 int r; 303 304 if (shader == NULL) 305 return -EINVAL; 306 /* there should be enough input */ 307 if (rctx->vertex_elements->count < shader->shader.bc.nresource) { 308 R600_ERR("%d resources provided, expecting %d\n", 309 rctx->vertex_elements->count, shader->shader.bc.nresource); 310 return -EINVAL; 311 } 312 r = r600_shader_update(ctx, shader); 313 if (r) 314 return r; 315 return r600_pipe_shader(ctx, shader); 316} 317 318int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 319int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 320{ 321 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 322 int r; 323 324//fprintf(stderr, "--------------------------------------------------------------\n"); 325//tgsi_dump(tokens, 0); 326 shader->shader.family = r600_get_family(rctx->radeon); 327 r = r600_shader_from_tgsi(tokens, &shader->shader); 328 if (r) { 329 R600_ERR("translation from TGSI failed !\n"); 330 return r; 331 } 332 r = r600_bc_build(&shader->shader.bc); 333 if (r) { 334 R600_ERR("building bytecode failed !\n"); 335 return r; 336 } 337//fprintf(stderr, "______________________________________________________________\n"); 338 return 0; 339} 340 341void 342r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 343{ 344 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 345 346 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 347 348 r600_bc_clear(&shader->shader.bc); 349 350 /* FIXME: is there more stuff to free? */ 351} 352 353/* 354 * tgsi -> r600 shader 355 */ 356struct r600_shader_tgsi_instruction; 357 358struct r600_shader_ctx { 359 struct tgsi_shader_info info; 360 struct tgsi_parse_context parse; 361 const struct tgsi_token *tokens; 362 unsigned type; 363 unsigned file_offset[TGSI_FILE_COUNT]; 364 unsigned temp_reg; 365 struct r600_shader_tgsi_instruction *inst_info; 366 struct r600_bc *bc; 367 struct r600_shader *shader; 368 u32 value[4]; 369 u32 *literals; 370 u32 nliterals; 371 u32 max_driver_temp_used; 372 /* needed for evergreen interpolation */ 373 boolean input_centroid; 374 boolean input_linear; 375 boolean input_perspective; 376 int num_interp_gpr; 377}; 378 379struct r600_shader_tgsi_instruction { 380 unsigned tgsi_opcode; 381 unsigned is_op3; 382 unsigned r600_opcode; 383 int (*process)(struct r600_shader_ctx *ctx); 384}; 385 386static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 387static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 388 389static int tgsi_is_supported(struct r600_shader_ctx *ctx) 390{ 391 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 392 int j; 393 394 if (i->Instruction.NumDstRegs > 1) { 395 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 396 return -EINVAL; 397 } 398 if (i->Instruction.Predicate) { 399 R600_ERR("predicate unsupported\n"); 400 return -EINVAL; 401 } 402#if 0 403 if (i->Instruction.Label) { 404 R600_ERR("label unsupported\n"); 405 return -EINVAL; 406 } 407#endif 408 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 409 if (i->Src[j].Register.Dimension) { 410 R600_ERR("unsupported src %d (dimension %d)\n", j, 411 i->Src[j].Register.Dimension); 412 return -EINVAL; 413 } 414 } 415 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 416 if (i->Dst[j].Register.Dimension) { 417 R600_ERR("unsupported dst (dimension)\n"); 418 return -EINVAL; 419 } 420 } 421 return 0; 422} 423 424static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 425{ 426 int i, r; 427 struct r600_bc_alu alu; 428 int gpr = 0, base_chan = 0; 429 int ij_index = 0; 430 431 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 432 ij_index = 0; 433 if (ctx->shader->input[input].centroid) 434 ij_index++; 435 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 436 ij_index = 0; 437 /* if we have perspective add one */ 438 if (ctx->input_perspective) { 439 ij_index++; 440 /* if we have perspective centroid */ 441 if (ctx->input_centroid) 442 ij_index++; 443 } 444 if (ctx->shader->input[input].centroid) 445 ij_index++; 446 } 447 448 /* work out gpr and base_chan from index */ 449 gpr = ij_index / 2; 450 base_chan = (2 * (ij_index % 2)) + 1; 451 452 for (i = 0; i < 8; i++) { 453 memset(&alu, 0, sizeof(struct r600_bc_alu)); 454 455 if (i < 4) 456 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 457 else 458 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 459 460 if ((i > 1) && (i < 6)) { 461 alu.dst.sel = ctx->shader->input[input].gpr; 462 alu.dst.write = 1; 463 } 464 465 alu.dst.chan = i % 4; 466 467 alu.src[0].sel = gpr; 468 alu.src[0].chan = (base_chan - (i % 2)); 469 470 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 471 472 alu.bank_swizzle_force = SQ_ALU_VEC_210; 473 if ((i % 4) == 3) 474 alu.last = 1; 475 r = r600_bc_add_alu(ctx->bc, &alu); 476 if (r) 477 return r; 478 } 479 return 0; 480} 481 482 483static int tgsi_declaration(struct r600_shader_ctx *ctx) 484{ 485 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 486 struct r600_bc_vtx vtx; 487 unsigned i; 488 int r; 489 490 switch (d->Declaration.File) { 491 case TGSI_FILE_INPUT: 492 i = ctx->shader->ninput++; 493 ctx->shader->input[i].name = d->Semantic.Name; 494 ctx->shader->input[i].sid = d->Semantic.Index; 495 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 496 ctx->shader->input[i].centroid = d->Declaration.Centroid; 497 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 498 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 499 /* turn input into fetch */ 500 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 501 vtx.inst = 0; 502 vtx.fetch_type = 0; 503 vtx.buffer_id = i; 504 /* register containing the index into the buffer */ 505 vtx.src_gpr = 0; 506 vtx.src_sel_x = 0; 507 vtx.mega_fetch_count = 0x1F; 508 vtx.dst_gpr = ctx->shader->input[i].gpr; 509 vtx.dst_sel_x = 0; 510 vtx.dst_sel_y = 1; 511 vtx.dst_sel_z = 2; 512 vtx.dst_sel_w = 3; 513 vtx.use_const_fields = 1; 514 r = r600_bc_add_vtx(ctx->bc, &vtx); 515 if (r) 516 return r; 517 } 518 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { 519 /* turn input into interpolate on EG */ 520 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 521 if (ctx->shader->input[i].interpolate > 0) { 522 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 523 evergreen_interp_alu(ctx, i); 524 } 525 } 526 } 527 break; 528 case TGSI_FILE_OUTPUT: 529 i = ctx->shader->noutput++; 530 ctx->shader->output[i].name = d->Semantic.Name; 531 ctx->shader->output[i].sid = d->Semantic.Index; 532 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 533 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 534 break; 535 case TGSI_FILE_CONSTANT: 536 case TGSI_FILE_TEMPORARY: 537 case TGSI_FILE_SAMPLER: 538 case TGSI_FILE_ADDRESS: 539 break; 540 default: 541 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 542 return -EINVAL; 543 } 544 return 0; 545} 546 547static int r600_get_temp(struct r600_shader_ctx *ctx) 548{ 549 return ctx->temp_reg + ctx->max_driver_temp_used++; 550} 551 552/* 553 * for evergreen we need to scan the shader to find the number of GPRs we need to 554 * reserve for interpolation. 555 * 556 * we need to know if we are going to emit 557 * any centroid inputs 558 * if perspective and linear are required 559*/ 560static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 561{ 562 int i; 563 int num_baryc; 564 565 ctx->input_linear = FALSE; 566 ctx->input_perspective = FALSE; 567 ctx->input_centroid = FALSE; 568 ctx->num_interp_gpr = 1; 569 570 /* any centroid inputs */ 571 for (i = 0; i < ctx->info.num_inputs; i++) { 572 /* skip position/face */ 573 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 574 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 575 continue; 576 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 577 ctx->input_linear = TRUE; 578 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 579 ctx->input_perspective = TRUE; 580 if (ctx->info.input_centroid[i]) 581 ctx->input_centroid = TRUE; 582 } 583 584 num_baryc = 0; 585 /* ignoring sample for now */ 586 if (ctx->input_perspective) 587 num_baryc++; 588 if (ctx->input_linear) 589 num_baryc++; 590 if (ctx->input_centroid) 591 num_baryc *= 2; 592 593 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 594 595 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 596 return ctx->num_interp_gpr; 597} 598 599int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 600{ 601 struct tgsi_full_immediate *immediate; 602 struct r600_shader_ctx ctx; 603 struct r600_bc_output output[32]; 604 unsigned output_done, noutput; 605 unsigned opcode; 606 int i, r = 0, pos0; 607 608 ctx.bc = &shader->bc; 609 ctx.shader = shader; 610 r = r600_bc_init(ctx.bc, shader->family); 611 if (r) 612 return r; 613 ctx.tokens = tokens; 614 tgsi_scan_shader(tokens, &ctx.info); 615 tgsi_parse_init(&ctx.parse, tokens); 616 ctx.type = ctx.parse.FullHeader.Processor.Processor; 617 shader->processor_type = ctx.type; 618 619 /* register allocations */ 620 /* Values [0,127] correspond to GPR[0..127]. 621 * Values [128,159] correspond to constant buffer bank 0 622 * Values [160,191] correspond to constant buffer bank 1 623 * Values [256,511] correspond to cfile constants c[0..255]. 624 * Other special values are shown in the list below. 625 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 626 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 627 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 628 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 629 * 248 SQ_ALU_SRC_0: special constant 0.0. 630 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 631 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 632 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 633 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 634 * 253 SQ_ALU_SRC_LITERAL: literal constant. 635 * 254 SQ_ALU_SRC_PV: previous vector result. 636 * 255 SQ_ALU_SRC_PS: previous scalar result. 637 */ 638 for (i = 0; i < TGSI_FILE_COUNT; i++) { 639 ctx.file_offset[i] = 0; 640 } 641 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 642 ctx.file_offset[TGSI_FILE_INPUT] = 1; 643 } 644 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { 645 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 646 } 647 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 648 ctx.info.file_count[TGSI_FILE_INPUT]; 649 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 650 ctx.info.file_count[TGSI_FILE_OUTPUT]; 651 652 ctx.file_offset[TGSI_FILE_CONSTANT] = 128; 653 654 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 655 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 656 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 657 658 ctx.nliterals = 0; 659 ctx.literals = NULL; 660 661 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 662 tgsi_parse_token(&ctx.parse); 663 switch (ctx.parse.FullToken.Token.Type) { 664 case TGSI_TOKEN_TYPE_IMMEDIATE: 665 immediate = &ctx.parse.FullToken.FullImmediate; 666 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 667 if(ctx.literals == NULL) { 668 r = -ENOMEM; 669 goto out_err; 670 } 671 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 672 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 673 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 674 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 675 ctx.nliterals++; 676 break; 677 case TGSI_TOKEN_TYPE_DECLARATION: 678 r = tgsi_declaration(&ctx); 679 if (r) 680 goto out_err; 681 break; 682 case TGSI_TOKEN_TYPE_INSTRUCTION: 683 r = tgsi_is_supported(&ctx); 684 if (r) 685 goto out_err; 686 ctx.max_driver_temp_used = 0; 687 /* reserve first tmp for everyone */ 688 r600_get_temp(&ctx); 689 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 690 if (ctx.bc->chiprev == 2) 691 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 692 else 693 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 694 r = ctx.inst_info->process(&ctx); 695 if (r) 696 goto out_err; 697 r = r600_bc_add_literal(ctx.bc, ctx.value); 698 if (r) 699 goto out_err; 700 break; 701 default: 702 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 703 r = -EINVAL; 704 goto out_err; 705 } 706 } 707 /* export output */ 708 noutput = shader->noutput; 709 for (i = 0, pos0 = 0; i < noutput; i++) { 710 memset(&output[i], 0, sizeof(struct r600_bc_output)); 711 output[i].gpr = shader->output[i].gpr; 712 output[i].elem_size = 3; 713 output[i].swizzle_x = 0; 714 output[i].swizzle_y = 1; 715 output[i].swizzle_z = 2; 716 output[i].swizzle_w = 3; 717 output[i].barrier = 1; 718 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 719 output[i].array_base = i - pos0; 720 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 721 switch (ctx.type) { 722 case TGSI_PROCESSOR_VERTEX: 723 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 724 output[i].array_base = 60; 725 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 726 /* position doesn't count in array_base */ 727 pos0++; 728 } 729 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 730 output[i].array_base = 61; 731 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 732 /* position doesn't count in array_base */ 733 pos0++; 734 } 735 break; 736 case TGSI_PROCESSOR_FRAGMENT: 737 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 738 output[i].array_base = shader->output[i].sid; 739 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 740 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 741 output[i].array_base = 61; 742 output[i].swizzle_x = 2; 743 output[i].swizzle_y = 7; 744 output[i].swizzle_z = output[i].swizzle_w = 7; 745 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 746 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 747 output[i].array_base = 61; 748 output[i].swizzle_x = 7; 749 output[i].swizzle_y = 1; 750 output[i].swizzle_z = output[i].swizzle_w = 7; 751 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 752 } else { 753 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 754 r = -EINVAL; 755 goto out_err; 756 } 757 break; 758 default: 759 R600_ERR("unsupported processor type %d\n", ctx.type); 760 r = -EINVAL; 761 goto out_err; 762 } 763 } 764 /* add fake param output for vertex shader if no param is exported */ 765 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 766 for (i = 0, pos0 = 0; i < noutput; i++) { 767 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 768 pos0 = 1; 769 break; 770 } 771 } 772 if (!pos0) { 773 memset(&output[i], 0, sizeof(struct r600_bc_output)); 774 output[i].gpr = 0; 775 output[i].elem_size = 3; 776 output[i].swizzle_x = 0; 777 output[i].swizzle_y = 1; 778 output[i].swizzle_z = 2; 779 output[i].swizzle_w = 3; 780 output[i].barrier = 1; 781 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 782 output[i].array_base = 0; 783 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 784 noutput++; 785 } 786 } 787 /* add fake pixel export */ 788 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 789 memset(&output[0], 0, sizeof(struct r600_bc_output)); 790 output[0].gpr = 0; 791 output[0].elem_size = 3; 792 output[0].swizzle_x = 7; 793 output[0].swizzle_y = 7; 794 output[0].swizzle_z = 7; 795 output[0].swizzle_w = 7; 796 output[0].barrier = 1; 797 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 798 output[0].array_base = 0; 799 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 800 noutput++; 801 } 802 /* set export done on last export of each type */ 803 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 804 if (i == (noutput - 1)) { 805 output[i].end_of_program = 1; 806 } 807 if (!(output_done & (1 << output[i].type))) { 808 output_done |= (1 << output[i].type); 809 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 810 } 811 } 812 /* add output to bytecode */ 813 for (i = 0; i < noutput; i++) { 814 r = r600_bc_add_output(ctx.bc, &output[i]); 815 if (r) 816 goto out_err; 817 } 818 free(ctx.literals); 819 tgsi_parse_free(&ctx.parse); 820 return 0; 821out_err: 822 free(ctx.literals); 823 tgsi_parse_free(&ctx.parse); 824 return r; 825} 826 827static int tgsi_unsupported(struct r600_shader_ctx *ctx) 828{ 829 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 830 return -EINVAL; 831} 832 833static int tgsi_end(struct r600_shader_ctx *ctx) 834{ 835 return 0; 836} 837 838static int tgsi_src(struct r600_shader_ctx *ctx, 839 const struct tgsi_full_src_register *tgsi_src, 840 struct r600_bc_alu_src *r600_src) 841{ 842 int index; 843 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 844 r600_src->sel = tgsi_src->Register.Index; 845 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 846 r600_src->sel = 0; 847 index = tgsi_src->Register.Index; 848 ctx->value[0] = ctx->literals[index * 4 + 0]; 849 ctx->value[1] = ctx->literals[index * 4 + 1]; 850 ctx->value[2] = ctx->literals[index * 4 + 2]; 851 ctx->value[3] = ctx->literals[index * 4 + 3]; 852 } 853 if (tgsi_src->Register.Indirect) 854 r600_src->rel = V_SQ_REL_RELATIVE; 855 r600_src->neg = tgsi_src->Register.Negate; 856 r600_src->abs = tgsi_src->Register.Absolute; 857 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 858 return 0; 859} 860 861static int tgsi_dst(struct r600_shader_ctx *ctx, 862 const struct tgsi_full_dst_register *tgsi_dst, 863 unsigned swizzle, 864 struct r600_bc_alu_dst *r600_dst) 865{ 866 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 867 868 r600_dst->sel = tgsi_dst->Register.Index; 869 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 870 r600_dst->chan = swizzle; 871 r600_dst->write = 1; 872 if (tgsi_dst->Register.Indirect) 873 r600_dst->rel = V_SQ_REL_RELATIVE; 874 if (inst->Instruction.Saturate) { 875 r600_dst->clamp = 1; 876 } 877 return 0; 878} 879 880static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 881{ 882 switch (swizzle) { 883 case 0: 884 return tgsi_src->Register.SwizzleX; 885 case 1: 886 return tgsi_src->Register.SwizzleY; 887 case 2: 888 return tgsi_src->Register.SwizzleZ; 889 case 3: 890 return tgsi_src->Register.SwizzleW; 891 default: 892 return 0; 893 } 894} 895 896static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 897{ 898 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 899 struct r600_bc_alu alu; 900 int i, j, k, nconst, r; 901 902 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 903 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 904 nconst++; 905 } 906 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 907 if (r) { 908 return r; 909 } 910 } 911 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 912 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 913 int treg = r600_get_temp(ctx); 914 for (k = 0; k < 4; k++) { 915 memset(&alu, 0, sizeof(struct r600_bc_alu)); 916 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 917 alu.src[0].sel = r600_src[i].sel; 918 alu.src[0].chan = k; 919 alu.src[0].rel = r600_src[i].rel; 920 alu.dst.sel = treg; 921 alu.dst.chan = k; 922 alu.dst.write = 1; 923 if (k == 3) 924 alu.last = 1; 925 r = r600_bc_add_alu(ctx->bc, &alu); 926 if (r) 927 return r; 928 } 929 r600_src[i].sel = treg; 930 r600_src[i].rel =0; 931 j--; 932 } 933 } 934 return 0; 935} 936 937/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 938static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 939{ 940 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 941 struct r600_bc_alu alu; 942 int i, j, k, nliteral, r; 943 944 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 945 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 946 nliteral++; 947 } 948 } 949 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 950 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 951 int treg = r600_get_temp(ctx); 952 for (k = 0; k < 4; k++) { 953 memset(&alu, 0, sizeof(struct r600_bc_alu)); 954 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 955 alu.src[0].sel = r600_src[i].sel; 956 alu.src[0].chan = k; 957 alu.dst.sel = treg; 958 alu.dst.chan = k; 959 alu.dst.write = 1; 960 if (k == 3) 961 alu.last = 1; 962 r = r600_bc_add_alu(ctx->bc, &alu); 963 if (r) 964 return r; 965 } 966 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); 967 if (r) 968 return r; 969 r600_src[i].sel = treg; 970 j--; 971 } 972 } 973 return 0; 974} 975 976static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 977{ 978 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 979 struct r600_bc_alu_src r600_src[3]; 980 struct r600_bc_alu alu; 981 int i, j, r; 982 int lasti = 0; 983 984 for (i = 0; i < 4; i++) { 985 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 986 lasti = i; 987 } 988 } 989 990 r = tgsi_split_constant(ctx, r600_src); 991 if (r) 992 return r; 993 r = tgsi_split_literal_constant(ctx, r600_src); 994 if (r) 995 return r; 996 for (i = 0; i < lasti + 1; i++) { 997 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 998 continue; 999 1000 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1001 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1002 if (r) 1003 return r; 1004 1005 alu.inst = ctx->inst_info->r600_opcode; 1006 if (!swap) { 1007 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1008 alu.src[j] = r600_src[j]; 1009 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1010 } 1011 } else { 1012 alu.src[0] = r600_src[1]; 1013 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 1014 1015 alu.src[1] = r600_src[0]; 1016 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1017 } 1018 /* handle some special cases */ 1019 switch (ctx->inst_info->tgsi_opcode) { 1020 case TGSI_OPCODE_SUB: 1021 alu.src[1].neg = 1; 1022 break; 1023 case TGSI_OPCODE_ABS: 1024 alu.src[0].abs = 1; 1025 break; 1026 default: 1027 break; 1028 } 1029 if (i == lasti) { 1030 alu.last = 1; 1031 } 1032 r = r600_bc_add_alu(ctx->bc, &alu); 1033 if (r) 1034 return r; 1035 } 1036 return 0; 1037} 1038 1039static int tgsi_op2(struct r600_shader_ctx *ctx) 1040{ 1041 return tgsi_op2_s(ctx, 0); 1042} 1043 1044static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1045{ 1046 return tgsi_op2_s(ctx, 1); 1047} 1048 1049/* 1050 * r600 - trunc to -PI..PI range 1051 * r700 - normalize by dividing by 2PI 1052 * see fdo bug 27901 1053 */ 1054static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 1055 struct r600_bc_alu_src r600_src[3]) 1056{ 1057 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1058 int r; 1059 uint32_t lit_vals[4]; 1060 struct r600_bc_alu alu; 1061 1062 memset(lit_vals, 0, 4*4); 1063 r = tgsi_split_constant(ctx, r600_src); 1064 if (r) 1065 return r; 1066 r = tgsi_split_literal_constant(ctx, r600_src); 1067 if (r) 1068 return r; 1069 1070 r = tgsi_split_literal_constant(ctx, r600_src); 1071 if (r) 1072 return r; 1073 1074 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 1075 lit_vals[1] = fui(0.5f); 1076 1077 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1078 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1079 alu.is_op3 = 1; 1080 1081 alu.dst.chan = 0; 1082 alu.dst.sel = ctx->temp_reg; 1083 alu.dst.write = 1; 1084 1085 alu.src[0] = r600_src[0]; 1086 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1087 1088 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1089 alu.src[1].chan = 0; 1090 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1091 alu.src[2].chan = 1; 1092 alu.last = 1; 1093 r = r600_bc_add_alu(ctx->bc, &alu); 1094 if (r) 1095 return r; 1096 r = r600_bc_add_literal(ctx->bc, lit_vals); 1097 if (r) 1098 return r; 1099 1100 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1101 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1102 1103 alu.dst.chan = 0; 1104 alu.dst.sel = ctx->temp_reg; 1105 alu.dst.write = 1; 1106 1107 alu.src[0].sel = ctx->temp_reg; 1108 alu.src[0].chan = 0; 1109 alu.last = 1; 1110 r = r600_bc_add_alu(ctx->bc, &alu); 1111 if (r) 1112 return r; 1113 1114 if (ctx->bc->chiprev == 0) { 1115 lit_vals[0] = fui(3.1415926535897f * 2.0f); 1116 lit_vals[1] = fui(-3.1415926535897f); 1117 } else { 1118 lit_vals[0] = fui(1.0f); 1119 lit_vals[1] = fui(-0.5f); 1120 } 1121 1122 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1123 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1124 alu.is_op3 = 1; 1125 1126 alu.dst.chan = 0; 1127 alu.dst.sel = ctx->temp_reg; 1128 alu.dst.write = 1; 1129 1130 alu.src[0].sel = ctx->temp_reg; 1131 alu.src[0].chan = 0; 1132 1133 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1134 alu.src[1].chan = 0; 1135 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1136 alu.src[2].chan = 1; 1137 alu.last = 1; 1138 r = r600_bc_add_alu(ctx->bc, &alu); 1139 if (r) 1140 return r; 1141 r = r600_bc_add_literal(ctx->bc, lit_vals); 1142 if (r) 1143 return r; 1144 return 0; 1145} 1146 1147static int tgsi_trig(struct r600_shader_ctx *ctx) 1148{ 1149 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1150 struct r600_bc_alu_src r600_src[3]; 1151 struct r600_bc_alu alu; 1152 int i, r; 1153 int lasti = 0; 1154 1155 r = tgsi_setup_trig(ctx, r600_src); 1156 if (r) 1157 return r; 1158 1159 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1160 alu.inst = ctx->inst_info->r600_opcode; 1161 alu.dst.chan = 0; 1162 alu.dst.sel = ctx->temp_reg; 1163 alu.dst.write = 1; 1164 1165 alu.src[0].sel = ctx->temp_reg; 1166 alu.src[0].chan = 0; 1167 alu.last = 1; 1168 r = r600_bc_add_alu(ctx->bc, &alu); 1169 if (r) 1170 return r; 1171 1172 /* replicate result */ 1173 for (i = 0; i < 4; i++) { 1174 if (inst->Dst[0].Register.WriteMask & (1 << i)) 1175 lasti = i; 1176 } 1177 for (i = 0; i < lasti + 1; i++) { 1178 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1179 continue; 1180 1181 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1182 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1183 1184 alu.src[0].sel = ctx->temp_reg; 1185 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1186 if (r) 1187 return r; 1188 if (i == lasti) 1189 alu.last = 1; 1190 r = r600_bc_add_alu(ctx->bc, &alu); 1191 if (r) 1192 return r; 1193 } 1194 return 0; 1195} 1196 1197static int tgsi_scs(struct r600_shader_ctx *ctx) 1198{ 1199 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1200 struct r600_bc_alu_src r600_src[3]; 1201 struct r600_bc_alu alu; 1202 int r; 1203 1204 /* We'll only need the trig stuff if we are going to write to the 1205 * X or Y components of the destination vector. 1206 */ 1207 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1208 r = tgsi_setup_trig(ctx, r600_src); 1209 if (r) 1210 return r; 1211 } 1212 1213 /* dst.x = COS */ 1214 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1215 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1216 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1217 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1218 if (r) 1219 return r; 1220 1221 alu.src[0].sel = ctx->temp_reg; 1222 alu.src[0].chan = 0; 1223 alu.last = 1; 1224 r = r600_bc_add_alu(ctx->bc, &alu); 1225 if (r) 1226 return r; 1227 } 1228 1229 /* dst.y = SIN */ 1230 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1231 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1232 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1233 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1234 if (r) 1235 return r; 1236 1237 alu.src[0].sel = ctx->temp_reg; 1238 alu.src[0].chan = 0; 1239 alu.last = 1; 1240 r = r600_bc_add_alu(ctx->bc, &alu); 1241 if (r) 1242 return r; 1243 } 1244 1245 /* dst.z = 0.0; */ 1246 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1247 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1248 1249 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1250 1251 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1252 if (r) 1253 return r; 1254 1255 alu.src[0].sel = V_SQ_ALU_SRC_0; 1256 alu.src[0].chan = 0; 1257 1258 alu.last = 1; 1259 1260 r = r600_bc_add_alu(ctx->bc, &alu); 1261 if (r) 1262 return r; 1263 1264 r = r600_bc_add_literal(ctx->bc, ctx->value); 1265 if (r) 1266 return r; 1267 } 1268 1269 /* dst.w = 1.0; */ 1270 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1271 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1272 1273 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1274 1275 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1276 if (r) 1277 return r; 1278 1279 alu.src[0].sel = V_SQ_ALU_SRC_1; 1280 alu.src[0].chan = 0; 1281 1282 alu.last = 1; 1283 1284 r = r600_bc_add_alu(ctx->bc, &alu); 1285 if (r) 1286 return r; 1287 1288 r = r600_bc_add_literal(ctx->bc, ctx->value); 1289 if (r) 1290 return r; 1291 } 1292 1293 return 0; 1294} 1295 1296static int tgsi_kill(struct r600_shader_ctx *ctx) 1297{ 1298 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1299 struct r600_bc_alu alu; 1300 int i, r; 1301 1302 for (i = 0; i < 4; i++) { 1303 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1304 alu.inst = ctx->inst_info->r600_opcode; 1305 1306 alu.dst.chan = i; 1307 1308 alu.src[0].sel = V_SQ_ALU_SRC_0; 1309 1310 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1311 alu.src[1].sel = V_SQ_ALU_SRC_1; 1312 alu.src[1].neg = 1; 1313 } else { 1314 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1315 if (r) 1316 return r; 1317 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1318 } 1319 if (i == 3) { 1320 alu.last = 1; 1321 } 1322 r = r600_bc_add_alu(ctx->bc, &alu); 1323 if (r) 1324 return r; 1325 } 1326 r = r600_bc_add_literal(ctx->bc, ctx->value); 1327 if (r) 1328 return r; 1329 1330 /* kill must be last in ALU */ 1331 ctx->bc->force_add_cf = 1; 1332 ctx->shader->uses_kill = TRUE; 1333 return 0; 1334} 1335 1336static int tgsi_lit(struct r600_shader_ctx *ctx) 1337{ 1338 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1339 struct r600_bc_alu alu; 1340 struct r600_bc_alu_src r600_src[3]; 1341 int r; 1342 1343 r = tgsi_split_constant(ctx, r600_src); 1344 if (r) 1345 return r; 1346 r = tgsi_split_literal_constant(ctx, r600_src); 1347 if (r) 1348 return r; 1349 1350 /* dst.x, <- 1.0 */ 1351 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1352 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1353 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1354 alu.src[0].chan = 0; 1355 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1356 if (r) 1357 return r; 1358 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1359 r = r600_bc_add_alu(ctx->bc, &alu); 1360 if (r) 1361 return r; 1362 1363 /* dst.y = max(src.x, 0.0) */ 1364 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1365 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1366 alu.src[0] = r600_src[0]; 1367 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1368 alu.src[1].chan = 0; 1369 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1370 if (r) 1371 return r; 1372 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1373 r = r600_bc_add_alu(ctx->bc, &alu); 1374 if (r) 1375 return r; 1376 1377 /* dst.w, <- 1.0 */ 1378 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1379 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1380 alu.src[0].sel = V_SQ_ALU_SRC_1; 1381 alu.src[0].chan = 0; 1382 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1383 if (r) 1384 return r; 1385 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1386 alu.last = 1; 1387 r = r600_bc_add_alu(ctx->bc, &alu); 1388 if (r) 1389 return r; 1390 1391 r = r600_bc_add_literal(ctx->bc, ctx->value); 1392 if (r) 1393 return r; 1394 1395 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1396 { 1397 int chan; 1398 int sel; 1399 1400 /* dst.z = log(src.y) */ 1401 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1402 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1403 alu.src[0] = r600_src[0]; 1404 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1405 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1406 if (r) 1407 return r; 1408 alu.last = 1; 1409 r = r600_bc_add_alu(ctx->bc, &alu); 1410 if (r) 1411 return r; 1412 1413 r = r600_bc_add_literal(ctx->bc, ctx->value); 1414 if (r) 1415 return r; 1416 1417 chan = alu.dst.chan; 1418 sel = alu.dst.sel; 1419 1420 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1421 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1422 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1423 alu.src[0] = r600_src[0]; 1424 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1425 alu.src[1].sel = sel; 1426 alu.src[1].chan = chan; 1427 1428 alu.src[2] = r600_src[0]; 1429 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1430 alu.dst.sel = ctx->temp_reg; 1431 alu.dst.chan = 0; 1432 alu.dst.write = 1; 1433 alu.is_op3 = 1; 1434 alu.last = 1; 1435 r = r600_bc_add_alu(ctx->bc, &alu); 1436 if (r) 1437 return r; 1438 1439 r = r600_bc_add_literal(ctx->bc, ctx->value); 1440 if (r) 1441 return r; 1442 /* dst.z = exp(tmp.x) */ 1443 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1444 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1445 alu.src[0].sel = ctx->temp_reg; 1446 alu.src[0].chan = 0; 1447 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1448 if (r) 1449 return r; 1450 alu.last = 1; 1451 r = r600_bc_add_alu(ctx->bc, &alu); 1452 if (r) 1453 return r; 1454 } 1455 return 0; 1456} 1457 1458static int tgsi_rsq(struct r600_shader_ctx *ctx) 1459{ 1460 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1461 struct r600_bc_alu alu; 1462 int i, r; 1463 1464 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1465 1466 /* FIXME: 1467 * For state trackers other than OpenGL, we'll want to use 1468 * _RECIPSQRT_IEEE instead. 1469 */ 1470 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1471 1472 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1473 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1474 if (r) 1475 return r; 1476 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1477 alu.src[i].abs = 1; 1478 } 1479 alu.dst.sel = ctx->temp_reg; 1480 alu.dst.write = 1; 1481 alu.last = 1; 1482 r = r600_bc_add_alu(ctx->bc, &alu); 1483 if (r) 1484 return r; 1485 r = r600_bc_add_literal(ctx->bc, ctx->value); 1486 if (r) 1487 return r; 1488 /* replicate result */ 1489 return tgsi_helper_tempx_replicate(ctx); 1490} 1491 1492static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1493{ 1494 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1495 struct r600_bc_alu alu; 1496 int i, r; 1497 1498 for (i = 0; i < 4; i++) { 1499 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1500 alu.src[0].sel = ctx->temp_reg; 1501 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1502 alu.dst.chan = i; 1503 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1504 if (r) 1505 return r; 1506 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1507 if (i == 3) 1508 alu.last = 1; 1509 r = r600_bc_add_alu(ctx->bc, &alu); 1510 if (r) 1511 return r; 1512 } 1513 return 0; 1514} 1515 1516static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1517{ 1518 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1519 struct r600_bc_alu alu; 1520 int i, r; 1521 1522 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1523 alu.inst = ctx->inst_info->r600_opcode; 1524 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1525 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1526 if (r) 1527 return r; 1528 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1529 } 1530 alu.dst.sel = ctx->temp_reg; 1531 alu.dst.write = 1; 1532 alu.last = 1; 1533 r = r600_bc_add_alu(ctx->bc, &alu); 1534 if (r) 1535 return r; 1536 r = r600_bc_add_literal(ctx->bc, ctx->value); 1537 if (r) 1538 return r; 1539 /* replicate result */ 1540 return tgsi_helper_tempx_replicate(ctx); 1541} 1542 1543static int tgsi_pow(struct r600_shader_ctx *ctx) 1544{ 1545 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1546 struct r600_bc_alu alu; 1547 int r; 1548 1549 /* LOG2(a) */ 1550 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1551 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1552 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1553 if (r) 1554 return r; 1555 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1556 alu.dst.sel = ctx->temp_reg; 1557 alu.dst.write = 1; 1558 alu.last = 1; 1559 r = r600_bc_add_alu(ctx->bc, &alu); 1560 if (r) 1561 return r; 1562 r = r600_bc_add_literal(ctx->bc,ctx->value); 1563 if (r) 1564 return r; 1565 /* b * LOG2(a) */ 1566 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1567 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); 1568 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1569 if (r) 1570 return r; 1571 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1572 alu.src[1].sel = ctx->temp_reg; 1573 alu.dst.sel = ctx->temp_reg; 1574 alu.dst.write = 1; 1575 alu.last = 1; 1576 r = r600_bc_add_alu(ctx->bc, &alu); 1577 if (r) 1578 return r; 1579 r = r600_bc_add_literal(ctx->bc,ctx->value); 1580 if (r) 1581 return r; 1582 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1583 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1584 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1585 alu.src[0].sel = ctx->temp_reg; 1586 alu.dst.sel = ctx->temp_reg; 1587 alu.dst.write = 1; 1588 alu.last = 1; 1589 r = r600_bc_add_alu(ctx->bc, &alu); 1590 if (r) 1591 return r; 1592 r = r600_bc_add_literal(ctx->bc,ctx->value); 1593 if (r) 1594 return r; 1595 return tgsi_helper_tempx_replicate(ctx); 1596} 1597 1598static int tgsi_ssg(struct r600_shader_ctx *ctx) 1599{ 1600 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1601 struct r600_bc_alu alu; 1602 struct r600_bc_alu_src r600_src[3]; 1603 int i, r; 1604 1605 r = tgsi_split_constant(ctx, r600_src); 1606 if (r) 1607 return r; 1608 r = tgsi_split_literal_constant(ctx, r600_src); 1609 if (r) 1610 return r; 1611 1612 /* tmp = (src > 0 ? 1 : src) */ 1613 for (i = 0; i < 4; i++) { 1614 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1615 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1616 alu.is_op3 = 1; 1617 1618 alu.dst.sel = ctx->temp_reg; 1619 alu.dst.chan = i; 1620 1621 alu.src[0] = r600_src[0]; 1622 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1623 1624 alu.src[1].sel = V_SQ_ALU_SRC_1; 1625 1626 alu.src[2] = r600_src[0]; 1627 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1628 if (i == 3) 1629 alu.last = 1; 1630 r = r600_bc_add_alu(ctx->bc, &alu); 1631 if (r) 1632 return r; 1633 } 1634 r = r600_bc_add_literal(ctx->bc, ctx->value); 1635 if (r) 1636 return r; 1637 1638 /* dst = (-tmp > 0 ? -1 : tmp) */ 1639 for (i = 0; i < 4; i++) { 1640 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1641 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1642 alu.is_op3 = 1; 1643 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1644 if (r) 1645 return r; 1646 1647 alu.src[0].sel = ctx->temp_reg; 1648 alu.src[0].chan = i; 1649 alu.src[0].neg = 1; 1650 1651 alu.src[1].sel = V_SQ_ALU_SRC_1; 1652 alu.src[1].neg = 1; 1653 1654 alu.src[2].sel = ctx->temp_reg; 1655 alu.src[2].chan = i; 1656 1657 if (i == 3) 1658 alu.last = 1; 1659 r = r600_bc_add_alu(ctx->bc, &alu); 1660 if (r) 1661 return r; 1662 } 1663 return 0; 1664} 1665 1666static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1667{ 1668 struct r600_bc_alu alu; 1669 int i, r; 1670 1671 r = r600_bc_add_literal(ctx->bc, ctx->value); 1672 if (r) 1673 return r; 1674 for (i = 0; i < 4; i++) { 1675 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1676 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1677 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1678 alu.dst.chan = i; 1679 } else { 1680 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1681 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1682 if (r) 1683 return r; 1684 alu.src[0].sel = ctx->temp_reg; 1685 alu.src[0].chan = i; 1686 } 1687 if (i == 3) { 1688 alu.last = 1; 1689 } 1690 r = r600_bc_add_alu(ctx->bc, &alu); 1691 if (r) 1692 return r; 1693 } 1694 return 0; 1695} 1696 1697static int tgsi_op3(struct r600_shader_ctx *ctx) 1698{ 1699 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1700 struct r600_bc_alu_src r600_src[3]; 1701 struct r600_bc_alu alu; 1702 int i, j, r; 1703 1704 r = tgsi_split_constant(ctx, r600_src); 1705 if (r) 1706 return r; 1707 r = tgsi_split_literal_constant(ctx, r600_src); 1708 if (r) 1709 return r; 1710 /* do it in 2 step as op3 doesn't support writemask */ 1711 for (i = 0; i < 4; i++) { 1712 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1713 alu.inst = ctx->inst_info->r600_opcode; 1714 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1715 alu.src[j] = r600_src[j]; 1716 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1717 } 1718 alu.dst.sel = ctx->temp_reg; 1719 alu.dst.chan = i; 1720 alu.dst.write = 1; 1721 alu.is_op3 = 1; 1722 if (i == 3) { 1723 alu.last = 1; 1724 } 1725 r = r600_bc_add_alu(ctx->bc, &alu); 1726 if (r) 1727 return r; 1728 } 1729 return tgsi_helper_copy(ctx, inst); 1730} 1731 1732static int tgsi_dp(struct r600_shader_ctx *ctx) 1733{ 1734 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1735 struct r600_bc_alu_src r600_src[3]; 1736 struct r600_bc_alu alu; 1737 int i, j, r; 1738 1739 r = tgsi_split_constant(ctx, r600_src); 1740 if (r) 1741 return r; 1742 r = tgsi_split_literal_constant(ctx, r600_src); 1743 if (r) 1744 return r; 1745 for (i = 0; i < 4; i++) { 1746 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1747 alu.inst = ctx->inst_info->r600_opcode; 1748 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1749 alu.src[j] = r600_src[j]; 1750 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1751 } 1752 alu.dst.sel = ctx->temp_reg; 1753 alu.dst.chan = i; 1754 alu.dst.write = 1; 1755 /* handle some special cases */ 1756 switch (ctx->inst_info->tgsi_opcode) { 1757 case TGSI_OPCODE_DP2: 1758 if (i > 1) { 1759 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1760 alu.src[0].chan = alu.src[1].chan = 0; 1761 } 1762 break; 1763 case TGSI_OPCODE_DP3: 1764 if (i > 2) { 1765 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1766 alu.src[0].chan = alu.src[1].chan = 0; 1767 } 1768 break; 1769 case TGSI_OPCODE_DPH: 1770 if (i == 3) { 1771 alu.src[0].sel = V_SQ_ALU_SRC_1; 1772 alu.src[0].chan = 0; 1773 alu.src[0].neg = 0; 1774 } 1775 break; 1776 default: 1777 break; 1778 } 1779 if (i == 3) { 1780 alu.last = 1; 1781 } 1782 r = r600_bc_add_alu(ctx->bc, &alu); 1783 if (r) 1784 return r; 1785 } 1786 return tgsi_helper_copy(ctx, inst); 1787} 1788 1789static int tgsi_tex(struct r600_shader_ctx *ctx) 1790{ 1791 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1792 struct r600_bc_tex tex; 1793 struct r600_bc_alu alu; 1794 unsigned src_gpr; 1795 int r, i; 1796 int opcode; 1797 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; 1798 uint32_t lit_vals[4]; 1799 1800 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1801 1802 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1803 /* Add perspective divide */ 1804 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1805 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1806 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1807 if (r) 1808 return r; 1809 1810 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1811 alu.dst.sel = ctx->temp_reg; 1812 alu.dst.chan = 3; 1813 alu.last = 1; 1814 alu.dst.write = 1; 1815 r = r600_bc_add_alu(ctx->bc, &alu); 1816 if (r) 1817 return r; 1818 1819 for (i = 0; i < 3; i++) { 1820 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1821 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1822 alu.src[0].sel = ctx->temp_reg; 1823 alu.src[0].chan = 3; 1824 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1825 if (r) 1826 return r; 1827 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1828 alu.dst.sel = ctx->temp_reg; 1829 alu.dst.chan = i; 1830 alu.dst.write = 1; 1831 r = r600_bc_add_alu(ctx->bc, &alu); 1832 if (r) 1833 return r; 1834 } 1835 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1836 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1837 alu.src[0].sel = V_SQ_ALU_SRC_1; 1838 alu.src[0].chan = 0; 1839 alu.dst.sel = ctx->temp_reg; 1840 alu.dst.chan = 3; 1841 alu.last = 1; 1842 alu.dst.write = 1; 1843 r = r600_bc_add_alu(ctx->bc, &alu); 1844 if (r) 1845 return r; 1846 src_not_temp = FALSE; 1847 src_gpr = ctx->temp_reg; 1848 } 1849 1850 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1851 int src_chan, src2_chan; 1852 1853 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1854 for (i = 0; i < 4; i++) { 1855 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1856 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1857 switch (i) { 1858 case 0: 1859 src_chan = 2; 1860 src2_chan = 1; 1861 break; 1862 case 1: 1863 src_chan = 2; 1864 src2_chan = 0; 1865 break; 1866 case 2: 1867 src_chan = 0; 1868 src2_chan = 2; 1869 break; 1870 case 3: 1871 src_chan = 1; 1872 src2_chan = 2; 1873 break; 1874 default: 1875 assert(0); 1876 src_chan = 0; 1877 src2_chan = 0; 1878 break; 1879 } 1880 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1881 if (r) 1882 return r; 1883 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1884 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1885 if (r) 1886 return r; 1887 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1888 alu.dst.sel = ctx->temp_reg; 1889 alu.dst.chan = i; 1890 if (i == 3) 1891 alu.last = 1; 1892 alu.dst.write = 1; 1893 r = r600_bc_add_alu(ctx->bc, &alu); 1894 if (r) 1895 return r; 1896 } 1897 1898 /* tmp1.z = RCP_e(|tmp1.z|) */ 1899 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1900 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1901 alu.src[0].sel = ctx->temp_reg; 1902 alu.src[0].chan = 2; 1903 alu.src[0].abs = 1; 1904 alu.dst.sel = ctx->temp_reg; 1905 alu.dst.chan = 2; 1906 alu.dst.write = 1; 1907 alu.last = 1; 1908 r = r600_bc_add_alu(ctx->bc, &alu); 1909 if (r) 1910 return r; 1911 1912 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1913 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1914 * muladd has no writemask, have to use another temp 1915 */ 1916 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1917 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1918 alu.is_op3 = 1; 1919 1920 alu.src[0].sel = ctx->temp_reg; 1921 alu.src[0].chan = 0; 1922 alu.src[1].sel = ctx->temp_reg; 1923 alu.src[1].chan = 2; 1924 1925 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1926 alu.src[2].chan = 0; 1927 1928 alu.dst.sel = ctx->temp_reg; 1929 alu.dst.chan = 0; 1930 alu.dst.write = 1; 1931 1932 r = r600_bc_add_alu(ctx->bc, &alu); 1933 if (r) 1934 return r; 1935 1936 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1937 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1938 alu.is_op3 = 1; 1939 1940 alu.src[0].sel = ctx->temp_reg; 1941 alu.src[0].chan = 1; 1942 alu.src[1].sel = ctx->temp_reg; 1943 alu.src[1].chan = 2; 1944 1945 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1946 alu.src[2].chan = 0; 1947 1948 alu.dst.sel = ctx->temp_reg; 1949 alu.dst.chan = 1; 1950 alu.dst.write = 1; 1951 1952 alu.last = 1; 1953 r = r600_bc_add_alu(ctx->bc, &alu); 1954 if (r) 1955 return r; 1956 1957 lit_vals[0] = fui(1.5f); 1958 1959 r = r600_bc_add_literal(ctx->bc, lit_vals); 1960 if (r) 1961 return r; 1962 src_not_temp = FALSE; 1963 src_gpr = ctx->temp_reg; 1964 } 1965 1966 if (src_not_temp) { 1967 for (i = 0; i < 4; i++) { 1968 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1969 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1970 alu.src[0].sel = src_gpr; 1971 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1972 alu.dst.sel = ctx->temp_reg; 1973 alu.dst.chan = i; 1974 if (i == 3) 1975 alu.last = 1; 1976 alu.dst.write = 1; 1977 r = r600_bc_add_alu(ctx->bc, &alu); 1978 if (r) 1979 return r; 1980 } 1981 src_gpr = ctx->temp_reg; 1982 } 1983 1984 opcode = ctx->inst_info->r600_opcode; 1985 if (opcode == SQ_TEX_INST_SAMPLE && 1986 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1987 opcode = SQ_TEX_INST_SAMPLE_C; 1988 1989 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1990 tex.inst = opcode; 1991 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1992 tex.resource_id = tex.sampler_id; 1993 if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX) 1994 tex.resource_id += PIPE_MAX_ATTRIBS; 1995 tex.src_gpr = src_gpr; 1996 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1997 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 1998 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 1999 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 2000 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 2001 tex.src_sel_x = 0; 2002 tex.src_sel_y = 1; 2003 tex.src_sel_z = 2; 2004 tex.src_sel_w = 3; 2005 2006 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2007 tex.src_sel_x = 1; 2008 tex.src_sel_y = 0; 2009 tex.src_sel_z = 3; 2010 tex.src_sel_w = 1; 2011 } 2012 2013 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 2014 tex.coord_type_x = 1; 2015 tex.coord_type_y = 1; 2016 tex.coord_type_z = 1; 2017 tex.coord_type_w = 1; 2018 } 2019 2020 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 2021 tex.src_sel_w = 2; 2022 2023 r = r600_bc_add_tex(ctx->bc, &tex); 2024 if (r) 2025 return r; 2026 2027 /* add shadow ambient support - gallium doesn't do it yet */ 2028 return 0; 2029 2030} 2031 2032static int tgsi_lrp(struct r600_shader_ctx *ctx) 2033{ 2034 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2035 struct r600_bc_alu_src r600_src[3]; 2036 struct r600_bc_alu alu; 2037 unsigned i; 2038 int r; 2039 2040 r = tgsi_split_constant(ctx, r600_src); 2041 if (r) 2042 return r; 2043 r = tgsi_split_literal_constant(ctx, r600_src); 2044 if (r) 2045 return r; 2046 /* 1 - src0 */ 2047 for (i = 0; i < 4; i++) { 2048 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2049 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2050 alu.src[0].sel = V_SQ_ALU_SRC_1; 2051 alu.src[0].chan = 0; 2052 alu.src[1] = r600_src[0]; 2053 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 2054 alu.src[1].neg = 1; 2055 alu.dst.sel = ctx->temp_reg; 2056 alu.dst.chan = i; 2057 if (i == 3) { 2058 alu.last = 1; 2059 } 2060 alu.dst.write = 1; 2061 r = r600_bc_add_alu(ctx->bc, &alu); 2062 if (r) 2063 return r; 2064 } 2065 r = r600_bc_add_literal(ctx->bc, ctx->value); 2066 if (r) 2067 return r; 2068 2069 /* (1 - src0) * src2 */ 2070 for (i = 0; i < 4; i++) { 2071 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2072 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2073 alu.src[0].sel = ctx->temp_reg; 2074 alu.src[0].chan = i; 2075 alu.src[1] = r600_src[2]; 2076 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2077 alu.dst.sel = ctx->temp_reg; 2078 alu.dst.chan = i; 2079 if (i == 3) { 2080 alu.last = 1; 2081 } 2082 alu.dst.write = 1; 2083 r = r600_bc_add_alu(ctx->bc, &alu); 2084 if (r) 2085 return r; 2086 } 2087 r = r600_bc_add_literal(ctx->bc, ctx->value); 2088 if (r) 2089 return r; 2090 2091 /* src0 * src1 + (1 - src0) * src2 */ 2092 for (i = 0; i < 4; i++) { 2093 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2094 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2095 alu.is_op3 = 1; 2096 alu.src[0] = r600_src[0]; 2097 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2098 alu.src[1] = r600_src[1]; 2099 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2100 alu.src[2].sel = ctx->temp_reg; 2101 alu.src[2].chan = i; 2102 alu.dst.sel = ctx->temp_reg; 2103 alu.dst.chan = i; 2104 if (i == 3) { 2105 alu.last = 1; 2106 } 2107 r = r600_bc_add_alu(ctx->bc, &alu); 2108 if (r) 2109 return r; 2110 } 2111 return tgsi_helper_copy(ctx, inst); 2112} 2113 2114static int tgsi_cmp(struct r600_shader_ctx *ctx) 2115{ 2116 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2117 struct r600_bc_alu_src r600_src[3]; 2118 struct r600_bc_alu alu; 2119 int use_temp = 0; 2120 int i, r; 2121 2122 r = tgsi_split_constant(ctx, r600_src); 2123 if (r) 2124 return r; 2125 r = tgsi_split_literal_constant(ctx, r600_src); 2126 if (r) 2127 return r; 2128 2129 if (inst->Dst[0].Register.WriteMask != 0xf) 2130 use_temp = 1; 2131 2132 for (i = 0; i < 4; i++) { 2133 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2134 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2135 alu.src[0] = r600_src[0]; 2136 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2137 2138 alu.src[1] = r600_src[2]; 2139 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2140 2141 alu.src[2] = r600_src[1]; 2142 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 2143 2144 if (use_temp) 2145 alu.dst.sel = ctx->temp_reg; 2146 else { 2147 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2148 if (r) 2149 return r; 2150 } 2151 alu.dst.chan = i; 2152 alu.dst.write = 1; 2153 alu.is_op3 = 1; 2154 if (i == 3) 2155 alu.last = 1; 2156 r = r600_bc_add_alu(ctx->bc, &alu); 2157 if (r) 2158 return r; 2159 } 2160 if (use_temp) 2161 return tgsi_helper_copy(ctx, inst); 2162 return 0; 2163} 2164 2165static int tgsi_xpd(struct r600_shader_ctx *ctx) 2166{ 2167 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2168 struct r600_bc_alu_src r600_src[3]; 2169 struct r600_bc_alu alu; 2170 uint32_t use_temp = 0; 2171 int i, r; 2172 2173 if (inst->Dst[0].Register.WriteMask != 0xf) 2174 use_temp = 1; 2175 2176 r = tgsi_split_constant(ctx, r600_src); 2177 if (r) 2178 return r; 2179 r = tgsi_split_literal_constant(ctx, r600_src); 2180 if (r) 2181 return r; 2182 2183 for (i = 0; i < 4; i++) { 2184 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2185 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2186 2187 alu.src[0] = r600_src[0]; 2188 switch (i) { 2189 case 0: 2190 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2191 break; 2192 case 1: 2193 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2194 break; 2195 case 2: 2196 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2197 break; 2198 case 3: 2199 alu.src[0].sel = V_SQ_ALU_SRC_0; 2200 alu.src[0].chan = i; 2201 } 2202 2203 alu.src[1] = r600_src[1]; 2204 switch (i) { 2205 case 0: 2206 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2207 break; 2208 case 1: 2209 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2210 break; 2211 case 2: 2212 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2213 break; 2214 case 3: 2215 alu.src[1].sel = V_SQ_ALU_SRC_0; 2216 alu.src[1].chan = i; 2217 } 2218 2219 alu.dst.sel = ctx->temp_reg; 2220 alu.dst.chan = i; 2221 alu.dst.write = 1; 2222 2223 if (i == 3) 2224 alu.last = 1; 2225 r = r600_bc_add_alu(ctx->bc, &alu); 2226 if (r) 2227 return r; 2228 2229 r = r600_bc_add_literal(ctx->bc, ctx->value); 2230 if (r) 2231 return r; 2232 } 2233 2234 for (i = 0; i < 4; i++) { 2235 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2236 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2237 2238 alu.src[0] = r600_src[0]; 2239 switch (i) { 2240 case 0: 2241 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2242 break; 2243 case 1: 2244 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2245 break; 2246 case 2: 2247 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2248 break; 2249 case 3: 2250 alu.src[0].sel = V_SQ_ALU_SRC_0; 2251 alu.src[0].chan = i; 2252 } 2253 2254 alu.src[1] = r600_src[1]; 2255 switch (i) { 2256 case 0: 2257 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2258 break; 2259 case 1: 2260 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2261 break; 2262 case 2: 2263 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2264 break; 2265 case 3: 2266 alu.src[1].sel = V_SQ_ALU_SRC_0; 2267 alu.src[1].chan = i; 2268 } 2269 2270 alu.src[2].sel = ctx->temp_reg; 2271 alu.src[2].neg = 1; 2272 alu.src[2].chan = i; 2273 2274 if (use_temp) 2275 alu.dst.sel = ctx->temp_reg; 2276 else { 2277 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2278 if (r) 2279 return r; 2280 } 2281 alu.dst.chan = i; 2282 alu.dst.write = 1; 2283 alu.is_op3 = 1; 2284 if (i == 3) 2285 alu.last = 1; 2286 r = r600_bc_add_alu(ctx->bc, &alu); 2287 if (r) 2288 return r; 2289 2290 r = r600_bc_add_literal(ctx->bc, ctx->value); 2291 if (r) 2292 return r; 2293 } 2294 if (use_temp) 2295 return tgsi_helper_copy(ctx, inst); 2296 return 0; 2297} 2298 2299static int tgsi_exp(struct r600_shader_ctx *ctx) 2300{ 2301 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2302 struct r600_bc_alu_src r600_src[3] = { { 0 } }; 2303 struct r600_bc_alu alu; 2304 int r; 2305 2306 /* result.x = 2^floor(src); */ 2307 if (inst->Dst[0].Register.WriteMask & 1) { 2308 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2309 2310 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2311 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2312 if (r) 2313 return r; 2314 2315 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2316 2317 alu.dst.sel = ctx->temp_reg; 2318 alu.dst.chan = 0; 2319 alu.dst.write = 1; 2320 alu.last = 1; 2321 r = r600_bc_add_alu(ctx->bc, &alu); 2322 if (r) 2323 return r; 2324 2325 r = r600_bc_add_literal(ctx->bc, ctx->value); 2326 if (r) 2327 return r; 2328 2329 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2330 alu.src[0].sel = ctx->temp_reg; 2331 alu.src[0].chan = 0; 2332 2333 alu.dst.sel = ctx->temp_reg; 2334 alu.dst.chan = 0; 2335 alu.dst.write = 1; 2336 alu.last = 1; 2337 r = r600_bc_add_alu(ctx->bc, &alu); 2338 if (r) 2339 return r; 2340 2341 r = r600_bc_add_literal(ctx->bc, ctx->value); 2342 if (r) 2343 return r; 2344 } 2345 2346 /* result.y = tmp - floor(tmp); */ 2347 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2348 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2349 2350 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2351 alu.src[0] = r600_src[0]; 2352 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2353 if (r) 2354 return r; 2355 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2356 2357 alu.dst.sel = ctx->temp_reg; 2358// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2359// if (r) 2360// return r; 2361 alu.dst.write = 1; 2362 alu.dst.chan = 1; 2363 2364 alu.last = 1; 2365 2366 r = r600_bc_add_alu(ctx->bc, &alu); 2367 if (r) 2368 return r; 2369 r = r600_bc_add_literal(ctx->bc, ctx->value); 2370 if (r) 2371 return r; 2372 } 2373 2374 /* result.z = RoughApprox2ToX(tmp);*/ 2375 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2376 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2377 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2378 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2379 if (r) 2380 return r; 2381 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2382 2383 alu.dst.sel = ctx->temp_reg; 2384 alu.dst.write = 1; 2385 alu.dst.chan = 2; 2386 2387 alu.last = 1; 2388 2389 r = r600_bc_add_alu(ctx->bc, &alu); 2390 if (r) 2391 return r; 2392 r = r600_bc_add_literal(ctx->bc, ctx->value); 2393 if (r) 2394 return r; 2395 } 2396 2397 /* result.w = 1.0;*/ 2398 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2399 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2400 2401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2402 alu.src[0].sel = V_SQ_ALU_SRC_1; 2403 alu.src[0].chan = 0; 2404 2405 alu.dst.sel = ctx->temp_reg; 2406 alu.dst.chan = 3; 2407 alu.dst.write = 1; 2408 alu.last = 1; 2409 r = r600_bc_add_alu(ctx->bc, &alu); 2410 if (r) 2411 return r; 2412 r = r600_bc_add_literal(ctx->bc, ctx->value); 2413 if (r) 2414 return r; 2415 } 2416 return tgsi_helper_copy(ctx, inst); 2417} 2418 2419static int tgsi_log(struct r600_shader_ctx *ctx) 2420{ 2421 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2422 struct r600_bc_alu alu; 2423 int r; 2424 2425 /* result.x = floor(log2(src)); */ 2426 if (inst->Dst[0].Register.WriteMask & 1) { 2427 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2428 2429 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2430 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2431 if (r) 2432 return r; 2433 2434 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2435 2436 alu.dst.sel = ctx->temp_reg; 2437 alu.dst.chan = 0; 2438 alu.dst.write = 1; 2439 alu.last = 1; 2440 r = r600_bc_add_alu(ctx->bc, &alu); 2441 if (r) 2442 return r; 2443 2444 r = r600_bc_add_literal(ctx->bc, ctx->value); 2445 if (r) 2446 return r; 2447 2448 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2449 alu.src[0].sel = ctx->temp_reg; 2450 alu.src[0].chan = 0; 2451 2452 alu.dst.sel = ctx->temp_reg; 2453 alu.dst.chan = 0; 2454 alu.dst.write = 1; 2455 alu.last = 1; 2456 2457 r = r600_bc_add_alu(ctx->bc, &alu); 2458 if (r) 2459 return r; 2460 2461 r = r600_bc_add_literal(ctx->bc, ctx->value); 2462 if (r) 2463 return r; 2464 } 2465 2466 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2467 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2468 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2469 2470 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2471 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2472 if (r) 2473 return r; 2474 2475 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2476 2477 alu.dst.sel = ctx->temp_reg; 2478 alu.dst.chan = 1; 2479 alu.dst.write = 1; 2480 alu.last = 1; 2481 2482 r = r600_bc_add_alu(ctx->bc, &alu); 2483 if (r) 2484 return r; 2485 2486 r = r600_bc_add_literal(ctx->bc, ctx->value); 2487 if (r) 2488 return r; 2489 2490 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2491 2492 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2493 alu.src[0].sel = ctx->temp_reg; 2494 alu.src[0].chan = 1; 2495 2496 alu.dst.sel = ctx->temp_reg; 2497 alu.dst.chan = 1; 2498 alu.dst.write = 1; 2499 alu.last = 1; 2500 2501 r = r600_bc_add_alu(ctx->bc, &alu); 2502 if (r) 2503 return r; 2504 2505 r = r600_bc_add_literal(ctx->bc, ctx->value); 2506 if (r) 2507 return r; 2508 2509 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2510 2511 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2512 alu.src[0].sel = ctx->temp_reg; 2513 alu.src[0].chan = 1; 2514 2515 alu.dst.sel = ctx->temp_reg; 2516 alu.dst.chan = 1; 2517 alu.dst.write = 1; 2518 alu.last = 1; 2519 2520 r = r600_bc_add_alu(ctx->bc, &alu); 2521 if (r) 2522 return r; 2523 2524 r = r600_bc_add_literal(ctx->bc, ctx->value); 2525 if (r) 2526 return r; 2527 2528 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2529 2530 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2531 alu.src[0].sel = ctx->temp_reg; 2532 alu.src[0].chan = 1; 2533 2534 alu.dst.sel = ctx->temp_reg; 2535 alu.dst.chan = 1; 2536 alu.dst.write = 1; 2537 alu.last = 1; 2538 2539 r = r600_bc_add_alu(ctx->bc, &alu); 2540 if (r) 2541 return r; 2542 2543 r = r600_bc_add_literal(ctx->bc, ctx->value); 2544 if (r) 2545 return r; 2546 2547 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2548 2549 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2550 2551 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2552 if (r) 2553 return r; 2554 2555 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2556 2557 alu.src[1].sel = ctx->temp_reg; 2558 alu.src[1].chan = 1; 2559 2560 alu.dst.sel = ctx->temp_reg; 2561 alu.dst.chan = 1; 2562 alu.dst.write = 1; 2563 alu.last = 1; 2564 2565 r = r600_bc_add_alu(ctx->bc, &alu); 2566 if (r) 2567 return r; 2568 2569 r = r600_bc_add_literal(ctx->bc, ctx->value); 2570 if (r) 2571 return r; 2572 } 2573 2574 /* result.z = log2(src);*/ 2575 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2576 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2577 2578 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2579 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2580 if (r) 2581 return r; 2582 2583 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2584 2585 alu.dst.sel = ctx->temp_reg; 2586 alu.dst.write = 1; 2587 alu.dst.chan = 2; 2588 alu.last = 1; 2589 2590 r = r600_bc_add_alu(ctx->bc, &alu); 2591 if (r) 2592 return r; 2593 2594 r = r600_bc_add_literal(ctx->bc, ctx->value); 2595 if (r) 2596 return r; 2597 } 2598 2599 /* result.w = 1.0; */ 2600 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2601 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2602 2603 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2604 alu.src[0].sel = V_SQ_ALU_SRC_1; 2605 alu.src[0].chan = 0; 2606 2607 alu.dst.sel = ctx->temp_reg; 2608 alu.dst.chan = 3; 2609 alu.dst.write = 1; 2610 alu.last = 1; 2611 2612 r = r600_bc_add_alu(ctx->bc, &alu); 2613 if (r) 2614 return r; 2615 2616 r = r600_bc_add_literal(ctx->bc, ctx->value); 2617 if (r) 2618 return r; 2619 } 2620 2621 return tgsi_helper_copy(ctx, inst); 2622} 2623 2624/* r6/7 only for now */ 2625static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2626{ 2627 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2628 struct r600_bc_alu alu; 2629 int r; 2630 2631 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2632 2633 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2634 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2635 if (r) 2636 return r; 2637 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2638 alu.last = 1; 2639 alu.dst.chan = 0; 2640 alu.dst.sel = ctx->temp_reg; 2641 alu.dst.write = 1; 2642 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2643 if (r) 2644 return r; 2645 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2646 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2647 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2648 if (r) 2649 return r; 2650 alu.src[0].sel = ctx->temp_reg; 2651 alu.src[0].chan = 0; 2652 alu.last = 1; 2653 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2654 if (r) 2655 return r; 2656 return 0; 2657} 2658static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2659{ 2660 /* TODO from r600c, ar values don't persist between clauses */ 2661 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2662 struct r600_bc_alu alu; 2663 int r; 2664 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2665 2666 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2667 2668 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2669 if (r) 2670 return r; 2671 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2672 2673 alu.last = 1; 2674 2675 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2676 if (r) 2677 return r; 2678 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2679 return 0; 2680} 2681 2682static int tgsi_opdst(struct r600_shader_ctx *ctx) 2683{ 2684 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2685 struct r600_bc_alu alu; 2686 int i, r = 0; 2687 2688 for (i = 0; i < 4; i++) { 2689 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2690 2691 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2692 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2693 if (r) 2694 return r; 2695 2696 if (i == 0 || i == 3) { 2697 alu.src[0].sel = V_SQ_ALU_SRC_1; 2698 } else { 2699 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2700 if (r) 2701 return r; 2702 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2703 } 2704 2705 if (i == 0 || i == 2) { 2706 alu.src[1].sel = V_SQ_ALU_SRC_1; 2707 } else { 2708 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2709 if (r) 2710 return r; 2711 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2712 } 2713 if (i == 3) 2714 alu.last = 1; 2715 r = r600_bc_add_alu(ctx->bc, &alu); 2716 if (r) 2717 return r; 2718 } 2719 return 0; 2720} 2721 2722static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2723{ 2724 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2725 struct r600_bc_alu alu; 2726 int r; 2727 2728 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2729 alu.inst = opcode; 2730 alu.predicate = 1; 2731 2732 alu.dst.sel = ctx->temp_reg; 2733 alu.dst.write = 1; 2734 alu.dst.chan = 0; 2735 2736 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2737 if (r) 2738 return r; 2739 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2740 alu.src[1].sel = V_SQ_ALU_SRC_0; 2741 alu.src[1].chan = 0; 2742 2743 alu.last = 1; 2744 2745 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2746 if (r) 2747 return r; 2748 return 0; 2749} 2750 2751static int pops(struct r600_shader_ctx *ctx, int pops) 2752{ 2753 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2754 ctx->bc->cf_last->pop_count = pops; 2755 return 0; 2756} 2757 2758static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2759{ 2760 switch(reason) { 2761 case FC_PUSH_VPM: 2762 ctx->bc->callstack[ctx->bc->call_sp].current--; 2763 break; 2764 case FC_PUSH_WQM: 2765 case FC_LOOP: 2766 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2767 break; 2768 case FC_REP: 2769 /* TOODO : for 16 vp asic should -= 2; */ 2770 ctx->bc->callstack[ctx->bc->call_sp].current --; 2771 break; 2772 } 2773} 2774 2775static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2776{ 2777 if (check_max_only) { 2778 int diff; 2779 switch (reason) { 2780 case FC_PUSH_VPM: 2781 diff = 1; 2782 break; 2783 case FC_PUSH_WQM: 2784 diff = 4; 2785 break; 2786 default: 2787 assert(0); 2788 diff = 0; 2789 } 2790 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2791 ctx->bc->callstack[ctx->bc->call_sp].max) { 2792 ctx->bc->callstack[ctx->bc->call_sp].max = 2793 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2794 } 2795 return; 2796 } 2797 switch (reason) { 2798 case FC_PUSH_VPM: 2799 ctx->bc->callstack[ctx->bc->call_sp].current++; 2800 break; 2801 case FC_PUSH_WQM: 2802 case FC_LOOP: 2803 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2804 break; 2805 case FC_REP: 2806 ctx->bc->callstack[ctx->bc->call_sp].current++; 2807 break; 2808 } 2809 2810 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2811 ctx->bc->callstack[ctx->bc->call_sp].max) { 2812 ctx->bc->callstack[ctx->bc->call_sp].max = 2813 ctx->bc->callstack[ctx->bc->call_sp].current; 2814 } 2815} 2816 2817static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2818{ 2819 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2820 2821 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2822 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2823 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2824 sp->num_mid++; 2825} 2826 2827static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2828{ 2829 ctx->bc->fc_sp++; 2830 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2831 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2832} 2833 2834static void fc_poplevel(struct r600_shader_ctx *ctx) 2835{ 2836 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2837 if (sp->mid) { 2838 free(sp->mid); 2839 sp->mid = NULL; 2840 } 2841 sp->num_mid = 0; 2842 sp->start = NULL; 2843 sp->type = 0; 2844 ctx->bc->fc_sp--; 2845} 2846 2847#if 0 2848static int emit_return(struct r600_shader_ctx *ctx) 2849{ 2850 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2851 return 0; 2852} 2853 2854static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2855{ 2856 2857 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2858 ctx->bc->cf_last->pop_count = pops; 2859 /* TODO work out offset */ 2860 return 0; 2861} 2862 2863static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2864{ 2865 return 0; 2866} 2867 2868static void emit_testflag(struct r600_shader_ctx *ctx) 2869{ 2870 2871} 2872 2873static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2874{ 2875 emit_testflag(ctx); 2876 emit_jump_to_offset(ctx, 1, 4); 2877 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2878 pops(ctx, ifidx + 1); 2879 emit_return(ctx); 2880} 2881 2882static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2883{ 2884 emit_testflag(ctx); 2885 2886 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2887 ctx->bc->cf_last->pop_count = 1; 2888 2889 fc_set_mid(ctx, fc_sp); 2890 2891 pops(ctx, 1); 2892} 2893#endif 2894 2895static int tgsi_if(struct r600_shader_ctx *ctx) 2896{ 2897 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2898 2899 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2900 2901 fc_pushlevel(ctx, FC_IF); 2902 2903 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2904 return 0; 2905} 2906 2907static int tgsi_else(struct r600_shader_ctx *ctx) 2908{ 2909 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2910 ctx->bc->cf_last->pop_count = 1; 2911 2912 fc_set_mid(ctx, ctx->bc->fc_sp); 2913 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2914 return 0; 2915} 2916 2917static int tgsi_endif(struct r600_shader_ctx *ctx) 2918{ 2919 pops(ctx, 1); 2920 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2921 R600_ERR("if/endif unbalanced in shader\n"); 2922 return -1; 2923 } 2924 2925 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2926 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2927 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2928 } else { 2929 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2930 } 2931 fc_poplevel(ctx); 2932 2933 callstack_decrease_current(ctx, FC_PUSH_VPM); 2934 return 0; 2935} 2936 2937static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2938{ 2939 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2940 2941 fc_pushlevel(ctx, FC_LOOP); 2942 2943 /* check stack depth */ 2944 callstack_check_depth(ctx, FC_LOOP, 0); 2945 return 0; 2946} 2947 2948static int tgsi_endloop(struct r600_shader_ctx *ctx) 2949{ 2950 int i; 2951 2952 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2953 2954 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2955 R600_ERR("loop/endloop in shader code are not paired.\n"); 2956 return -EINVAL; 2957 } 2958 2959 /* fixup loop pointers - from r600isa 2960 LOOP END points to CF after LOOP START, 2961 LOOP START point to CF after LOOP END 2962 BRK/CONT point to LOOP END CF 2963 */ 2964 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2965 2966 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2967 2968 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2969 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2970 } 2971 /* TODO add LOOPRET support */ 2972 fc_poplevel(ctx); 2973 callstack_decrease_current(ctx, FC_LOOP); 2974 return 0; 2975} 2976 2977static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2978{ 2979 unsigned int fscp; 2980 2981 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2982 { 2983 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2984 break; 2985 } 2986 2987 if (fscp == 0) { 2988 R600_ERR("Break not inside loop/endloop pair\n"); 2989 return -EINVAL; 2990 } 2991 2992 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2993 ctx->bc->cf_last->pop_count = 1; 2994 2995 fc_set_mid(ctx, fscp); 2996 2997 pops(ctx, 1); 2998 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2999 return 0; 3000} 3001 3002static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 3003 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3004 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3005 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3006 3007 /* FIXME: 3008 * For state trackers other than OpenGL, we'll want to use 3009 * _RECIP_IEEE instead. 3010 */ 3011 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 3012 3013 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 3014 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3015 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3016 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3017 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3018 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3019 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3020 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3021 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3022 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3023 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3024 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3025 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3026 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3027 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3028 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3029 /* gap */ 3030 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3031 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3032 /* gap */ 3033 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3034 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3035 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3036 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3037 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3038 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3039 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3040 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3041 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3042 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3043 /* gap */ 3044 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3045 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3046 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3047 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3048 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3049 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3050 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3051 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3052 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3053 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3054 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3055 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3056 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3057 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3058 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3059 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3060 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3061 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3062 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3063 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3064 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3065 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3066 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3067 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3068 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3069 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3070 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3071 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3072 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3073 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3074 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3075 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3076 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3077 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3078 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3079 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3080 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3081 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3082 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3083 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3084 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3085 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3086 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3087 /* gap */ 3088 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3089 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3090 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3091 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3092 /* gap */ 3093 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3094 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3095 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3096 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3097 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3098 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3099 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3100 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3101 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3102 /* gap */ 3103 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3104 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3105 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3106 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3107 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3108 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3109 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3110 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3111 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3112 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3113 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3114 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3115 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3116 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3117 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3118 /* gap */ 3119 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3120 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3121 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3122 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3123 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3124 /* gap */ 3125 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3126 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3127 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3128 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3129 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3130 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3131 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3132 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3133 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3134 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3135 /* gap */ 3136 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3137 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3138 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3139 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3140 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3141 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3142 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3143 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3144 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3145 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3146 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3147 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3148 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3149 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3150 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3151 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3152 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3153 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3154 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3155 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3156 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3157 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3158 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3159 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3160 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3161 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3162 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3163 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3164}; 3165 3166static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3167 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3168 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3169 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3170 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3171 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 3172 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3173 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3174 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3175 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3176 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3177 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3178 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3179 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3180 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3181 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3182 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3183 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3184 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3185 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3186 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3187 /* gap */ 3188 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3189 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3190 /* gap */ 3191 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3192 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3193 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3194 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3195 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3196 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3197 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3198 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3199 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3200 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3201 /* gap */ 3202 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3203 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3204 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3205 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3206 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3207 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3208 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3209 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3210 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3211 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3212 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3213 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3214 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3215 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3216 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3217 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3218 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3219 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3220 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3221 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3222 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3223 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3224 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3225 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3226 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3227 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3228 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3229 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3230 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3231 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3232 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3233 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3234 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3235 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3236 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3237 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3238 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3239 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3240 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3241 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3242 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3243 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3244 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3245 /* gap */ 3246 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3247 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3248 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3249 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3250 /* gap */ 3251 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3252 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3253 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3254 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3255 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3256 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3257 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3258 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3259 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3260 /* gap */ 3261 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3262 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3263 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3264 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3265 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3266 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3267 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3268 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3269 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3270 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3271 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3272 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3273 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3274 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3275 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3276 /* gap */ 3277 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3278 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3279 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3280 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3281 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3282 /* gap */ 3283 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3284 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3285 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3286 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3287 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3288 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3289 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3290 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3291 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3292 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3293 /* gap */ 3294 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3295 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3296 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3297 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3298 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3299 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3300 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3301 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3302 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3303 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3304 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3305 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3306 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3307 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3308 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3309 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3310 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3311 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3312 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3313 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3314 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3315 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3316 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3317 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3318 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3319 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3320 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3321 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3322}; 3323