r600_shader.c revision 833f3a488a7ba0fa59e25f1e518f6b4616270143
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_opcodes.h" 32#include "r600d.h" 33#include <stdio.h> 34#include <errno.h> 35 36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) 37{ 38 struct r600_pipe_state *rstate = &shader->rstate; 39 struct r600_shader *rshader = &shader->shader; 40 unsigned spi_vs_out_id[10]; 41 unsigned i, tmp; 42 43 /* clear previous register */ 44 rstate->nregs = 0; 45 46 /* so far never got proper semantic id from tgsi */ 47 for (i = 0; i < 10; i++) { 48 spi_vs_out_id[i] = 0; 49 } 50 for (i = 0; i < 32; i++) { 51 tmp = i << ((i & 3) * 8); 52 spi_vs_out_id[i / 4] |= tmp; 53 } 54 for (i = 0; i < 10; i++) { 55 r600_pipe_state_add_reg(rstate, 56 R_028614_SPI_VS_OUT_ID_0 + i * 4, 57 spi_vs_out_id[i], 0xFFFFFFFF, NULL); 58 } 59 60 r600_pipe_state_add_reg(rstate, 61 R_0286C4_SPI_VS_OUT_CONFIG, 62 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 63 0xFFFFFFFF, NULL); 64 r600_pipe_state_add_reg(rstate, 65 R_028868_SQ_PGM_RESOURCES_VS, 66 S_028868_NUM_GPRS(rshader->bc.ngpr) | 67 S_028868_STACK_SIZE(rshader->bc.nstack), 68 0xFFFFFFFF, NULL); 69 r600_pipe_state_add_reg(rstate, 70 R_0288A4_SQ_PGM_RESOURCES_FS, 71 0x00000000, 0xFFFFFFFF, NULL); 72 r600_pipe_state_add_reg(rstate, 73 R_0288D0_SQ_PGM_CF_OFFSET_VS, 74 0x00000000, 0xFFFFFFFF, NULL); 75 r600_pipe_state_add_reg(rstate, 76 R_0288DC_SQ_PGM_CF_OFFSET_FS, 77 0x00000000, 0xFFFFFFFF, NULL); 78 r600_pipe_state_add_reg(rstate, 79 R_028858_SQ_PGM_START_VS, 80 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 81 r600_pipe_state_add_reg(rstate, 82 R_028894_SQ_PGM_START_FS, 83 r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch); 84 85 r600_pipe_state_add_reg(rstate, 86 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, 87 0xFFFFFFFF, NULL); 88 89} 90 91int r600_find_vs_semantic_index(struct r600_shader *vs, 92 struct r600_shader *ps, int id) 93{ 94 struct r600_shader_io *input = &ps->input[id]; 95 96 for (int i = 0; i < vs->noutput; i++) { 97 if (input->name == vs->output[i].name && 98 input->sid == vs->output[i].sid) { 99 return i - 1; 100 } 101 } 102 return 0; 103} 104 105static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) 106{ 107 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 108 struct r600_pipe_state *rstate = &shader->rstate; 109 struct r600_shader *rshader = &shader->shader; 110 unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; 111 int pos_index = -1, face_index = -1; 112 113 /* clear previous register */ 114 rstate->nregs = 0; 115 116 for (i = 0; i < rshader->ninput; i++) { 117 tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); 118 if (rshader->input[i].centroid) 119 tmp |= S_028644_SEL_CENTROID(1); 120 if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) 121 tmp |= S_028644_SEL_LINEAR(1); 122 123 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 124 pos_index = i; 125 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || 126 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || 127 rshader->input[i].name == TGSI_SEMANTIC_POSITION) { 128 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); 129 } 130 if (rshader->input[i].name == TGSI_SEMANTIC_FACE) 131 face_index = i; 132 if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && 133 rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { 134 tmp |= S_028644_PT_SPRITE_TEX(1); 135 } 136 r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); 137 } 138 for (i = 0; i < rshader->noutput; i++) { 139 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 140 r600_pipe_state_add_reg(rstate, 141 R_02880C_DB_SHADER_CONTROL, 142 S_02880C_Z_EXPORT_ENABLE(1), 143 S_02880C_Z_EXPORT_ENABLE(1), NULL); 144 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 145 r600_pipe_state_add_reg(rstate, 146 R_02880C_DB_SHADER_CONTROL, 147 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), 148 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); 149 } 150 151 exports_ps = 0; 152 num_cout = 0; 153 for (i = 0; i < rshader->noutput; i++) { 154 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 155 exports_ps |= 1; 156 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 157 num_cout++; 158 } 159 } 160 exports_ps |= S_028854_EXPORT_COLORS(num_cout); 161 if (!exports_ps) { 162 /* always at least export 1 component per pixel */ 163 exports_ps = 2; 164 } 165 166 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 167 S_0286CC_PERSP_GRADIENT_ENA(1); 168 spi_input_z = 0; 169 if (pos_index != -1) { 170 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | 171 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | 172 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | 173 S_0286CC_BARYC_SAMPLE_CNTL(1)); 174 spi_input_z |= 1; 175 } 176 177 spi_ps_in_control_1 = 0; 178 if (face_index != -1) { 179 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 180 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 181 } 182 183 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); 184 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); 185 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); 186 r600_pipe_state_add_reg(rstate, 187 R_028840_SQ_PGM_START_PS, 188 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 189 r600_pipe_state_add_reg(rstate, 190 R_028850_SQ_PGM_RESOURCES_PS, 191 S_028868_NUM_GPRS(rshader->bc.ngpr) | 192 S_028868_STACK_SIZE(rshader->bc.nstack), 193 0xFFFFFFFF, NULL); 194 r600_pipe_state_add_reg(rstate, 195 R_028854_SQ_PGM_EXPORTS_PS, 196 exports_ps, 0xFFFFFFFF, NULL); 197 r600_pipe_state_add_reg(rstate, 198 R_0288CC_SQ_PGM_CF_OFFSET_PS, 199 0x00000000, 0xFFFFFFFF, NULL); 200 201 if (rshader->uses_kill) { 202 /* only set some bits here, the other bits are set in the dsa state */ 203 r600_pipe_state_add_reg(rstate, 204 R_02880C_DB_SHADER_CONTROL, 205 S_02880C_KILL_ENABLE(1), 206 S_02880C_KILL_ENABLE(1), NULL); 207 } 208 r600_pipe_state_add_reg(rstate, 209 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, 210 0xFFFFFFFF, NULL); 211} 212 213static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 214{ 215 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 216 struct r600_shader *rshader = &shader->shader; 217 void *ptr; 218 219 /* copy new shader */ 220 if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) { 221 shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0); 222 if (shader->bo_fetch == NULL) { 223 return -ENOMEM; 224 } 225 ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL); 226 memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4); 227 r600_bo_unmap(rctx->radeon, shader->bo_fetch); 228 } 229 if (shader->bo == NULL) { 230 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); 231 if (shader->bo == NULL) { 232 return -ENOMEM; 233 } 234 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 235 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); 236 r600_bo_unmap(rctx->radeon, shader->bo); 237 } 238 /* build state */ 239 rshader->flat_shade = rctx->flatshade; 240 switch (rshader->processor_type) { 241 case TGSI_PROCESSOR_VERTEX: 242 if (rshader->family >= CHIP_CEDAR) { 243 evergreen_pipe_shader_vs(ctx, shader); 244 } else { 245 r600_pipe_shader_vs(ctx, shader); 246 } 247 break; 248 case TGSI_PROCESSOR_FRAGMENT: 249 if (rshader->family >= CHIP_CEDAR) { 250 evergreen_pipe_shader_ps(ctx, shader); 251 } else { 252 r600_pipe_shader_ps(ctx, shader); 253 } 254 break; 255 default: 256 return -EINVAL; 257 } 258 r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); 259 return 0; 260} 261 262static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) 263{ 264 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 265 struct r600_shader *shader = &rshader->shader; 266 const struct util_format_description *desc; 267 enum pipe_format resource_format[160]; 268 unsigned i, nresources = 0; 269 struct r600_bc *bc = &shader->bc_fetch; 270 struct r600_bc_cf *cf; 271 struct r600_bc_vtx *vtx; 272 273 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 274 return 0; 275 /* doing a full memcmp fell over the refcount */ 276 if ((rshader->vertex_elements.count == rctx->vertex_elements->count) && 277 (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 278 rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) { 279 return 0; 280 } 281 rshader->vertex_elements = *rctx->vertex_elements; 282 for (i = 0; i < rctx->vertex_elements->count; i++) { 283 resource_format[nresources++] = rctx->vertex_elements->hw_format[i]; 284 } 285 r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL); 286 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 287 switch (cf->inst) { 288 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 289 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 290 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 291 desc = util_format_description(resource_format[vtx->buffer_id]); 292 if (desc == NULL) { 293 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 294 return -EINVAL; 295 } 296 vtx->dst_sel_x = desc->swizzle[0]; 297 vtx->dst_sel_y = desc->swizzle[1]; 298 vtx->dst_sel_z = desc->swizzle[2]; 299 vtx->dst_sel_w = desc->swizzle[3]; 300 } 301 break; 302 default: 303 break; 304 } 305 } 306 return r600_bc_build(&shader->bc_fetch); 307} 308 309int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) 310{ 311 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 312 int r; 313 314 if (shader == NULL) 315 return -EINVAL; 316 /* there should be enough input */ 317 if (rctx->vertex_elements->count < shader->shader.bc.nresource) { 318 R600_ERR("%d resources provided, expecting %d\n", 319 rctx->vertex_elements->count, shader->shader.bc.nresource); 320 return -EINVAL; 321 } 322 r = r600_shader_update(ctx, shader); 323 if (r) 324 return r; 325 return r600_pipe_shader(ctx, shader); 326} 327 328int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 329int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 330{ 331 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 332 int r; 333 334//fprintf(stderr, "--------------------------------------------------------------\n"); 335//tgsi_dump(tokens, 0); 336 shader->shader.family = r600_get_family(rctx->radeon); 337 r = r600_shader_from_tgsi(tokens, &shader->shader); 338 if (r) { 339 R600_ERR("translation from TGSI failed !\n"); 340 return r; 341 } 342 r = r600_bc_build(&shader->shader.bc); 343 if (r) { 344 R600_ERR("building bytecode failed !\n"); 345 return r; 346 } 347 if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) { 348 r = r600_bc_build(&shader->shader.bc_fetch); 349 if (r) { 350 R600_ERR("building bytecode failed !\n"); 351 return r; 352 } 353 } 354//r600_bc_dump(&shader->shader.bc); 355//fprintf(stderr, "______________________________________________________________\n"); 356 return 0; 357} 358 359void 360r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 361{ 362 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 363 364 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 365 366 r600_bc_clear(&shader->shader.bc); 367 368 /* FIXME: is there more stuff to free? */ 369} 370 371/* 372 * tgsi -> r600 shader 373 */ 374struct r600_shader_tgsi_instruction; 375 376struct r600_shader_ctx { 377 struct tgsi_shader_info info; 378 struct tgsi_parse_context parse; 379 const struct tgsi_token *tokens; 380 unsigned type; 381 unsigned file_offset[TGSI_FILE_COUNT]; 382 unsigned temp_reg; 383 struct r600_shader_tgsi_instruction *inst_info; 384 struct r600_bc *bc; 385 struct r600_bc *bc_fetch; 386 struct r600_shader *shader; 387 u32 value[4]; 388 u32 *literals; 389 u32 nliterals; 390 u32 max_driver_temp_used; 391 /* needed for evergreen interpolation */ 392 boolean input_centroid; 393 boolean input_linear; 394 boolean input_perspective; 395 int num_interp_gpr; 396}; 397 398struct r600_shader_tgsi_instruction { 399 unsigned tgsi_opcode; 400 unsigned is_op3; 401 unsigned r600_opcode; 402 int (*process)(struct r600_shader_ctx *ctx); 403}; 404 405static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 406static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 407 408static int tgsi_is_supported(struct r600_shader_ctx *ctx) 409{ 410 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 411 int j; 412 413 if (i->Instruction.NumDstRegs > 1) { 414 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 415 return -EINVAL; 416 } 417 if (i->Instruction.Predicate) { 418 R600_ERR("predicate unsupported\n"); 419 return -EINVAL; 420 } 421#if 0 422 if (i->Instruction.Label) { 423 R600_ERR("label unsupported\n"); 424 return -EINVAL; 425 } 426#endif 427 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 428 if (i->Src[j].Register.Dimension) { 429 R600_ERR("unsupported src %d (dimension %d)\n", j, 430 i->Src[j].Register.Dimension); 431 return -EINVAL; 432 } 433 } 434 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 435 if (i->Dst[j].Register.Dimension) { 436 R600_ERR("unsupported dst (dimension)\n"); 437 return -EINVAL; 438 } 439 } 440 return 0; 441} 442 443static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 444{ 445 int i, r; 446 struct r600_bc_alu alu; 447 int gpr = 0, base_chan = 0; 448 int ij_index = 0; 449 450 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 451 ij_index = 0; 452 if (ctx->shader->input[input].centroid) 453 ij_index++; 454 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 455 ij_index = 0; 456 /* if we have perspective add one */ 457 if (ctx->input_perspective) { 458 ij_index++; 459 /* if we have perspective centroid */ 460 if (ctx->input_centroid) 461 ij_index++; 462 } 463 if (ctx->shader->input[input].centroid) 464 ij_index++; 465 } 466 467 /* work out gpr and base_chan from index */ 468 gpr = ij_index / 2; 469 base_chan = (2 * (ij_index % 2)) + 1; 470 471 for (i = 0; i < 8; i++) { 472 memset(&alu, 0, sizeof(struct r600_bc_alu)); 473 474 if (i < 4) 475 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 476 else 477 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 478 479 if ((i > 1) && (i < 6)) { 480 alu.dst.sel = ctx->shader->input[input].gpr; 481 alu.dst.write = 1; 482 } 483 484 alu.dst.chan = i % 4; 485 486 alu.src[0].sel = gpr; 487 alu.src[0].chan = (base_chan - (i % 2)); 488 489 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 490 491 alu.bank_swizzle_force = SQ_ALU_VEC_210; 492 if ((i % 4) == 3) 493 alu.last = 1; 494 r = r600_bc_add_alu(ctx->bc, &alu); 495 if (r) 496 return r; 497 } 498 return 0; 499} 500 501 502static int tgsi_declaration(struct r600_shader_ctx *ctx) 503{ 504 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 505 struct r600_bc_vtx vtx; 506 unsigned i; 507 int r; 508 509 switch (d->Declaration.File) { 510 case TGSI_FILE_INPUT: 511 i = ctx->shader->ninput++; 512 ctx->shader->input[i].name = d->Semantic.Name; 513 ctx->shader->input[i].sid = d->Semantic.Index; 514 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 515 ctx->shader->input[i].centroid = d->Declaration.Centroid; 516 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 517 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 518 /* turn input into fetch */ 519 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 520 vtx.inst = 0; 521 vtx.fetch_type = 0; 522 vtx.buffer_id = i; 523 /* register containing the index into the buffer */ 524 vtx.src_gpr = 0; 525 vtx.src_sel_x = 0; 526 vtx.mega_fetch_count = 0x1F; 527 vtx.dst_gpr = ctx->shader->input[i].gpr; 528 vtx.dst_sel_x = 0; 529 vtx.dst_sel_y = 1; 530 vtx.dst_sel_z = 2; 531 vtx.dst_sel_w = 3; 532 vtx.use_const_fields = 1; 533 r = r600_bc_add_vtx(ctx->bc_fetch, &vtx); 534 if (r) 535 return r; 536 } 537 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { 538 /* turn input into interpolate on EG */ 539 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 540 if (ctx->shader->input[i].interpolate > 0) { 541 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 542 evergreen_interp_alu(ctx, i); 543 } 544 } 545 } 546 break; 547 case TGSI_FILE_OUTPUT: 548 i = ctx->shader->noutput++; 549 ctx->shader->output[i].name = d->Semantic.Name; 550 ctx->shader->output[i].sid = d->Semantic.Index; 551 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 552 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 553 break; 554 case TGSI_FILE_CONSTANT: 555 case TGSI_FILE_TEMPORARY: 556 case TGSI_FILE_SAMPLER: 557 case TGSI_FILE_ADDRESS: 558 break; 559 default: 560 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 561 return -EINVAL; 562 } 563 return 0; 564} 565 566static int r600_get_temp(struct r600_shader_ctx *ctx) 567{ 568 return ctx->temp_reg + ctx->max_driver_temp_used++; 569} 570 571/* 572 * for evergreen we need to scan the shader to find the number of GPRs we need to 573 * reserve for interpolation. 574 * 575 * we need to know if we are going to emit 576 * any centroid inputs 577 * if perspective and linear are required 578*/ 579static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 580{ 581 int i; 582 int num_baryc; 583 584 ctx->input_linear = FALSE; 585 ctx->input_perspective = FALSE; 586 ctx->input_centroid = FALSE; 587 ctx->num_interp_gpr = 1; 588 589 /* any centroid inputs */ 590 for (i = 0; i < ctx->info.num_inputs; i++) { 591 /* skip position/face */ 592 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 593 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 594 continue; 595 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 596 ctx->input_linear = TRUE; 597 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 598 ctx->input_perspective = TRUE; 599 if (ctx->info.input_centroid[i]) 600 ctx->input_centroid = TRUE; 601 } 602 603 num_baryc = 0; 604 /* ignoring sample for now */ 605 if (ctx->input_perspective) 606 num_baryc++; 607 if (ctx->input_linear) 608 num_baryc++; 609 if (ctx->input_centroid) 610 num_baryc *= 2; 611 612 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 613 614 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 615 return ctx->num_interp_gpr; 616} 617 618int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 619{ 620 struct tgsi_full_immediate *immediate; 621 struct r600_shader_ctx ctx; 622 struct r600_bc_output output[32]; 623 unsigned output_done, noutput; 624 unsigned opcode; 625 int i, r = 0, pos0; 626 627 ctx.bc = &shader->bc; 628 ctx.bc_fetch = &shader->bc_fetch; 629 ctx.shader = shader; 630 r = r600_bc_init(ctx.bc, shader->family); 631 if (r) 632 return r; 633 ctx.tokens = tokens; 634 tgsi_scan_shader(tokens, &ctx.info); 635 tgsi_parse_init(&ctx.parse, tokens); 636 ctx.type = ctx.parse.FullHeader.Processor.Processor; 637 shader->processor_type = ctx.type; 638 if (shader->processor_type == TGSI_PROCESSOR_VERTEX) { 639 r = r600_bc_init(ctx.bc_fetch, shader->family); 640 if (r) 641 return r; 642 ctx.bc_fetch->type = -1; 643 } 644 ctx.bc->type = shader->processor_type; 645 646 /* register allocations */ 647 /* Values [0,127] correspond to GPR[0..127]. 648 * Values [128,159] correspond to constant buffer bank 0 649 * Values [160,191] correspond to constant buffer bank 1 650 * Values [256,511] correspond to cfile constants c[0..255]. 651 * Other special values are shown in the list below. 652 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 653 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 654 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 655 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 656 * 248 SQ_ALU_SRC_0: special constant 0.0. 657 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 658 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 659 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 660 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 661 * 253 SQ_ALU_SRC_LITERAL: literal constant. 662 * 254 SQ_ALU_SRC_PV: previous vector result. 663 * 255 SQ_ALU_SRC_PS: previous scalar result. 664 */ 665 for (i = 0; i < TGSI_FILE_COUNT; i++) { 666 ctx.file_offset[i] = 0; 667 } 668 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 669 ctx.file_offset[TGSI_FILE_INPUT] = 1; 670 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { 671 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 672 } else { 673 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 674 } 675 } 676 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { 677 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 678 } 679 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 680 ctx.info.file_count[TGSI_FILE_INPUT]; 681 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 682 ctx.info.file_count[TGSI_FILE_OUTPUT]; 683 684 ctx.file_offset[TGSI_FILE_CONSTANT] = 128; 685 686 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 687 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 688 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 689 690 ctx.nliterals = 0; 691 ctx.literals = NULL; 692 693 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 694 tgsi_parse_token(&ctx.parse); 695 switch (ctx.parse.FullToken.Token.Type) { 696 case TGSI_TOKEN_TYPE_IMMEDIATE: 697 immediate = &ctx.parse.FullToken.FullImmediate; 698 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 699 if(ctx.literals == NULL) { 700 r = -ENOMEM; 701 goto out_err; 702 } 703 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 704 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 705 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 706 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 707 ctx.nliterals++; 708 break; 709 case TGSI_TOKEN_TYPE_DECLARATION: 710 r = tgsi_declaration(&ctx); 711 if (r) 712 goto out_err; 713 break; 714 case TGSI_TOKEN_TYPE_INSTRUCTION: 715 r = tgsi_is_supported(&ctx); 716 if (r) 717 goto out_err; 718 ctx.max_driver_temp_used = 0; 719 /* reserve first tmp for everyone */ 720 r600_get_temp(&ctx); 721 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 722 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) 723 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 724 else 725 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 726 r = ctx.inst_info->process(&ctx); 727 if (r) 728 goto out_err; 729 r = r600_bc_add_literal(ctx.bc, ctx.value); 730 if (r) 731 goto out_err; 732 break; 733 default: 734 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 735 r = -EINVAL; 736 goto out_err; 737 } 738 } 739 /* export output */ 740 noutput = shader->noutput; 741 for (i = 0, pos0 = 0; i < noutput; i++) { 742 memset(&output[i], 0, sizeof(struct r600_bc_output)); 743 output[i].gpr = shader->output[i].gpr; 744 output[i].elem_size = 3; 745 output[i].swizzle_x = 0; 746 output[i].swizzle_y = 1; 747 output[i].swizzle_z = 2; 748 output[i].swizzle_w = 3; 749 output[i].barrier = 1; 750 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 751 output[i].array_base = i - pos0; 752 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 753 switch (ctx.type) { 754 case TGSI_PROCESSOR_VERTEX: 755 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 756 output[i].array_base = 60; 757 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 758 /* position doesn't count in array_base */ 759 pos0++; 760 } 761 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 762 output[i].array_base = 61; 763 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 764 /* position doesn't count in array_base */ 765 pos0++; 766 } 767 break; 768 case TGSI_PROCESSOR_FRAGMENT: 769 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 770 output[i].array_base = shader->output[i].sid; 771 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 772 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 773 output[i].array_base = 61; 774 output[i].swizzle_x = 2; 775 output[i].swizzle_y = 7; 776 output[i].swizzle_z = output[i].swizzle_w = 7; 777 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 778 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 779 output[i].array_base = 61; 780 output[i].swizzle_x = 7; 781 output[i].swizzle_y = 1; 782 output[i].swizzle_z = output[i].swizzle_w = 7; 783 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 784 } else { 785 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 786 r = -EINVAL; 787 goto out_err; 788 } 789 break; 790 default: 791 R600_ERR("unsupported processor type %d\n", ctx.type); 792 r = -EINVAL; 793 goto out_err; 794 } 795 } 796 /* add fake param output for vertex shader if no param is exported */ 797 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 798 for (i = 0, pos0 = 0; i < noutput; i++) { 799 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 800 pos0 = 1; 801 break; 802 } 803 } 804 if (!pos0) { 805 memset(&output[i], 0, sizeof(struct r600_bc_output)); 806 output[i].gpr = 0; 807 output[i].elem_size = 3; 808 output[i].swizzle_x = 0; 809 output[i].swizzle_y = 1; 810 output[i].swizzle_z = 2; 811 output[i].swizzle_w = 3; 812 output[i].barrier = 1; 813 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 814 output[i].array_base = 0; 815 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 816 noutput++; 817 } 818 } 819 /* add fake pixel export */ 820 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 821 memset(&output[0], 0, sizeof(struct r600_bc_output)); 822 output[0].gpr = 0; 823 output[0].elem_size = 3; 824 output[0].swizzle_x = 7; 825 output[0].swizzle_y = 7; 826 output[0].swizzle_z = 7; 827 output[0].swizzle_w = 7; 828 output[0].barrier = 1; 829 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 830 output[0].array_base = 0; 831 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 832 noutput++; 833 } 834 /* set export done on last export of each type */ 835 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 836 if (i == (noutput - 1)) { 837 output[i].end_of_program = 1; 838 } 839 if (!(output_done & (1 << output[i].type))) { 840 output_done |= (1 << output[i].type); 841 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 842 } 843 } 844 /* add return to fetch shader */ 845 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 846 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { 847 r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 848 } else { 849 r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 850 } 851 } 852 /* add output to bytecode */ 853 for (i = 0; i < noutput; i++) { 854 r = r600_bc_add_output(ctx.bc, &output[i]); 855 if (r) 856 goto out_err; 857 } 858 free(ctx.literals); 859 tgsi_parse_free(&ctx.parse); 860 return 0; 861out_err: 862 free(ctx.literals); 863 tgsi_parse_free(&ctx.parse); 864 return r; 865} 866 867static int tgsi_unsupported(struct r600_shader_ctx *ctx) 868{ 869 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 870 return -EINVAL; 871} 872 873static int tgsi_end(struct r600_shader_ctx *ctx) 874{ 875 return 0; 876} 877 878static int tgsi_src(struct r600_shader_ctx *ctx, 879 const struct tgsi_full_src_register *tgsi_src, 880 struct r600_bc_alu_src *r600_src) 881{ 882 int index; 883 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 884 r600_src->sel = tgsi_src->Register.Index; 885 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 886 r600_src->sel = 0; 887 index = tgsi_src->Register.Index; 888 ctx->value[0] = ctx->literals[index * 4 + 0]; 889 ctx->value[1] = ctx->literals[index * 4 + 1]; 890 ctx->value[2] = ctx->literals[index * 4 + 2]; 891 ctx->value[3] = ctx->literals[index * 4 + 3]; 892 } 893 if (tgsi_src->Register.Indirect) 894 r600_src->rel = V_SQ_REL_RELATIVE; 895 r600_src->neg = tgsi_src->Register.Negate; 896 r600_src->abs = tgsi_src->Register.Absolute; 897 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 898 return 0; 899} 900 901static int tgsi_dst(struct r600_shader_ctx *ctx, 902 const struct tgsi_full_dst_register *tgsi_dst, 903 unsigned swizzle, 904 struct r600_bc_alu_dst *r600_dst) 905{ 906 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 907 908 r600_dst->sel = tgsi_dst->Register.Index; 909 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 910 r600_dst->chan = swizzle; 911 r600_dst->write = 1; 912 if (tgsi_dst->Register.Indirect) 913 r600_dst->rel = V_SQ_REL_RELATIVE; 914 if (inst->Instruction.Saturate) { 915 r600_dst->clamp = 1; 916 } 917 return 0; 918} 919 920static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 921{ 922 switch (swizzle) { 923 case 0: 924 return tgsi_src->Register.SwizzleX; 925 case 1: 926 return tgsi_src->Register.SwizzleY; 927 case 2: 928 return tgsi_src->Register.SwizzleZ; 929 case 3: 930 return tgsi_src->Register.SwizzleW; 931 default: 932 return 0; 933 } 934} 935 936static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 937{ 938 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 939 struct r600_bc_alu alu; 940 int i, j, k, nconst, r; 941 942 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 943 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 944 nconst++; 945 } 946 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 947 if (r) { 948 return r; 949 } 950 } 951 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 952 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 953 int treg = r600_get_temp(ctx); 954 for (k = 0; k < 4; k++) { 955 memset(&alu, 0, sizeof(struct r600_bc_alu)); 956 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 957 alu.src[0].sel = r600_src[i].sel; 958 alu.src[0].chan = k; 959 alu.src[0].rel = r600_src[i].rel; 960 alu.dst.sel = treg; 961 alu.dst.chan = k; 962 alu.dst.write = 1; 963 if (k == 3) 964 alu.last = 1; 965 r = r600_bc_add_alu(ctx->bc, &alu); 966 if (r) 967 return r; 968 } 969 r600_src[i].sel = treg; 970 r600_src[i].rel =0; 971 j--; 972 } 973 } 974 return 0; 975} 976 977/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 978static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 979{ 980 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 981 struct r600_bc_alu alu; 982 int i, j, k, nliteral, r; 983 984 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 985 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 986 nliteral++; 987 } 988 } 989 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 990 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 991 int treg = r600_get_temp(ctx); 992 for (k = 0; k < 4; k++) { 993 memset(&alu, 0, sizeof(struct r600_bc_alu)); 994 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 995 alu.src[0].sel = r600_src[i].sel; 996 alu.src[0].chan = k; 997 alu.dst.sel = treg; 998 alu.dst.chan = k; 999 alu.dst.write = 1; 1000 if (k == 3) 1001 alu.last = 1; 1002 r = r600_bc_add_alu(ctx->bc, &alu); 1003 if (r) 1004 return r; 1005 } 1006 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); 1007 if (r) 1008 return r; 1009 r600_src[i].sel = treg; 1010 j--; 1011 } 1012 } 1013 return 0; 1014} 1015 1016static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 1017{ 1018 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1019 struct r600_bc_alu_src r600_src[3]; 1020 struct r600_bc_alu alu; 1021 int i, j, r; 1022 int lasti = 0; 1023 1024 for (i = 0; i < 4; i++) { 1025 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 1026 lasti = i; 1027 } 1028 } 1029 1030 r = tgsi_split_constant(ctx, r600_src); 1031 if (r) 1032 return r; 1033 r = tgsi_split_literal_constant(ctx, r600_src); 1034 if (r) 1035 return r; 1036 for (i = 0; i < lasti + 1; i++) { 1037 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1038 continue; 1039 1040 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1041 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1042 if (r) 1043 return r; 1044 1045 alu.inst = ctx->inst_info->r600_opcode; 1046 if (!swap) { 1047 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1048 alu.src[j] = r600_src[j]; 1049 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1050 } 1051 } else { 1052 alu.src[0] = r600_src[1]; 1053 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 1054 1055 alu.src[1] = r600_src[0]; 1056 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1057 } 1058 /* handle some special cases */ 1059 switch (ctx->inst_info->tgsi_opcode) { 1060 case TGSI_OPCODE_SUB: 1061 alu.src[1].neg = 1; 1062 break; 1063 case TGSI_OPCODE_ABS: 1064 alu.src[0].abs = 1; 1065 break; 1066 default: 1067 break; 1068 } 1069 if (i == lasti) { 1070 alu.last = 1; 1071 } 1072 r = r600_bc_add_alu(ctx->bc, &alu); 1073 if (r) 1074 return r; 1075 } 1076 return 0; 1077} 1078 1079static int tgsi_op2(struct r600_shader_ctx *ctx) 1080{ 1081 return tgsi_op2_s(ctx, 0); 1082} 1083 1084static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1085{ 1086 return tgsi_op2_s(ctx, 1); 1087} 1088 1089/* 1090 * r600 - trunc to -PI..PI range 1091 * r700 - normalize by dividing by 2PI 1092 * see fdo bug 27901 1093 */ 1094static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 1095 struct r600_bc_alu_src r600_src[3]) 1096{ 1097 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1098 int r; 1099 uint32_t lit_vals[4]; 1100 struct r600_bc_alu alu; 1101 1102 memset(lit_vals, 0, 4*4); 1103 r = tgsi_split_constant(ctx, r600_src); 1104 if (r) 1105 return r; 1106 r = tgsi_split_literal_constant(ctx, r600_src); 1107 if (r) 1108 return r; 1109 1110 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 1111 lit_vals[1] = fui(0.5f); 1112 1113 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1114 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1115 alu.is_op3 = 1; 1116 1117 alu.dst.chan = 0; 1118 alu.dst.sel = ctx->temp_reg; 1119 alu.dst.write = 1; 1120 1121 alu.src[0] = r600_src[0]; 1122 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1123 1124 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1125 alu.src[1].chan = 0; 1126 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1127 alu.src[2].chan = 1; 1128 alu.last = 1; 1129 r = r600_bc_add_alu(ctx->bc, &alu); 1130 if (r) 1131 return r; 1132 r = r600_bc_add_literal(ctx->bc, lit_vals); 1133 if (r) 1134 return r; 1135 1136 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1137 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1138 1139 alu.dst.chan = 0; 1140 alu.dst.sel = ctx->temp_reg; 1141 alu.dst.write = 1; 1142 1143 alu.src[0].sel = ctx->temp_reg; 1144 alu.src[0].chan = 0; 1145 alu.last = 1; 1146 r = r600_bc_add_alu(ctx->bc, &alu); 1147 if (r) 1148 return r; 1149 1150 if (ctx->bc->chiprev == CHIPREV_R600) { 1151 lit_vals[0] = fui(3.1415926535897f * 2.0f); 1152 lit_vals[1] = fui(-3.1415926535897f); 1153 } else { 1154 lit_vals[0] = fui(1.0f); 1155 lit_vals[1] = fui(-0.5f); 1156 } 1157 1158 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1159 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1160 alu.is_op3 = 1; 1161 1162 alu.dst.chan = 0; 1163 alu.dst.sel = ctx->temp_reg; 1164 alu.dst.write = 1; 1165 1166 alu.src[0].sel = ctx->temp_reg; 1167 alu.src[0].chan = 0; 1168 1169 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1170 alu.src[1].chan = 0; 1171 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1172 alu.src[2].chan = 1; 1173 alu.last = 1; 1174 r = r600_bc_add_alu(ctx->bc, &alu); 1175 if (r) 1176 return r; 1177 r = r600_bc_add_literal(ctx->bc, lit_vals); 1178 if (r) 1179 return r; 1180 return 0; 1181} 1182 1183static int tgsi_trig(struct r600_shader_ctx *ctx) 1184{ 1185 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1186 struct r600_bc_alu_src r600_src[3]; 1187 struct r600_bc_alu alu; 1188 int i, r; 1189 int lasti = 0; 1190 1191 r = tgsi_setup_trig(ctx, r600_src); 1192 if (r) 1193 return r; 1194 1195 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1196 alu.inst = ctx->inst_info->r600_opcode; 1197 alu.dst.chan = 0; 1198 alu.dst.sel = ctx->temp_reg; 1199 alu.dst.write = 1; 1200 1201 alu.src[0].sel = ctx->temp_reg; 1202 alu.src[0].chan = 0; 1203 alu.last = 1; 1204 r = r600_bc_add_alu(ctx->bc, &alu); 1205 if (r) 1206 return r; 1207 1208 /* replicate result */ 1209 for (i = 0; i < 4; i++) { 1210 if (inst->Dst[0].Register.WriteMask & (1 << i)) 1211 lasti = i; 1212 } 1213 for (i = 0; i < lasti + 1; i++) { 1214 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1215 continue; 1216 1217 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1218 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1219 1220 alu.src[0].sel = ctx->temp_reg; 1221 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1222 if (r) 1223 return r; 1224 if (i == lasti) 1225 alu.last = 1; 1226 r = r600_bc_add_alu(ctx->bc, &alu); 1227 if (r) 1228 return r; 1229 } 1230 return 0; 1231} 1232 1233static int tgsi_scs(struct r600_shader_ctx *ctx) 1234{ 1235 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1236 struct r600_bc_alu_src r600_src[3]; 1237 struct r600_bc_alu alu; 1238 int r; 1239 1240 /* We'll only need the trig stuff if we are going to write to the 1241 * X or Y components of the destination vector. 1242 */ 1243 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1244 r = tgsi_setup_trig(ctx, r600_src); 1245 if (r) 1246 return r; 1247 } 1248 1249 /* dst.x = COS */ 1250 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1251 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1252 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1253 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1254 if (r) 1255 return r; 1256 1257 alu.src[0].sel = ctx->temp_reg; 1258 alu.src[0].chan = 0; 1259 alu.last = 1; 1260 r = r600_bc_add_alu(ctx->bc, &alu); 1261 if (r) 1262 return r; 1263 } 1264 1265 /* dst.y = SIN */ 1266 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1267 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1268 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1269 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1270 if (r) 1271 return r; 1272 1273 alu.src[0].sel = ctx->temp_reg; 1274 alu.src[0].chan = 0; 1275 alu.last = 1; 1276 r = r600_bc_add_alu(ctx->bc, &alu); 1277 if (r) 1278 return r; 1279 } 1280 1281 /* dst.z = 0.0; */ 1282 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1283 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1284 1285 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1286 1287 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1288 if (r) 1289 return r; 1290 1291 alu.src[0].sel = V_SQ_ALU_SRC_0; 1292 alu.src[0].chan = 0; 1293 1294 alu.last = 1; 1295 1296 r = r600_bc_add_alu(ctx->bc, &alu); 1297 if (r) 1298 return r; 1299 1300 r = r600_bc_add_literal(ctx->bc, ctx->value); 1301 if (r) 1302 return r; 1303 } 1304 1305 /* dst.w = 1.0; */ 1306 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1307 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1308 1309 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1310 1311 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1312 if (r) 1313 return r; 1314 1315 alu.src[0].sel = V_SQ_ALU_SRC_1; 1316 alu.src[0].chan = 0; 1317 1318 alu.last = 1; 1319 1320 r = r600_bc_add_alu(ctx->bc, &alu); 1321 if (r) 1322 return r; 1323 1324 r = r600_bc_add_literal(ctx->bc, ctx->value); 1325 if (r) 1326 return r; 1327 } 1328 1329 return 0; 1330} 1331 1332static int tgsi_kill(struct r600_shader_ctx *ctx) 1333{ 1334 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1335 struct r600_bc_alu alu; 1336 int i, r; 1337 1338 for (i = 0; i < 4; i++) { 1339 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1340 alu.inst = ctx->inst_info->r600_opcode; 1341 1342 alu.dst.chan = i; 1343 1344 alu.src[0].sel = V_SQ_ALU_SRC_0; 1345 1346 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1347 alu.src[1].sel = V_SQ_ALU_SRC_1; 1348 alu.src[1].neg = 1; 1349 } else { 1350 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1351 if (r) 1352 return r; 1353 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1354 } 1355 if (i == 3) { 1356 alu.last = 1; 1357 } 1358 r = r600_bc_add_alu(ctx->bc, &alu); 1359 if (r) 1360 return r; 1361 } 1362 r = r600_bc_add_literal(ctx->bc, ctx->value); 1363 if (r) 1364 return r; 1365 1366 /* kill must be last in ALU */ 1367 ctx->bc->force_add_cf = 1; 1368 ctx->shader->uses_kill = TRUE; 1369 return 0; 1370} 1371 1372static int tgsi_lit(struct r600_shader_ctx *ctx) 1373{ 1374 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1375 struct r600_bc_alu alu; 1376 struct r600_bc_alu_src r600_src[3]; 1377 int r; 1378 1379 r = tgsi_split_constant(ctx, r600_src); 1380 if (r) 1381 return r; 1382 r = tgsi_split_literal_constant(ctx, r600_src); 1383 if (r) 1384 return r; 1385 1386 /* dst.x, <- 1.0 */ 1387 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1388 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1389 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1390 alu.src[0].chan = 0; 1391 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1392 if (r) 1393 return r; 1394 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1395 r = r600_bc_add_alu(ctx->bc, &alu); 1396 if (r) 1397 return r; 1398 1399 /* dst.y = max(src.x, 0.0) */ 1400 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1401 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1402 alu.src[0] = r600_src[0]; 1403 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1404 alu.src[1].chan = 0; 1405 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1406 if (r) 1407 return r; 1408 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1409 r = r600_bc_add_alu(ctx->bc, &alu); 1410 if (r) 1411 return r; 1412 1413 /* dst.w, <- 1.0 */ 1414 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1415 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1416 alu.src[0].sel = V_SQ_ALU_SRC_1; 1417 alu.src[0].chan = 0; 1418 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1419 if (r) 1420 return r; 1421 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1422 alu.last = 1; 1423 r = r600_bc_add_alu(ctx->bc, &alu); 1424 if (r) 1425 return r; 1426 1427 r = r600_bc_add_literal(ctx->bc, ctx->value); 1428 if (r) 1429 return r; 1430 1431 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1432 { 1433 int chan; 1434 int sel; 1435 1436 /* dst.z = log(src.y) */ 1437 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1438 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1439 alu.src[0] = r600_src[0]; 1440 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1441 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1442 if (r) 1443 return r; 1444 alu.last = 1; 1445 r = r600_bc_add_alu(ctx->bc, &alu); 1446 if (r) 1447 return r; 1448 1449 r = r600_bc_add_literal(ctx->bc, ctx->value); 1450 if (r) 1451 return r; 1452 1453 chan = alu.dst.chan; 1454 sel = alu.dst.sel; 1455 1456 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1457 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1458 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1459 alu.src[0] = r600_src[0]; 1460 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1461 alu.src[1].sel = sel; 1462 alu.src[1].chan = chan; 1463 1464 alu.src[2] = r600_src[0]; 1465 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1466 alu.dst.sel = ctx->temp_reg; 1467 alu.dst.chan = 0; 1468 alu.dst.write = 1; 1469 alu.is_op3 = 1; 1470 alu.last = 1; 1471 r = r600_bc_add_alu(ctx->bc, &alu); 1472 if (r) 1473 return r; 1474 1475 r = r600_bc_add_literal(ctx->bc, ctx->value); 1476 if (r) 1477 return r; 1478 /* dst.z = exp(tmp.x) */ 1479 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1480 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1481 alu.src[0].sel = ctx->temp_reg; 1482 alu.src[0].chan = 0; 1483 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1484 if (r) 1485 return r; 1486 alu.last = 1; 1487 r = r600_bc_add_alu(ctx->bc, &alu); 1488 if (r) 1489 return r; 1490 } 1491 return 0; 1492} 1493 1494static int tgsi_rsq(struct r600_shader_ctx *ctx) 1495{ 1496 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1497 struct r600_bc_alu alu; 1498 int i, r; 1499 1500 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1501 1502 /* FIXME: 1503 * For state trackers other than OpenGL, we'll want to use 1504 * _RECIPSQRT_IEEE instead. 1505 */ 1506 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1507 1508 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1509 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1510 if (r) 1511 return r; 1512 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1513 alu.src[i].abs = 1; 1514 } 1515 alu.dst.sel = ctx->temp_reg; 1516 alu.dst.write = 1; 1517 alu.last = 1; 1518 r = r600_bc_add_alu(ctx->bc, &alu); 1519 if (r) 1520 return r; 1521 r = r600_bc_add_literal(ctx->bc, ctx->value); 1522 if (r) 1523 return r; 1524 /* replicate result */ 1525 return tgsi_helper_tempx_replicate(ctx); 1526} 1527 1528static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1529{ 1530 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1531 struct r600_bc_alu alu; 1532 int i, r; 1533 1534 for (i = 0; i < 4; i++) { 1535 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1536 alu.src[0].sel = ctx->temp_reg; 1537 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1538 alu.dst.chan = i; 1539 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1540 if (r) 1541 return r; 1542 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1543 if (i == 3) 1544 alu.last = 1; 1545 r = r600_bc_add_alu(ctx->bc, &alu); 1546 if (r) 1547 return r; 1548 } 1549 return 0; 1550} 1551 1552static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1553{ 1554 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1555 struct r600_bc_alu alu; 1556 int i, r; 1557 1558 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1559 alu.inst = ctx->inst_info->r600_opcode; 1560 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1561 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1562 if (r) 1563 return r; 1564 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1565 } 1566 alu.dst.sel = ctx->temp_reg; 1567 alu.dst.write = 1; 1568 alu.last = 1; 1569 r = r600_bc_add_alu(ctx->bc, &alu); 1570 if (r) 1571 return r; 1572 r = r600_bc_add_literal(ctx->bc, ctx->value); 1573 if (r) 1574 return r; 1575 /* replicate result */ 1576 return tgsi_helper_tempx_replicate(ctx); 1577} 1578 1579static int tgsi_pow(struct r600_shader_ctx *ctx) 1580{ 1581 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1582 struct r600_bc_alu alu; 1583 int r; 1584 1585 /* LOG2(a) */ 1586 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1587 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1588 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1589 if (r) 1590 return r; 1591 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1592 alu.dst.sel = ctx->temp_reg; 1593 alu.dst.write = 1; 1594 alu.last = 1; 1595 r = r600_bc_add_alu(ctx->bc, &alu); 1596 if (r) 1597 return r; 1598 r = r600_bc_add_literal(ctx->bc,ctx->value); 1599 if (r) 1600 return r; 1601 /* b * LOG2(a) */ 1602 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1603 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); 1604 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1605 if (r) 1606 return r; 1607 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1608 alu.src[1].sel = ctx->temp_reg; 1609 alu.dst.sel = ctx->temp_reg; 1610 alu.dst.write = 1; 1611 alu.last = 1; 1612 r = r600_bc_add_alu(ctx->bc, &alu); 1613 if (r) 1614 return r; 1615 r = r600_bc_add_literal(ctx->bc,ctx->value); 1616 if (r) 1617 return r; 1618 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1619 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1620 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1621 alu.src[0].sel = ctx->temp_reg; 1622 alu.dst.sel = ctx->temp_reg; 1623 alu.dst.write = 1; 1624 alu.last = 1; 1625 r = r600_bc_add_alu(ctx->bc, &alu); 1626 if (r) 1627 return r; 1628 r = r600_bc_add_literal(ctx->bc,ctx->value); 1629 if (r) 1630 return r; 1631 return tgsi_helper_tempx_replicate(ctx); 1632} 1633 1634static int tgsi_ssg(struct r600_shader_ctx *ctx) 1635{ 1636 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1637 struct r600_bc_alu alu; 1638 struct r600_bc_alu_src r600_src[3]; 1639 int i, r; 1640 1641 r = tgsi_split_constant(ctx, r600_src); 1642 if (r) 1643 return r; 1644 r = tgsi_split_literal_constant(ctx, r600_src); 1645 if (r) 1646 return r; 1647 1648 /* tmp = (src > 0 ? 1 : src) */ 1649 for (i = 0; i < 4; i++) { 1650 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1651 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1652 alu.is_op3 = 1; 1653 1654 alu.dst.sel = ctx->temp_reg; 1655 alu.dst.chan = i; 1656 1657 alu.src[0] = r600_src[0]; 1658 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1659 1660 alu.src[1].sel = V_SQ_ALU_SRC_1; 1661 1662 alu.src[2] = r600_src[0]; 1663 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1664 if (i == 3) 1665 alu.last = 1; 1666 r = r600_bc_add_alu(ctx->bc, &alu); 1667 if (r) 1668 return r; 1669 } 1670 r = r600_bc_add_literal(ctx->bc, ctx->value); 1671 if (r) 1672 return r; 1673 1674 /* dst = (-tmp > 0 ? -1 : tmp) */ 1675 for (i = 0; i < 4; i++) { 1676 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1677 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1678 alu.is_op3 = 1; 1679 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1680 if (r) 1681 return r; 1682 1683 alu.src[0].sel = ctx->temp_reg; 1684 alu.src[0].chan = i; 1685 alu.src[0].neg = 1; 1686 1687 alu.src[1].sel = V_SQ_ALU_SRC_1; 1688 alu.src[1].neg = 1; 1689 1690 alu.src[2].sel = ctx->temp_reg; 1691 alu.src[2].chan = i; 1692 1693 if (i == 3) 1694 alu.last = 1; 1695 r = r600_bc_add_alu(ctx->bc, &alu); 1696 if (r) 1697 return r; 1698 } 1699 return 0; 1700} 1701 1702static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1703{ 1704 struct r600_bc_alu alu; 1705 int i, r; 1706 1707 r = r600_bc_add_literal(ctx->bc, ctx->value); 1708 if (r) 1709 return r; 1710 for (i = 0; i < 4; i++) { 1711 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1712 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1713 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1714 alu.dst.chan = i; 1715 } else { 1716 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1717 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1718 if (r) 1719 return r; 1720 alu.src[0].sel = ctx->temp_reg; 1721 alu.src[0].chan = i; 1722 } 1723 if (i == 3) { 1724 alu.last = 1; 1725 } 1726 r = r600_bc_add_alu(ctx->bc, &alu); 1727 if (r) 1728 return r; 1729 } 1730 return 0; 1731} 1732 1733static int tgsi_op3(struct r600_shader_ctx *ctx) 1734{ 1735 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1736 struct r600_bc_alu_src r600_src[3]; 1737 struct r600_bc_alu alu; 1738 int i, j, r; 1739 1740 r = tgsi_split_constant(ctx, r600_src); 1741 if (r) 1742 return r; 1743 r = tgsi_split_literal_constant(ctx, r600_src); 1744 if (r) 1745 return r; 1746 /* do it in 2 step as op3 doesn't support writemask */ 1747 for (i = 0; i < 4; i++) { 1748 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1749 alu.inst = ctx->inst_info->r600_opcode; 1750 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1751 alu.src[j] = r600_src[j]; 1752 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1753 } 1754 alu.dst.sel = ctx->temp_reg; 1755 alu.dst.chan = i; 1756 alu.dst.write = 1; 1757 alu.is_op3 = 1; 1758 if (i == 3) { 1759 alu.last = 1; 1760 } 1761 r = r600_bc_add_alu(ctx->bc, &alu); 1762 if (r) 1763 return r; 1764 } 1765 return tgsi_helper_copy(ctx, inst); 1766} 1767 1768static int tgsi_dp(struct r600_shader_ctx *ctx) 1769{ 1770 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1771 struct r600_bc_alu_src r600_src[3]; 1772 struct r600_bc_alu alu; 1773 int i, j, r; 1774 1775 r = tgsi_split_constant(ctx, r600_src); 1776 if (r) 1777 return r; 1778 r = tgsi_split_literal_constant(ctx, r600_src); 1779 if (r) 1780 return r; 1781 for (i = 0; i < 4; i++) { 1782 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1783 alu.inst = ctx->inst_info->r600_opcode; 1784 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1785 alu.src[j] = r600_src[j]; 1786 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1787 } 1788 alu.dst.sel = ctx->temp_reg; 1789 alu.dst.chan = i; 1790 alu.dst.write = 1; 1791 /* handle some special cases */ 1792 switch (ctx->inst_info->tgsi_opcode) { 1793 case TGSI_OPCODE_DP2: 1794 if (i > 1) { 1795 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1796 alu.src[0].chan = alu.src[1].chan = 0; 1797 } 1798 break; 1799 case TGSI_OPCODE_DP3: 1800 if (i > 2) { 1801 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1802 alu.src[0].chan = alu.src[1].chan = 0; 1803 } 1804 break; 1805 case TGSI_OPCODE_DPH: 1806 if (i == 3) { 1807 alu.src[0].sel = V_SQ_ALU_SRC_1; 1808 alu.src[0].chan = 0; 1809 alu.src[0].neg = 0; 1810 } 1811 break; 1812 default: 1813 break; 1814 } 1815 if (i == 3) { 1816 alu.last = 1; 1817 } 1818 r = r600_bc_add_alu(ctx->bc, &alu); 1819 if (r) 1820 return r; 1821 } 1822 return tgsi_helper_copy(ctx, inst); 1823} 1824 1825static int tgsi_tex(struct r600_shader_ctx *ctx) 1826{ 1827 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1828 struct r600_bc_tex tex; 1829 struct r600_bc_alu alu; 1830 unsigned src_gpr; 1831 int r, i; 1832 int opcode; 1833 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; 1834 uint32_t lit_vals[4]; 1835 1836 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1837 1838 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1839 /* Add perspective divide */ 1840 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1841 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1842 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1843 if (r) 1844 return r; 1845 1846 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1847 alu.dst.sel = ctx->temp_reg; 1848 alu.dst.chan = 3; 1849 alu.last = 1; 1850 alu.dst.write = 1; 1851 r = r600_bc_add_alu(ctx->bc, &alu); 1852 if (r) 1853 return r; 1854 1855 for (i = 0; i < 3; i++) { 1856 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1857 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1858 alu.src[0].sel = ctx->temp_reg; 1859 alu.src[0].chan = 3; 1860 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1861 if (r) 1862 return r; 1863 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1864 alu.dst.sel = ctx->temp_reg; 1865 alu.dst.chan = i; 1866 alu.dst.write = 1; 1867 r = r600_bc_add_alu(ctx->bc, &alu); 1868 if (r) 1869 return r; 1870 } 1871 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1872 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1873 alu.src[0].sel = V_SQ_ALU_SRC_1; 1874 alu.src[0].chan = 0; 1875 alu.dst.sel = ctx->temp_reg; 1876 alu.dst.chan = 3; 1877 alu.last = 1; 1878 alu.dst.write = 1; 1879 r = r600_bc_add_alu(ctx->bc, &alu); 1880 if (r) 1881 return r; 1882 src_not_temp = FALSE; 1883 src_gpr = ctx->temp_reg; 1884 } 1885 1886 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1887 int src_chan, src2_chan; 1888 1889 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1890 for (i = 0; i < 4; i++) { 1891 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1892 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1893 switch (i) { 1894 case 0: 1895 src_chan = 2; 1896 src2_chan = 1; 1897 break; 1898 case 1: 1899 src_chan = 2; 1900 src2_chan = 0; 1901 break; 1902 case 2: 1903 src_chan = 0; 1904 src2_chan = 2; 1905 break; 1906 case 3: 1907 src_chan = 1; 1908 src2_chan = 2; 1909 break; 1910 default: 1911 assert(0); 1912 src_chan = 0; 1913 src2_chan = 0; 1914 break; 1915 } 1916 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1917 if (r) 1918 return r; 1919 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1920 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1921 if (r) 1922 return r; 1923 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1924 alu.dst.sel = ctx->temp_reg; 1925 alu.dst.chan = i; 1926 if (i == 3) 1927 alu.last = 1; 1928 alu.dst.write = 1; 1929 r = r600_bc_add_alu(ctx->bc, &alu); 1930 if (r) 1931 return r; 1932 } 1933 1934 /* tmp1.z = RCP_e(|tmp1.z|) */ 1935 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1936 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1937 alu.src[0].sel = ctx->temp_reg; 1938 alu.src[0].chan = 2; 1939 alu.src[0].abs = 1; 1940 alu.dst.sel = ctx->temp_reg; 1941 alu.dst.chan = 2; 1942 alu.dst.write = 1; 1943 alu.last = 1; 1944 r = r600_bc_add_alu(ctx->bc, &alu); 1945 if (r) 1946 return r; 1947 1948 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1949 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1950 * muladd has no writemask, have to use another temp 1951 */ 1952 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1953 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1954 alu.is_op3 = 1; 1955 1956 alu.src[0].sel = ctx->temp_reg; 1957 alu.src[0].chan = 0; 1958 alu.src[1].sel = ctx->temp_reg; 1959 alu.src[1].chan = 2; 1960 1961 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1962 alu.src[2].chan = 0; 1963 1964 alu.dst.sel = ctx->temp_reg; 1965 alu.dst.chan = 0; 1966 alu.dst.write = 1; 1967 1968 r = r600_bc_add_alu(ctx->bc, &alu); 1969 if (r) 1970 return r; 1971 1972 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1973 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1974 alu.is_op3 = 1; 1975 1976 alu.src[0].sel = ctx->temp_reg; 1977 alu.src[0].chan = 1; 1978 alu.src[1].sel = ctx->temp_reg; 1979 alu.src[1].chan = 2; 1980 1981 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1982 alu.src[2].chan = 0; 1983 1984 alu.dst.sel = ctx->temp_reg; 1985 alu.dst.chan = 1; 1986 alu.dst.write = 1; 1987 1988 alu.last = 1; 1989 r = r600_bc_add_alu(ctx->bc, &alu); 1990 if (r) 1991 return r; 1992 1993 lit_vals[0] = fui(1.5f); 1994 1995 r = r600_bc_add_literal(ctx->bc, lit_vals); 1996 if (r) 1997 return r; 1998 src_not_temp = FALSE; 1999 src_gpr = ctx->temp_reg; 2000 } 2001 2002 if (src_not_temp) { 2003 for (i = 0; i < 4; i++) { 2004 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2005 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2006 alu.src[0].sel = src_gpr; 2007 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2008 alu.dst.sel = ctx->temp_reg; 2009 alu.dst.chan = i; 2010 if (i == 3) 2011 alu.last = 1; 2012 alu.dst.write = 1; 2013 r = r600_bc_add_alu(ctx->bc, &alu); 2014 if (r) 2015 return r; 2016 } 2017 src_gpr = ctx->temp_reg; 2018 } 2019 2020 opcode = ctx->inst_info->r600_opcode; 2021 if (opcode == SQ_TEX_INST_SAMPLE && 2022 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 2023 opcode = SQ_TEX_INST_SAMPLE_C; 2024 2025 memset(&tex, 0, sizeof(struct r600_bc_tex)); 2026 tex.inst = opcode; 2027 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 2028 tex.resource_id = tex.sampler_id; 2029 tex.src_gpr = src_gpr; 2030 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 2031 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 2032 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 2033 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 2034 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 2035 tex.src_sel_x = 0; 2036 tex.src_sel_y = 1; 2037 tex.src_sel_z = 2; 2038 tex.src_sel_w = 3; 2039 2040 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2041 tex.src_sel_x = 1; 2042 tex.src_sel_y = 0; 2043 tex.src_sel_z = 3; 2044 tex.src_sel_w = 1; 2045 } 2046 2047 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 2048 tex.coord_type_x = 1; 2049 tex.coord_type_y = 1; 2050 tex.coord_type_z = 1; 2051 tex.coord_type_w = 1; 2052 } 2053 2054 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 2055 tex.src_sel_w = 2; 2056 2057 r = r600_bc_add_tex(ctx->bc, &tex); 2058 if (r) 2059 return r; 2060 2061 /* add shadow ambient support - gallium doesn't do it yet */ 2062 return 0; 2063} 2064 2065static int tgsi_lrp(struct r600_shader_ctx *ctx) 2066{ 2067 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2068 struct r600_bc_alu_src r600_src[3]; 2069 struct r600_bc_alu alu; 2070 unsigned i; 2071 int r; 2072 2073 r = tgsi_split_constant(ctx, r600_src); 2074 if (r) 2075 return r; 2076 r = tgsi_split_literal_constant(ctx, r600_src); 2077 if (r) 2078 return r; 2079 /* 1 - src0 */ 2080 for (i = 0; i < 4; i++) { 2081 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2082 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2083 alu.src[0].sel = V_SQ_ALU_SRC_1; 2084 alu.src[0].chan = 0; 2085 alu.src[1] = r600_src[0]; 2086 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 2087 alu.src[1].neg = 1; 2088 alu.dst.sel = ctx->temp_reg; 2089 alu.dst.chan = i; 2090 if (i == 3) { 2091 alu.last = 1; 2092 } 2093 alu.dst.write = 1; 2094 r = r600_bc_add_alu(ctx->bc, &alu); 2095 if (r) 2096 return r; 2097 } 2098 r = r600_bc_add_literal(ctx->bc, ctx->value); 2099 if (r) 2100 return r; 2101 2102 /* (1 - src0) * src2 */ 2103 for (i = 0; i < 4; i++) { 2104 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2105 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2106 alu.src[0].sel = ctx->temp_reg; 2107 alu.src[0].chan = i; 2108 alu.src[1] = r600_src[2]; 2109 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2110 alu.dst.sel = ctx->temp_reg; 2111 alu.dst.chan = i; 2112 if (i == 3) { 2113 alu.last = 1; 2114 } 2115 alu.dst.write = 1; 2116 r = r600_bc_add_alu(ctx->bc, &alu); 2117 if (r) 2118 return r; 2119 } 2120 r = r600_bc_add_literal(ctx->bc, ctx->value); 2121 if (r) 2122 return r; 2123 2124 /* src0 * src1 + (1 - src0) * src2 */ 2125 for (i = 0; i < 4; i++) { 2126 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2127 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2128 alu.is_op3 = 1; 2129 alu.src[0] = r600_src[0]; 2130 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2131 alu.src[1] = r600_src[1]; 2132 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2133 alu.src[2].sel = ctx->temp_reg; 2134 alu.src[2].chan = i; 2135 alu.dst.sel = ctx->temp_reg; 2136 alu.dst.chan = i; 2137 if (i == 3) { 2138 alu.last = 1; 2139 } 2140 r = r600_bc_add_alu(ctx->bc, &alu); 2141 if (r) 2142 return r; 2143 } 2144 return tgsi_helper_copy(ctx, inst); 2145} 2146 2147static int tgsi_cmp(struct r600_shader_ctx *ctx) 2148{ 2149 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2150 struct r600_bc_alu_src r600_src[3]; 2151 struct r600_bc_alu alu; 2152 int use_temp = 0; 2153 int i, r; 2154 2155 r = tgsi_split_constant(ctx, r600_src); 2156 if (r) 2157 return r; 2158 r = tgsi_split_literal_constant(ctx, r600_src); 2159 if (r) 2160 return r; 2161 2162 if (inst->Dst[0].Register.WriteMask != 0xf) 2163 use_temp = 1; 2164 2165 for (i = 0; i < 4; i++) { 2166 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2167 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2168 alu.src[0] = r600_src[0]; 2169 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2170 2171 alu.src[1] = r600_src[2]; 2172 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2173 2174 alu.src[2] = r600_src[1]; 2175 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 2176 2177 if (use_temp) 2178 alu.dst.sel = ctx->temp_reg; 2179 else { 2180 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2181 if (r) 2182 return r; 2183 } 2184 alu.dst.chan = i; 2185 alu.dst.write = 1; 2186 alu.is_op3 = 1; 2187 if (i == 3) 2188 alu.last = 1; 2189 r = r600_bc_add_alu(ctx->bc, &alu); 2190 if (r) 2191 return r; 2192 } 2193 if (use_temp) 2194 return tgsi_helper_copy(ctx, inst); 2195 return 0; 2196} 2197 2198static int tgsi_xpd(struct r600_shader_ctx *ctx) 2199{ 2200 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2201 struct r600_bc_alu_src r600_src[3]; 2202 struct r600_bc_alu alu; 2203 uint32_t use_temp = 0; 2204 int i, r; 2205 2206 if (inst->Dst[0].Register.WriteMask != 0xf) 2207 use_temp = 1; 2208 2209 r = tgsi_split_constant(ctx, r600_src); 2210 if (r) 2211 return r; 2212 r = tgsi_split_literal_constant(ctx, r600_src); 2213 if (r) 2214 return r; 2215 2216 for (i = 0; i < 4; i++) { 2217 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2218 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2219 2220 alu.src[0] = r600_src[0]; 2221 switch (i) { 2222 case 0: 2223 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2224 break; 2225 case 1: 2226 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2227 break; 2228 case 2: 2229 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2230 break; 2231 case 3: 2232 alu.src[0].sel = V_SQ_ALU_SRC_0; 2233 alu.src[0].chan = i; 2234 } 2235 2236 alu.src[1] = r600_src[1]; 2237 switch (i) { 2238 case 0: 2239 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2240 break; 2241 case 1: 2242 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2243 break; 2244 case 2: 2245 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2246 break; 2247 case 3: 2248 alu.src[1].sel = V_SQ_ALU_SRC_0; 2249 alu.src[1].chan = i; 2250 } 2251 2252 alu.dst.sel = ctx->temp_reg; 2253 alu.dst.chan = i; 2254 alu.dst.write = 1; 2255 2256 if (i == 3) 2257 alu.last = 1; 2258 r = r600_bc_add_alu(ctx->bc, &alu); 2259 if (r) 2260 return r; 2261 2262 r = r600_bc_add_literal(ctx->bc, ctx->value); 2263 if (r) 2264 return r; 2265 } 2266 2267 for (i = 0; i < 4; i++) { 2268 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2269 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2270 2271 alu.src[0] = r600_src[0]; 2272 switch (i) { 2273 case 0: 2274 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2275 break; 2276 case 1: 2277 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2278 break; 2279 case 2: 2280 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2281 break; 2282 case 3: 2283 alu.src[0].sel = V_SQ_ALU_SRC_0; 2284 alu.src[0].chan = i; 2285 } 2286 2287 alu.src[1] = r600_src[1]; 2288 switch (i) { 2289 case 0: 2290 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2291 break; 2292 case 1: 2293 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2294 break; 2295 case 2: 2296 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2297 break; 2298 case 3: 2299 alu.src[1].sel = V_SQ_ALU_SRC_0; 2300 alu.src[1].chan = i; 2301 } 2302 2303 alu.src[2].sel = ctx->temp_reg; 2304 alu.src[2].neg = 1; 2305 alu.src[2].chan = i; 2306 2307 if (use_temp) 2308 alu.dst.sel = ctx->temp_reg; 2309 else { 2310 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2311 if (r) 2312 return r; 2313 } 2314 alu.dst.chan = i; 2315 alu.dst.write = 1; 2316 alu.is_op3 = 1; 2317 if (i == 3) 2318 alu.last = 1; 2319 r = r600_bc_add_alu(ctx->bc, &alu); 2320 if (r) 2321 return r; 2322 2323 r = r600_bc_add_literal(ctx->bc, ctx->value); 2324 if (r) 2325 return r; 2326 } 2327 if (use_temp) 2328 return tgsi_helper_copy(ctx, inst); 2329 return 0; 2330} 2331 2332static int tgsi_exp(struct r600_shader_ctx *ctx) 2333{ 2334 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2335 struct r600_bc_alu_src r600_src[3] = { { 0 } }; 2336 struct r600_bc_alu alu; 2337 int r; 2338 2339 /* result.x = 2^floor(src); */ 2340 if (inst->Dst[0].Register.WriteMask & 1) { 2341 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2342 2343 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2344 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2345 if (r) 2346 return r; 2347 2348 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2349 2350 alu.dst.sel = ctx->temp_reg; 2351 alu.dst.chan = 0; 2352 alu.dst.write = 1; 2353 alu.last = 1; 2354 r = r600_bc_add_alu(ctx->bc, &alu); 2355 if (r) 2356 return r; 2357 2358 r = r600_bc_add_literal(ctx->bc, ctx->value); 2359 if (r) 2360 return r; 2361 2362 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2363 alu.src[0].sel = ctx->temp_reg; 2364 alu.src[0].chan = 0; 2365 2366 alu.dst.sel = ctx->temp_reg; 2367 alu.dst.chan = 0; 2368 alu.dst.write = 1; 2369 alu.last = 1; 2370 r = r600_bc_add_alu(ctx->bc, &alu); 2371 if (r) 2372 return r; 2373 2374 r = r600_bc_add_literal(ctx->bc, ctx->value); 2375 if (r) 2376 return r; 2377 } 2378 2379 /* result.y = tmp - floor(tmp); */ 2380 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2381 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2382 2383 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2384 alu.src[0] = r600_src[0]; 2385 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2386 if (r) 2387 return r; 2388 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2389 2390 alu.dst.sel = ctx->temp_reg; 2391// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2392// if (r) 2393// return r; 2394 alu.dst.write = 1; 2395 alu.dst.chan = 1; 2396 2397 alu.last = 1; 2398 2399 r = r600_bc_add_alu(ctx->bc, &alu); 2400 if (r) 2401 return r; 2402 r = r600_bc_add_literal(ctx->bc, ctx->value); 2403 if (r) 2404 return r; 2405 } 2406 2407 /* result.z = RoughApprox2ToX(tmp);*/ 2408 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2409 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2410 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2411 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2412 if (r) 2413 return r; 2414 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2415 2416 alu.dst.sel = ctx->temp_reg; 2417 alu.dst.write = 1; 2418 alu.dst.chan = 2; 2419 2420 alu.last = 1; 2421 2422 r = r600_bc_add_alu(ctx->bc, &alu); 2423 if (r) 2424 return r; 2425 r = r600_bc_add_literal(ctx->bc, ctx->value); 2426 if (r) 2427 return r; 2428 } 2429 2430 /* result.w = 1.0;*/ 2431 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2432 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2433 2434 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2435 alu.src[0].sel = V_SQ_ALU_SRC_1; 2436 alu.src[0].chan = 0; 2437 2438 alu.dst.sel = ctx->temp_reg; 2439 alu.dst.chan = 3; 2440 alu.dst.write = 1; 2441 alu.last = 1; 2442 r = r600_bc_add_alu(ctx->bc, &alu); 2443 if (r) 2444 return r; 2445 r = r600_bc_add_literal(ctx->bc, ctx->value); 2446 if (r) 2447 return r; 2448 } 2449 return tgsi_helper_copy(ctx, inst); 2450} 2451 2452static int tgsi_log(struct r600_shader_ctx *ctx) 2453{ 2454 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2455 struct r600_bc_alu alu; 2456 int r; 2457 2458 /* result.x = floor(log2(src)); */ 2459 if (inst->Dst[0].Register.WriteMask & 1) { 2460 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2461 2462 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2463 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2464 if (r) 2465 return r; 2466 2467 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2468 2469 alu.dst.sel = ctx->temp_reg; 2470 alu.dst.chan = 0; 2471 alu.dst.write = 1; 2472 alu.last = 1; 2473 r = r600_bc_add_alu(ctx->bc, &alu); 2474 if (r) 2475 return r; 2476 2477 r = r600_bc_add_literal(ctx->bc, ctx->value); 2478 if (r) 2479 return r; 2480 2481 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2482 alu.src[0].sel = ctx->temp_reg; 2483 alu.src[0].chan = 0; 2484 2485 alu.dst.sel = ctx->temp_reg; 2486 alu.dst.chan = 0; 2487 alu.dst.write = 1; 2488 alu.last = 1; 2489 2490 r = r600_bc_add_alu(ctx->bc, &alu); 2491 if (r) 2492 return r; 2493 2494 r = r600_bc_add_literal(ctx->bc, ctx->value); 2495 if (r) 2496 return r; 2497 } 2498 2499 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2500 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2501 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2502 2503 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2504 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2505 if (r) 2506 return r; 2507 2508 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2509 2510 alu.dst.sel = ctx->temp_reg; 2511 alu.dst.chan = 1; 2512 alu.dst.write = 1; 2513 alu.last = 1; 2514 2515 r = r600_bc_add_alu(ctx->bc, &alu); 2516 if (r) 2517 return r; 2518 2519 r = r600_bc_add_literal(ctx->bc, ctx->value); 2520 if (r) 2521 return r; 2522 2523 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2524 2525 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2526 alu.src[0].sel = ctx->temp_reg; 2527 alu.src[0].chan = 1; 2528 2529 alu.dst.sel = ctx->temp_reg; 2530 alu.dst.chan = 1; 2531 alu.dst.write = 1; 2532 alu.last = 1; 2533 2534 r = r600_bc_add_alu(ctx->bc, &alu); 2535 if (r) 2536 return r; 2537 2538 r = r600_bc_add_literal(ctx->bc, ctx->value); 2539 if (r) 2540 return r; 2541 2542 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2543 2544 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2545 alu.src[0].sel = ctx->temp_reg; 2546 alu.src[0].chan = 1; 2547 2548 alu.dst.sel = ctx->temp_reg; 2549 alu.dst.chan = 1; 2550 alu.dst.write = 1; 2551 alu.last = 1; 2552 2553 r = r600_bc_add_alu(ctx->bc, &alu); 2554 if (r) 2555 return r; 2556 2557 r = r600_bc_add_literal(ctx->bc, ctx->value); 2558 if (r) 2559 return r; 2560 2561 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2562 2563 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2564 alu.src[0].sel = ctx->temp_reg; 2565 alu.src[0].chan = 1; 2566 2567 alu.dst.sel = ctx->temp_reg; 2568 alu.dst.chan = 1; 2569 alu.dst.write = 1; 2570 alu.last = 1; 2571 2572 r = r600_bc_add_alu(ctx->bc, &alu); 2573 if (r) 2574 return r; 2575 2576 r = r600_bc_add_literal(ctx->bc, ctx->value); 2577 if (r) 2578 return r; 2579 2580 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2581 2582 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2583 2584 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2585 if (r) 2586 return r; 2587 2588 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2589 2590 alu.src[1].sel = ctx->temp_reg; 2591 alu.src[1].chan = 1; 2592 2593 alu.dst.sel = ctx->temp_reg; 2594 alu.dst.chan = 1; 2595 alu.dst.write = 1; 2596 alu.last = 1; 2597 2598 r = r600_bc_add_alu(ctx->bc, &alu); 2599 if (r) 2600 return r; 2601 2602 r = r600_bc_add_literal(ctx->bc, ctx->value); 2603 if (r) 2604 return r; 2605 } 2606 2607 /* result.z = log2(src);*/ 2608 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2609 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2610 2611 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2612 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2613 if (r) 2614 return r; 2615 2616 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2617 2618 alu.dst.sel = ctx->temp_reg; 2619 alu.dst.write = 1; 2620 alu.dst.chan = 2; 2621 alu.last = 1; 2622 2623 r = r600_bc_add_alu(ctx->bc, &alu); 2624 if (r) 2625 return r; 2626 2627 r = r600_bc_add_literal(ctx->bc, ctx->value); 2628 if (r) 2629 return r; 2630 } 2631 2632 /* result.w = 1.0; */ 2633 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2634 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2635 2636 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2637 alu.src[0].sel = V_SQ_ALU_SRC_1; 2638 alu.src[0].chan = 0; 2639 2640 alu.dst.sel = ctx->temp_reg; 2641 alu.dst.chan = 3; 2642 alu.dst.write = 1; 2643 alu.last = 1; 2644 2645 r = r600_bc_add_alu(ctx->bc, &alu); 2646 if (r) 2647 return r; 2648 2649 r = r600_bc_add_literal(ctx->bc, ctx->value); 2650 if (r) 2651 return r; 2652 } 2653 2654 return tgsi_helper_copy(ctx, inst); 2655} 2656 2657static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2658{ 2659 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2660 struct r600_bc_alu alu; 2661 int r; 2662 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2663 2664 switch (inst->Instruction.Opcode) { 2665 case TGSI_OPCODE_ARL: 2666 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2667 break; 2668 case TGSI_OPCODE_ARR: 2669 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2670 break; 2671 default: 2672 assert(0); 2673 return -1; 2674 } 2675 2676 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2677 if (r) 2678 return r; 2679 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2680 alu.last = 1; 2681 alu.dst.chan = 0; 2682 alu.dst.sel = ctx->temp_reg; 2683 alu.dst.write = 1; 2684 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2685 if (r) 2686 return r; 2687 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2688 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2689 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2690 if (r) 2691 return r; 2692 alu.src[0].sel = ctx->temp_reg; 2693 alu.src[0].chan = 0; 2694 alu.last = 1; 2695 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2696 if (r) 2697 return r; 2698 return 0; 2699} 2700static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2701{ 2702 /* TODO from r600c, ar values don't persist between clauses */ 2703 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2704 struct r600_bc_alu alu; 2705 int r; 2706 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2707 2708 switch (inst->Instruction.Opcode) { 2709 case TGSI_OPCODE_ARL: 2710 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2711 break; 2712 case TGSI_OPCODE_ARR: 2713 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; 2714 break; 2715 default: 2716 assert(0); 2717 return -1; 2718 } 2719 2720 2721 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2722 if (r) 2723 return r; 2724 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2725 2726 alu.last = 1; 2727 2728 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2729 if (r) 2730 return r; 2731 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2732 return 0; 2733} 2734 2735static int tgsi_opdst(struct r600_shader_ctx *ctx) 2736{ 2737 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2738 struct r600_bc_alu alu; 2739 int i, r = 0; 2740 2741 for (i = 0; i < 4; i++) { 2742 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2743 2744 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2745 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2746 if (r) 2747 return r; 2748 2749 if (i == 0 || i == 3) { 2750 alu.src[0].sel = V_SQ_ALU_SRC_1; 2751 } else { 2752 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2753 if (r) 2754 return r; 2755 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2756 } 2757 2758 if (i == 0 || i == 2) { 2759 alu.src[1].sel = V_SQ_ALU_SRC_1; 2760 } else { 2761 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2762 if (r) 2763 return r; 2764 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2765 } 2766 if (i == 3) 2767 alu.last = 1; 2768 r = r600_bc_add_alu(ctx->bc, &alu); 2769 if (r) 2770 return r; 2771 } 2772 return 0; 2773} 2774 2775static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2776{ 2777 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2778 struct r600_bc_alu alu; 2779 int r; 2780 2781 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2782 alu.inst = opcode; 2783 alu.predicate = 1; 2784 2785 alu.dst.sel = ctx->temp_reg; 2786 alu.dst.write = 1; 2787 alu.dst.chan = 0; 2788 2789 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2790 if (r) 2791 return r; 2792 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2793 alu.src[1].sel = V_SQ_ALU_SRC_0; 2794 alu.src[1].chan = 0; 2795 2796 alu.last = 1; 2797 2798 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2799 if (r) 2800 return r; 2801 return 0; 2802} 2803 2804static int pops(struct r600_shader_ctx *ctx, int pops) 2805{ 2806 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2807 ctx->bc->cf_last->pop_count = pops; 2808 return 0; 2809} 2810 2811static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2812{ 2813 switch(reason) { 2814 case FC_PUSH_VPM: 2815 ctx->bc->callstack[ctx->bc->call_sp].current--; 2816 break; 2817 case FC_PUSH_WQM: 2818 case FC_LOOP: 2819 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2820 break; 2821 case FC_REP: 2822 /* TOODO : for 16 vp asic should -= 2; */ 2823 ctx->bc->callstack[ctx->bc->call_sp].current --; 2824 break; 2825 } 2826} 2827 2828static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2829{ 2830 if (check_max_only) { 2831 int diff; 2832 switch (reason) { 2833 case FC_PUSH_VPM: 2834 diff = 1; 2835 break; 2836 case FC_PUSH_WQM: 2837 diff = 4; 2838 break; 2839 default: 2840 assert(0); 2841 diff = 0; 2842 } 2843 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2844 ctx->bc->callstack[ctx->bc->call_sp].max) { 2845 ctx->bc->callstack[ctx->bc->call_sp].max = 2846 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2847 } 2848 return; 2849 } 2850 switch (reason) { 2851 case FC_PUSH_VPM: 2852 ctx->bc->callstack[ctx->bc->call_sp].current++; 2853 break; 2854 case FC_PUSH_WQM: 2855 case FC_LOOP: 2856 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2857 break; 2858 case FC_REP: 2859 ctx->bc->callstack[ctx->bc->call_sp].current++; 2860 break; 2861 } 2862 2863 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2864 ctx->bc->callstack[ctx->bc->call_sp].max) { 2865 ctx->bc->callstack[ctx->bc->call_sp].max = 2866 ctx->bc->callstack[ctx->bc->call_sp].current; 2867 } 2868} 2869 2870static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2871{ 2872 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2873 2874 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2875 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2876 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2877 sp->num_mid++; 2878} 2879 2880static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2881{ 2882 ctx->bc->fc_sp++; 2883 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2884 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2885} 2886 2887static void fc_poplevel(struct r600_shader_ctx *ctx) 2888{ 2889 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2890 if (sp->mid) { 2891 free(sp->mid); 2892 sp->mid = NULL; 2893 } 2894 sp->num_mid = 0; 2895 sp->start = NULL; 2896 sp->type = 0; 2897 ctx->bc->fc_sp--; 2898} 2899 2900#if 0 2901static int emit_return(struct r600_shader_ctx *ctx) 2902{ 2903 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2904 return 0; 2905} 2906 2907static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2908{ 2909 2910 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2911 ctx->bc->cf_last->pop_count = pops; 2912 /* TODO work out offset */ 2913 return 0; 2914} 2915 2916static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2917{ 2918 return 0; 2919} 2920 2921static void emit_testflag(struct r600_shader_ctx *ctx) 2922{ 2923 2924} 2925 2926static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2927{ 2928 emit_testflag(ctx); 2929 emit_jump_to_offset(ctx, 1, 4); 2930 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2931 pops(ctx, ifidx + 1); 2932 emit_return(ctx); 2933} 2934 2935static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2936{ 2937 emit_testflag(ctx); 2938 2939 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2940 ctx->bc->cf_last->pop_count = 1; 2941 2942 fc_set_mid(ctx, fc_sp); 2943 2944 pops(ctx, 1); 2945} 2946#endif 2947 2948static int tgsi_if(struct r600_shader_ctx *ctx) 2949{ 2950 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2951 2952 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2953 2954 fc_pushlevel(ctx, FC_IF); 2955 2956 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2957 return 0; 2958} 2959 2960static int tgsi_else(struct r600_shader_ctx *ctx) 2961{ 2962 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2963 ctx->bc->cf_last->pop_count = 1; 2964 2965 fc_set_mid(ctx, ctx->bc->fc_sp); 2966 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2967 return 0; 2968} 2969 2970static int tgsi_endif(struct r600_shader_ctx *ctx) 2971{ 2972 pops(ctx, 1); 2973 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2974 R600_ERR("if/endif unbalanced in shader\n"); 2975 return -1; 2976 } 2977 2978 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2979 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2980 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2981 } else { 2982 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2983 } 2984 fc_poplevel(ctx); 2985 2986 callstack_decrease_current(ctx, FC_PUSH_VPM); 2987 return 0; 2988} 2989 2990static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2991{ 2992 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2993 2994 fc_pushlevel(ctx, FC_LOOP); 2995 2996 /* check stack depth */ 2997 callstack_check_depth(ctx, FC_LOOP, 0); 2998 return 0; 2999} 3000 3001static int tgsi_endloop(struct r600_shader_ctx *ctx) 3002{ 3003 int i; 3004 3005 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 3006 3007 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 3008 R600_ERR("loop/endloop in shader code are not paired.\n"); 3009 return -EINVAL; 3010 } 3011 3012 /* fixup loop pointers - from r600isa 3013 LOOP END points to CF after LOOP START, 3014 LOOP START point to CF after LOOP END 3015 BRK/CONT point to LOOP END CF 3016 */ 3017 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 3018 3019 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3020 3021 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 3022 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 3023 } 3024 /* TODO add LOOPRET support */ 3025 fc_poplevel(ctx); 3026 callstack_decrease_current(ctx, FC_LOOP); 3027 return 0; 3028} 3029 3030static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 3031{ 3032 unsigned int fscp; 3033 3034 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 3035 { 3036 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 3037 break; 3038 } 3039 3040 if (fscp == 0) { 3041 R600_ERR("Break not inside loop/endloop pair\n"); 3042 return -EINVAL; 3043 } 3044 3045 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3046 ctx->bc->cf_last->pop_count = 1; 3047 3048 fc_set_mid(ctx, fscp); 3049 3050 pops(ctx, 1); 3051 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 3052 return 0; 3053} 3054 3055static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 3056 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3057 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3058 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3059 3060 /* FIXME: 3061 * For state trackers other than OpenGL, we'll want to use 3062 * _RECIP_IEEE instead. 3063 */ 3064 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 3065 3066 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 3067 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3068 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3069 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3070 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3071 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3072 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3073 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3074 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3075 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3076 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3077 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3078 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3079 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3080 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3081 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3082 /* gap */ 3083 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3084 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3085 /* gap */ 3086 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3087 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3088 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3089 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3090 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3091 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3092 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3093 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3094 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3095 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3096 /* gap */ 3097 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3098 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3099 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3100 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3101 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3102 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3103 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3104 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3105 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3106 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3107 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3108 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3109 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3110 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3111 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3112 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3113 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3114 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3115 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3116 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3117 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3118 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3119 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3120 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3121 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3122 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3123 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3124 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3125 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3126 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3127 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3128 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3129 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3130 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3131 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3132 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3133 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3134 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3135 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3136 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3137 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3138 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3139 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3140 /* gap */ 3141 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3142 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3143 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3144 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3145 /* gap */ 3146 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3147 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3148 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3149 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3150 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3151 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3152 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3153 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3154 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3155 /* gap */ 3156 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3157 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3158 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3159 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3160 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3161 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3162 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3163 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3164 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3165 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3166 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3167 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3168 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3169 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3170 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3171 /* gap */ 3172 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3173 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3174 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3175 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3176 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3177 /* gap */ 3178 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3179 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3180 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3181 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3182 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3183 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3184 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3185 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3186 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3187 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3188 /* gap */ 3189 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3190 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3191 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3192 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3193 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3194 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3195 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3196 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3197 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3198 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3199 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3200 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3201 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3202 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3203 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3204 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3205 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3206 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3207 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3208 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3209 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3210 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3211 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3212 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3213 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3214 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3215 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3216 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3217}; 3218 3219static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3220 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3221 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3222 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3223 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3224 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 3225 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3226 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3227 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3228 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3229 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3230 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3231 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3232 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3233 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3234 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3235 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3236 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3237 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3238 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3239 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3240 /* gap */ 3241 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3242 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3243 /* gap */ 3244 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3245 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3246 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3247 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3248 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3249 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3250 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3251 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3252 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3253 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3254 /* gap */ 3255 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3256 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3257 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3258 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3259 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3260 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3261 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3262 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3263 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3264 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3265 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3266 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3267 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3268 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3269 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3270 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3271 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3272 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3273 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3274 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3275 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3276 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3277 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3278 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3279 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3280 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3281 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3282 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3283 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3284 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3285 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3286 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3287 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3288 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3289 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3290 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3291 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3292 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3293 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3294 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3295 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3296 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3297 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3298 /* gap */ 3299 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3300 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3301 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3302 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3303 /* gap */ 3304 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3305 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3306 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3307 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3308 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3309 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3310 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3311 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3312 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3313 /* gap */ 3314 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3315 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3316 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3317 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3318 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3319 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3320 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3321 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3322 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3323 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3324 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3325 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3326 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3327 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3328 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3329 /* gap */ 3330 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3331 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3332 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3333 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3334 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3335 /* gap */ 3336 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3337 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3338 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3339 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3340 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3341 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3342 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3343 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3344 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3345 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3346 /* gap */ 3347 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3348 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3349 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3350 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3351 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3352 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3353 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3354 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3355 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3356 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3357 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3358 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3359 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3360 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3361 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3362 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3363 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3364 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3365 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3366 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3367 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3368 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3369 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3370 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3371 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3372 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3373 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3374 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3375}; 3376