r600_shader.c revision 07e0424a172970a6ea06e09fe92c1681d8f0f260
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_opcodes.h" 32#include "r600d.h" 33#include <stdio.h> 34#include <errno.h> 35 36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) 37{ 38 struct r600_pipe_state *rstate = &shader->rstate; 39 struct r600_shader *rshader = &shader->shader; 40 unsigned spi_vs_out_id[10]; 41 unsigned i, tmp; 42 43 /* clear previous register */ 44 rstate->nregs = 0; 45 46 /* so far never got proper semantic id from tgsi */ 47 for (i = 0; i < 10; i++) { 48 spi_vs_out_id[i] = 0; 49 } 50 for (i = 0; i < 32; i++) { 51 tmp = i << ((i & 3) * 8); 52 spi_vs_out_id[i / 4] |= tmp; 53 } 54 for (i = 0; i < 10; i++) { 55 r600_pipe_state_add_reg(rstate, 56 R_028614_SPI_VS_OUT_ID_0 + i * 4, 57 spi_vs_out_id[i], 0xFFFFFFFF, NULL); 58 } 59 60 r600_pipe_state_add_reg(rstate, 61 R_0286C4_SPI_VS_OUT_CONFIG, 62 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 63 0xFFFFFFFF, NULL); 64 r600_pipe_state_add_reg(rstate, 65 R_028868_SQ_PGM_RESOURCES_VS, 66 S_028868_NUM_GPRS(rshader->bc.ngpr) | 67 S_028868_STACK_SIZE(rshader->bc.nstack), 68 0xFFFFFFFF, NULL); 69 r600_pipe_state_add_reg(rstate, 70 R_0288A4_SQ_PGM_RESOURCES_FS, 71 0x00000000, 0xFFFFFFFF, NULL); 72 r600_pipe_state_add_reg(rstate, 73 R_0288D0_SQ_PGM_CF_OFFSET_VS, 74 0x00000000, 0xFFFFFFFF, NULL); 75 r600_pipe_state_add_reg(rstate, 76 R_0288DC_SQ_PGM_CF_OFFSET_FS, 77 0x00000000, 0xFFFFFFFF, NULL); 78 r600_pipe_state_add_reg(rstate, 79 R_028858_SQ_PGM_START_VS, 80 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 81 r600_pipe_state_add_reg(rstate, 82 R_028894_SQ_PGM_START_FS, 83 r600_bo_offset(shader->bo_fetch) >> 8, 0xFFFFFFFF, shader->bo_fetch); 84 85 r600_pipe_state_add_reg(rstate, 86 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, 87 0xFFFFFFFF, NULL); 88 89} 90 91int r600_find_vs_semantic_index(struct r600_shader *vs, 92 struct r600_shader *ps, int id) 93{ 94 struct r600_shader_io *input = &ps->input[id]; 95 96 for (int i = 0; i < vs->noutput; i++) { 97 if (input->name == vs->output[i].name && 98 input->sid == vs->output[i].sid) { 99 return i - 1; 100 } 101 } 102 return 0; 103} 104 105static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) 106{ 107 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 108 struct r600_pipe_state *rstate = &shader->rstate; 109 struct r600_shader *rshader = &shader->shader; 110 unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1; 111 int pos_index = -1, face_index = -1; 112 113 /* clear previous register */ 114 rstate->nregs = 0; 115 116 for (i = 0; i < rshader->ninput; i++) { 117 tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); 118 if (rshader->input[i].centroid) 119 tmp |= S_028644_SEL_CENTROID(1); 120 if (rshader->input[i].interpolate == TGSI_INTERPOLATE_LINEAR) 121 tmp |= S_028644_SEL_LINEAR(1); 122 123 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 124 pos_index = i; 125 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || 126 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || 127 rshader->input[i].name == TGSI_SEMANTIC_POSITION) { 128 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); 129 } 130 if (rshader->input[i].name == TGSI_SEMANTIC_FACE) 131 face_index = i; 132 if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && 133 rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { 134 tmp |= S_028644_PT_SPRITE_TEX(1); 135 } 136 r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); 137 } 138 for (i = 0; i < rshader->noutput; i++) { 139 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 140 r600_pipe_state_add_reg(rstate, 141 R_02880C_DB_SHADER_CONTROL, 142 S_02880C_Z_EXPORT_ENABLE(1), 143 S_02880C_Z_EXPORT_ENABLE(1), NULL); 144 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 145 r600_pipe_state_add_reg(rstate, 146 R_02880C_DB_SHADER_CONTROL, 147 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), 148 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); 149 } 150 151 exports_ps = 0; 152 num_cout = 0; 153 for (i = 0; i < rshader->noutput; i++) { 154 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 155 exports_ps |= 1; 156 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 157 num_cout++; 158 } 159 } 160 exports_ps |= S_028854_EXPORT_COLORS(num_cout); 161 if (!exports_ps) { 162 /* always at least export 1 component per pixel */ 163 exports_ps = 2; 164 } 165 166 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 167 S_0286CC_PERSP_GRADIENT_ENA(1); 168 spi_input_z = 0; 169 if (pos_index != -1) { 170 spi_ps_in_control_0 |= (S_0286CC_POSITION_ENA(1) | 171 S_0286CC_POSITION_CENTROID(rshader->input[pos_index].centroid) | 172 S_0286CC_POSITION_ADDR(rshader->input[pos_index].gpr) | 173 S_0286CC_BARYC_SAMPLE_CNTL(1)); 174 spi_input_z |= 1; 175 } 176 177 spi_ps_in_control_1 = 0; 178 if (face_index != -1) { 179 spi_ps_in_control_1 |= S_0286D0_FRONT_FACE_ENA(1) | 180 S_0286D0_FRONT_FACE_ADDR(rshader->input[face_index].gpr); 181 } 182 183 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); 184 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, spi_ps_in_control_1, 0xFFFFFFFF, NULL); 185 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); 186 r600_pipe_state_add_reg(rstate, 187 R_028840_SQ_PGM_START_PS, 188 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 189 r600_pipe_state_add_reg(rstate, 190 R_028850_SQ_PGM_RESOURCES_PS, 191 S_028868_NUM_GPRS(rshader->bc.ngpr) | 192 S_028868_STACK_SIZE(rshader->bc.nstack), 193 0xFFFFFFFF, NULL); 194 r600_pipe_state_add_reg(rstate, 195 R_028854_SQ_PGM_EXPORTS_PS, 196 exports_ps, 0xFFFFFFFF, NULL); 197 r600_pipe_state_add_reg(rstate, 198 R_0288CC_SQ_PGM_CF_OFFSET_PS, 199 0x00000000, 0xFFFFFFFF, NULL); 200 201 if (rshader->uses_kill) { 202 /* only set some bits here, the other bits are set in the dsa state */ 203 r600_pipe_state_add_reg(rstate, 204 R_02880C_DB_SHADER_CONTROL, 205 S_02880C_KILL_ENABLE(1), 206 S_02880C_KILL_ENABLE(1), NULL); 207 } 208 r600_pipe_state_add_reg(rstate, 209 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, 210 0xFFFFFFFF, NULL); 211} 212 213static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 214{ 215 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 216 struct r600_shader *rshader = &shader->shader; 217 void *ptr; 218 219 /* copy new shader */ 220 if (rshader->processor_type == TGSI_PROCESSOR_VERTEX && shader->bo_fetch == NULL) { 221 shader->bo_fetch = r600_bo(rctx->radeon, rshader->bc_fetch.ndw * 4, 4096, 0, 0); 222 if (shader->bo_fetch == NULL) { 223 return -ENOMEM; 224 } 225 ptr = r600_bo_map(rctx->radeon, shader->bo_fetch, 0, NULL); 226 memcpy(ptr, rshader->bc_fetch.bytecode, rshader->bc_fetch.ndw * 4); 227 r600_bo_unmap(rctx->radeon, shader->bo_fetch); 228 } 229 if (shader->bo == NULL) { 230 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0, 0); 231 if (shader->bo == NULL) { 232 return -ENOMEM; 233 } 234 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 235 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); 236 r600_bo_unmap(rctx->radeon, shader->bo); 237 } 238 /* build state */ 239 rshader->flat_shade = rctx->flatshade; 240 switch (rshader->processor_type) { 241 case TGSI_PROCESSOR_VERTEX: 242 if (rshader->family >= CHIP_CEDAR) { 243 evergreen_pipe_shader_vs(ctx, shader); 244 } else { 245 r600_pipe_shader_vs(ctx, shader); 246 } 247 break; 248 case TGSI_PROCESSOR_FRAGMENT: 249 if (rshader->family >= CHIP_CEDAR) { 250 evergreen_pipe_shader_ps(ctx, shader); 251 } else { 252 r600_pipe_shader_ps(ctx, shader); 253 } 254 break; 255 default: 256 return -EINVAL; 257 } 258 r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); 259 return 0; 260} 261 262static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) 263{ 264 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 265 struct r600_shader *shader = &rshader->shader; 266 const struct util_format_description *desc; 267 enum pipe_format resource_format[160]; 268 unsigned i, nresources = 0; 269 struct r600_bc *bc = &shader->bc_fetch; 270 struct r600_bc_cf *cf; 271 struct r600_bc_vtx *vtx; 272 273 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 274 return 0; 275 /* doing a full memcmp fell over the refcount */ 276 if ((rshader->vertex_elements.count == rctx->vertex_elements->count) && 277 (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 278 rctx->vertex_elements->count * sizeof(struct pipe_vertex_element)))) { 279 return 0; 280 } 281 rshader->vertex_elements = *rctx->vertex_elements; 282 for (i = 0; i < rctx->vertex_elements->count; i++) { 283 resource_format[nresources++] = rctx->vertex_elements->hw_format[i]; 284 } 285 r600_bo_reference(rctx->radeon, &rshader->bo_fetch, NULL); 286 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 287 switch (cf->inst) { 288 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 289 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 290 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 291 desc = util_format_description(resource_format[vtx->buffer_id]); 292 if (desc == NULL) { 293 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 294 return -EINVAL; 295 } 296 vtx->dst_sel_x = desc->swizzle[0]; 297 vtx->dst_sel_y = desc->swizzle[1]; 298 vtx->dst_sel_z = desc->swizzle[2]; 299 vtx->dst_sel_w = desc->swizzle[3]; 300 } 301 break; 302 default: 303 break; 304 } 305 } 306 return r600_bc_build(&shader->bc_fetch); 307} 308 309int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) 310{ 311 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 312 int r; 313 314 if (shader == NULL) 315 return -EINVAL; 316 /* there should be enough input */ 317 if (rctx->vertex_elements->count < shader->shader.bc.nresource) { 318 R600_ERR("%d resources provided, expecting %d\n", 319 rctx->vertex_elements->count, shader->shader.bc.nresource); 320 return -EINVAL; 321 } 322 r = r600_shader_update(ctx, shader); 323 if (r) 324 return r; 325 return r600_pipe_shader(ctx, shader); 326} 327 328int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 329int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 330{ 331 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 332 int r; 333 334//fprintf(stderr, "--------------------------------------------------------------\n"); 335//tgsi_dump(tokens, 0); 336 shader->shader.family = r600_get_family(rctx->radeon); 337 r = r600_shader_from_tgsi(tokens, &shader->shader); 338 if (r) { 339 R600_ERR("translation from TGSI failed !\n"); 340 return r; 341 } 342 r = r600_bc_build(&shader->shader.bc); 343 if (r) { 344 R600_ERR("building bytecode failed !\n"); 345 return r; 346 } 347 if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) { 348 r = r600_bc_build(&shader->shader.bc_fetch); 349 if (r) { 350 R600_ERR("building bytecode failed !\n"); 351 return r; 352 } 353 } 354//fprintf(stderr, "______________________________________________________________\n"); 355 return 0; 356} 357 358void 359r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) 360{ 361 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 362 363 r600_bo_reference(rctx->radeon, &shader->bo, NULL); 364 365 r600_bc_clear(&shader->shader.bc); 366 367 /* FIXME: is there more stuff to free? */ 368} 369 370/* 371 * tgsi -> r600 shader 372 */ 373struct r600_shader_tgsi_instruction; 374 375struct r600_shader_ctx { 376 struct tgsi_shader_info info; 377 struct tgsi_parse_context parse; 378 const struct tgsi_token *tokens; 379 unsigned type; 380 unsigned file_offset[TGSI_FILE_COUNT]; 381 unsigned temp_reg; 382 struct r600_shader_tgsi_instruction *inst_info; 383 struct r600_bc *bc; 384 struct r600_bc *bc_fetch; 385 struct r600_shader *shader; 386 u32 value[4]; 387 u32 *literals; 388 u32 nliterals; 389 u32 max_driver_temp_used; 390 /* needed for evergreen interpolation */ 391 boolean input_centroid; 392 boolean input_linear; 393 boolean input_perspective; 394 int num_interp_gpr; 395}; 396 397struct r600_shader_tgsi_instruction { 398 unsigned tgsi_opcode; 399 unsigned is_op3; 400 unsigned r600_opcode; 401 int (*process)(struct r600_shader_ctx *ctx); 402}; 403 404static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 405static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 406 407static int tgsi_is_supported(struct r600_shader_ctx *ctx) 408{ 409 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 410 int j; 411 412 if (i->Instruction.NumDstRegs > 1) { 413 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 414 return -EINVAL; 415 } 416 if (i->Instruction.Predicate) { 417 R600_ERR("predicate unsupported\n"); 418 return -EINVAL; 419 } 420#if 0 421 if (i->Instruction.Label) { 422 R600_ERR("label unsupported\n"); 423 return -EINVAL; 424 } 425#endif 426 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 427 if (i->Src[j].Register.Dimension) { 428 R600_ERR("unsupported src %d (dimension %d)\n", j, 429 i->Src[j].Register.Dimension); 430 return -EINVAL; 431 } 432 } 433 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 434 if (i->Dst[j].Register.Dimension) { 435 R600_ERR("unsupported dst (dimension)\n"); 436 return -EINVAL; 437 } 438 } 439 return 0; 440} 441 442static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) 443{ 444 int i, r; 445 struct r600_bc_alu alu; 446 int gpr = 0, base_chan = 0; 447 int ij_index = 0; 448 449 if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { 450 ij_index = 0; 451 if (ctx->shader->input[input].centroid) 452 ij_index++; 453 } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { 454 ij_index = 0; 455 /* if we have perspective add one */ 456 if (ctx->input_perspective) { 457 ij_index++; 458 /* if we have perspective centroid */ 459 if (ctx->input_centroid) 460 ij_index++; 461 } 462 if (ctx->shader->input[input].centroid) 463 ij_index++; 464 } 465 466 /* work out gpr and base_chan from index */ 467 gpr = ij_index / 2; 468 base_chan = (2 * (ij_index % 2)) + 1; 469 470 for (i = 0; i < 8; i++) { 471 memset(&alu, 0, sizeof(struct r600_bc_alu)); 472 473 if (i < 4) 474 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 475 else 476 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 477 478 if ((i > 1) && (i < 6)) { 479 alu.dst.sel = ctx->shader->input[input].gpr; 480 alu.dst.write = 1; 481 } 482 483 alu.dst.chan = i % 4; 484 485 alu.src[0].sel = gpr; 486 alu.src[0].chan = (base_chan - (i % 2)); 487 488 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos; 489 490 alu.bank_swizzle_force = SQ_ALU_VEC_210; 491 if ((i % 4) == 3) 492 alu.last = 1; 493 r = r600_bc_add_alu(ctx->bc, &alu); 494 if (r) 495 return r; 496 } 497 return 0; 498} 499 500 501static int tgsi_declaration(struct r600_shader_ctx *ctx) 502{ 503 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 504 struct r600_bc_vtx vtx; 505 unsigned i; 506 int r; 507 508 switch (d->Declaration.File) { 509 case TGSI_FILE_INPUT: 510 i = ctx->shader->ninput++; 511 ctx->shader->input[i].name = d->Semantic.Name; 512 ctx->shader->input[i].sid = d->Semantic.Index; 513 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 514 ctx->shader->input[i].centroid = d->Declaration.Centroid; 515 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 516 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 517 /* turn input into fetch */ 518 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 519 vtx.inst = 0; 520 vtx.fetch_type = 0; 521 vtx.buffer_id = i; 522 /* register containing the index into the buffer */ 523 vtx.src_gpr = 0; 524 vtx.src_sel_x = 0; 525 vtx.mega_fetch_count = 0x1F; 526 vtx.dst_gpr = ctx->shader->input[i].gpr; 527 vtx.dst_sel_x = 0; 528 vtx.dst_sel_y = 1; 529 vtx.dst_sel_z = 2; 530 vtx.dst_sel_w = 3; 531 vtx.use_const_fields = 1; 532 r = r600_bc_add_vtx(ctx->bc_fetch, &vtx); 533 if (r) 534 return r; 535 } 536 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { 537 /* turn input into interpolate on EG */ 538 if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { 539 if (ctx->shader->input[i].interpolate > 0) { 540 ctx->shader->input[i].lds_pos = ctx->shader->nlds++; 541 evergreen_interp_alu(ctx, i); 542 } 543 } 544 } 545 break; 546 case TGSI_FILE_OUTPUT: 547 i = ctx->shader->noutput++; 548 ctx->shader->output[i].name = d->Semantic.Name; 549 ctx->shader->output[i].sid = d->Semantic.Index; 550 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 551 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 552 break; 553 case TGSI_FILE_CONSTANT: 554 case TGSI_FILE_TEMPORARY: 555 case TGSI_FILE_SAMPLER: 556 case TGSI_FILE_ADDRESS: 557 break; 558 default: 559 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 560 return -EINVAL; 561 } 562 return 0; 563} 564 565static int r600_get_temp(struct r600_shader_ctx *ctx) 566{ 567 return ctx->temp_reg + ctx->max_driver_temp_used++; 568} 569 570/* 571 * for evergreen we need to scan the shader to find the number of GPRs we need to 572 * reserve for interpolation. 573 * 574 * we need to know if we are going to emit 575 * any centroid inputs 576 * if perspective and linear are required 577*/ 578static int evergreen_gpr_count(struct r600_shader_ctx *ctx) 579{ 580 int i; 581 int num_baryc; 582 583 ctx->input_linear = FALSE; 584 ctx->input_perspective = FALSE; 585 ctx->input_centroid = FALSE; 586 ctx->num_interp_gpr = 1; 587 588 /* any centroid inputs */ 589 for (i = 0; i < ctx->info.num_inputs; i++) { 590 /* skip position/face */ 591 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION || 592 ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE) 593 continue; 594 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_LINEAR) 595 ctx->input_linear = TRUE; 596 if (ctx->info.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE) 597 ctx->input_perspective = TRUE; 598 if (ctx->info.input_centroid[i]) 599 ctx->input_centroid = TRUE; 600 } 601 602 num_baryc = 0; 603 /* ignoring sample for now */ 604 if (ctx->input_perspective) 605 num_baryc++; 606 if (ctx->input_linear) 607 num_baryc++; 608 if (ctx->input_centroid) 609 num_baryc *= 2; 610 611 ctx->num_interp_gpr += (num_baryc + 1) >> 1; 612 613 /* TODO PULL MODEL and LINE STIPPLE, FIXED PT POS */ 614 return ctx->num_interp_gpr; 615} 616 617int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 618{ 619 struct tgsi_full_immediate *immediate; 620 struct r600_shader_ctx ctx; 621 struct r600_bc_output output[32]; 622 unsigned output_done, noutput; 623 unsigned opcode; 624 int i, r = 0, pos0; 625 626 ctx.bc = &shader->bc; 627 ctx.bc_fetch = &shader->bc_fetch; 628 ctx.shader = shader; 629 r = r600_bc_init(ctx.bc, shader->family); 630 if (r) 631 return r; 632 ctx.tokens = tokens; 633 tgsi_scan_shader(tokens, &ctx.info); 634 tgsi_parse_init(&ctx.parse, tokens); 635 ctx.type = ctx.parse.FullHeader.Processor.Processor; 636 shader->processor_type = ctx.type; 637 if (shader->processor_type == TGSI_PROCESSOR_VERTEX) { 638 r = r600_bc_init(ctx.bc_fetch, shader->family); 639 if (r) 640 return r; 641 ctx.bc_fetch->type = -1; 642 } 643 ctx.bc->type = shader->processor_type; 644 645 /* register allocations */ 646 /* Values [0,127] correspond to GPR[0..127]. 647 * Values [128,159] correspond to constant buffer bank 0 648 * Values [160,191] correspond to constant buffer bank 1 649 * Values [256,511] correspond to cfile constants c[0..255]. 650 * Other special values are shown in the list below. 651 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 652 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 653 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 654 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 655 * 248 SQ_ALU_SRC_0: special constant 0.0. 656 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 657 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 658 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 659 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 660 * 253 SQ_ALU_SRC_LITERAL: literal constant. 661 * 254 SQ_ALU_SRC_PV: previous vector result. 662 * 255 SQ_ALU_SRC_PS: previous scalar result. 663 */ 664 for (i = 0; i < TGSI_FILE_COUNT; i++) { 665 ctx.file_offset[i] = 0; 666 } 667 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 668 ctx.file_offset[TGSI_FILE_INPUT] = 1; 669 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { 670 r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 671 } else { 672 r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); 673 } 674 } 675 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { 676 ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); 677 } 678 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 679 ctx.info.file_count[TGSI_FILE_INPUT]; 680 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 681 ctx.info.file_count[TGSI_FILE_OUTPUT]; 682 683 ctx.file_offset[TGSI_FILE_CONSTANT] = 128; 684 685 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 686 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 687 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 688 689 ctx.nliterals = 0; 690 ctx.literals = NULL; 691 692 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 693 tgsi_parse_token(&ctx.parse); 694 switch (ctx.parse.FullToken.Token.Type) { 695 case TGSI_TOKEN_TYPE_IMMEDIATE: 696 immediate = &ctx.parse.FullToken.FullImmediate; 697 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 698 if(ctx.literals == NULL) { 699 r = -ENOMEM; 700 goto out_err; 701 } 702 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 703 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 704 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 705 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 706 ctx.nliterals++; 707 break; 708 case TGSI_TOKEN_TYPE_DECLARATION: 709 r = tgsi_declaration(&ctx); 710 if (r) 711 goto out_err; 712 break; 713 case TGSI_TOKEN_TYPE_INSTRUCTION: 714 r = tgsi_is_supported(&ctx); 715 if (r) 716 goto out_err; 717 ctx.max_driver_temp_used = 0; 718 /* reserve first tmp for everyone */ 719 r600_get_temp(&ctx); 720 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 721 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) 722 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 723 else 724 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 725 r = ctx.inst_info->process(&ctx); 726 if (r) 727 goto out_err; 728 r = r600_bc_add_literal(ctx.bc, ctx.value); 729 if (r) 730 goto out_err; 731 break; 732 default: 733 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 734 r = -EINVAL; 735 goto out_err; 736 } 737 } 738 /* export output */ 739 noutput = shader->noutput; 740 for (i = 0, pos0 = 0; i < noutput; i++) { 741 memset(&output[i], 0, sizeof(struct r600_bc_output)); 742 output[i].gpr = shader->output[i].gpr; 743 output[i].elem_size = 3; 744 output[i].swizzle_x = 0; 745 output[i].swizzle_y = 1; 746 output[i].swizzle_z = 2; 747 output[i].swizzle_w = 3; 748 output[i].barrier = 1; 749 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 750 output[i].array_base = i - pos0; 751 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 752 switch (ctx.type) { 753 case TGSI_PROCESSOR_VERTEX: 754 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 755 output[i].array_base = 60; 756 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 757 /* position doesn't count in array_base */ 758 pos0++; 759 } 760 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 761 output[i].array_base = 61; 762 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 763 /* position doesn't count in array_base */ 764 pos0++; 765 } 766 break; 767 case TGSI_PROCESSOR_FRAGMENT: 768 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 769 output[i].array_base = shader->output[i].sid; 770 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 771 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 772 output[i].array_base = 61; 773 output[i].swizzle_x = 2; 774 output[i].swizzle_y = 7; 775 output[i].swizzle_z = output[i].swizzle_w = 7; 776 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 777 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 778 output[i].array_base = 61; 779 output[i].swizzle_x = 7; 780 output[i].swizzle_y = 1; 781 output[i].swizzle_z = output[i].swizzle_w = 7; 782 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 783 } else { 784 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 785 r = -EINVAL; 786 goto out_err; 787 } 788 break; 789 default: 790 R600_ERR("unsupported processor type %d\n", ctx.type); 791 r = -EINVAL; 792 goto out_err; 793 } 794 } 795 /* add fake param output for vertex shader if no param is exported */ 796 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 797 for (i = 0, pos0 = 0; i < noutput; i++) { 798 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 799 pos0 = 1; 800 break; 801 } 802 } 803 if (!pos0) { 804 memset(&output[i], 0, sizeof(struct r600_bc_output)); 805 output[i].gpr = 0; 806 output[i].elem_size = 3; 807 output[i].swizzle_x = 0; 808 output[i].swizzle_y = 1; 809 output[i].swizzle_z = 2; 810 output[i].swizzle_w = 3; 811 output[i].barrier = 1; 812 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 813 output[i].array_base = 0; 814 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 815 noutput++; 816 } 817 } 818 /* add fake pixel export */ 819 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 820 memset(&output[0], 0, sizeof(struct r600_bc_output)); 821 output[0].gpr = 0; 822 output[0].elem_size = 3; 823 output[0].swizzle_x = 7; 824 output[0].swizzle_y = 7; 825 output[0].swizzle_z = 7; 826 output[0].swizzle_w = 7; 827 output[0].barrier = 1; 828 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 829 output[0].array_base = 0; 830 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 831 noutput++; 832 } 833 /* set export done on last export of each type */ 834 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 835 if (i == (noutput - 1)) { 836 output[i].end_of_program = 1; 837 } 838 if (!(output_done & (1 << output[i].type))) { 839 output_done |= (1 << output[i].type); 840 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 841 } 842 } 843 /* add return to fetch shader */ 844 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 845 if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { 846 r600_bc_add_cfinst(ctx.bc_fetch, EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 847 } else { 848 r600_bc_add_cfinst(ctx.bc_fetch, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 849 } 850 } 851 /* add output to bytecode */ 852 for (i = 0; i < noutput; i++) { 853 r = r600_bc_add_output(ctx.bc, &output[i]); 854 if (r) 855 goto out_err; 856 } 857 free(ctx.literals); 858 tgsi_parse_free(&ctx.parse); 859 return 0; 860out_err: 861 free(ctx.literals); 862 tgsi_parse_free(&ctx.parse); 863 return r; 864} 865 866static int tgsi_unsupported(struct r600_shader_ctx *ctx) 867{ 868 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 869 return -EINVAL; 870} 871 872static int tgsi_end(struct r600_shader_ctx *ctx) 873{ 874 return 0; 875} 876 877static int tgsi_src(struct r600_shader_ctx *ctx, 878 const struct tgsi_full_src_register *tgsi_src, 879 struct r600_bc_alu_src *r600_src) 880{ 881 int index; 882 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 883 r600_src->sel = tgsi_src->Register.Index; 884 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 885 r600_src->sel = 0; 886 index = tgsi_src->Register.Index; 887 ctx->value[0] = ctx->literals[index * 4 + 0]; 888 ctx->value[1] = ctx->literals[index * 4 + 1]; 889 ctx->value[2] = ctx->literals[index * 4 + 2]; 890 ctx->value[3] = ctx->literals[index * 4 + 3]; 891 } 892 if (tgsi_src->Register.Indirect) 893 r600_src->rel = V_SQ_REL_RELATIVE; 894 r600_src->neg = tgsi_src->Register.Negate; 895 r600_src->abs = tgsi_src->Register.Absolute; 896 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 897 return 0; 898} 899 900static int tgsi_dst(struct r600_shader_ctx *ctx, 901 const struct tgsi_full_dst_register *tgsi_dst, 902 unsigned swizzle, 903 struct r600_bc_alu_dst *r600_dst) 904{ 905 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 906 907 r600_dst->sel = tgsi_dst->Register.Index; 908 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 909 r600_dst->chan = swizzle; 910 r600_dst->write = 1; 911 if (tgsi_dst->Register.Indirect) 912 r600_dst->rel = V_SQ_REL_RELATIVE; 913 if (inst->Instruction.Saturate) { 914 r600_dst->clamp = 1; 915 } 916 return 0; 917} 918 919static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 920{ 921 switch (swizzle) { 922 case 0: 923 return tgsi_src->Register.SwizzleX; 924 case 1: 925 return tgsi_src->Register.SwizzleY; 926 case 2: 927 return tgsi_src->Register.SwizzleZ; 928 case 3: 929 return tgsi_src->Register.SwizzleW; 930 default: 931 return 0; 932 } 933} 934 935static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 936{ 937 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 938 struct r600_bc_alu alu; 939 int i, j, k, nconst, r; 940 941 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 942 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 943 nconst++; 944 } 945 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 946 if (r) { 947 return r; 948 } 949 } 950 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 951 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 952 int treg = r600_get_temp(ctx); 953 for (k = 0; k < 4; k++) { 954 memset(&alu, 0, sizeof(struct r600_bc_alu)); 955 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 956 alu.src[0].sel = r600_src[i].sel; 957 alu.src[0].chan = k; 958 alu.src[0].rel = r600_src[i].rel; 959 alu.dst.sel = treg; 960 alu.dst.chan = k; 961 alu.dst.write = 1; 962 if (k == 3) 963 alu.last = 1; 964 r = r600_bc_add_alu(ctx->bc, &alu); 965 if (r) 966 return r; 967 } 968 r600_src[i].sel = treg; 969 r600_src[i].rel =0; 970 j--; 971 } 972 } 973 return 0; 974} 975 976/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 977static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 978{ 979 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 980 struct r600_bc_alu alu; 981 int i, j, k, nliteral, r; 982 983 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 984 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 985 nliteral++; 986 } 987 } 988 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 989 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 990 int treg = r600_get_temp(ctx); 991 for (k = 0; k < 4; k++) { 992 memset(&alu, 0, sizeof(struct r600_bc_alu)); 993 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 994 alu.src[0].sel = r600_src[i].sel; 995 alu.src[0].chan = k; 996 alu.dst.sel = treg; 997 alu.dst.chan = k; 998 alu.dst.write = 1; 999 if (k == 3) 1000 alu.last = 1; 1001 r = r600_bc_add_alu(ctx->bc, &alu); 1002 if (r) 1003 return r; 1004 } 1005 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); 1006 if (r) 1007 return r; 1008 r600_src[i].sel = treg; 1009 j--; 1010 } 1011 } 1012 return 0; 1013} 1014 1015static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 1016{ 1017 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1018 struct r600_bc_alu_src r600_src[3]; 1019 struct r600_bc_alu alu; 1020 int i, j, r; 1021 int lasti = 0; 1022 1023 for (i = 0; i < 4; i++) { 1024 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 1025 lasti = i; 1026 } 1027 } 1028 1029 r = tgsi_split_constant(ctx, r600_src); 1030 if (r) 1031 return r; 1032 r = tgsi_split_literal_constant(ctx, r600_src); 1033 if (r) 1034 return r; 1035 for (i = 0; i < lasti + 1; i++) { 1036 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1037 continue; 1038 1039 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1040 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1041 if (r) 1042 return r; 1043 1044 alu.inst = ctx->inst_info->r600_opcode; 1045 if (!swap) { 1046 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1047 alu.src[j] = r600_src[j]; 1048 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1049 } 1050 } else { 1051 alu.src[0] = r600_src[1]; 1052 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 1053 1054 alu.src[1] = r600_src[0]; 1055 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1056 } 1057 /* handle some special cases */ 1058 switch (ctx->inst_info->tgsi_opcode) { 1059 case TGSI_OPCODE_SUB: 1060 alu.src[1].neg = 1; 1061 break; 1062 case TGSI_OPCODE_ABS: 1063 alu.src[0].abs = 1; 1064 break; 1065 default: 1066 break; 1067 } 1068 if (i == lasti) { 1069 alu.last = 1; 1070 } 1071 r = r600_bc_add_alu(ctx->bc, &alu); 1072 if (r) 1073 return r; 1074 } 1075 return 0; 1076} 1077 1078static int tgsi_op2(struct r600_shader_ctx *ctx) 1079{ 1080 return tgsi_op2_s(ctx, 0); 1081} 1082 1083static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 1084{ 1085 return tgsi_op2_s(ctx, 1); 1086} 1087 1088/* 1089 * r600 - trunc to -PI..PI range 1090 * r700 - normalize by dividing by 2PI 1091 * see fdo bug 27901 1092 */ 1093static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 1094 struct r600_bc_alu_src r600_src[3]) 1095{ 1096 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1097 int r; 1098 uint32_t lit_vals[4]; 1099 struct r600_bc_alu alu; 1100 1101 memset(lit_vals, 0, 4*4); 1102 r = tgsi_split_constant(ctx, r600_src); 1103 if (r) 1104 return r; 1105 r = tgsi_split_literal_constant(ctx, r600_src); 1106 if (r) 1107 return r; 1108 1109 r = tgsi_split_literal_constant(ctx, r600_src); 1110 if (r) 1111 return r; 1112 1113 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 1114 lit_vals[1] = fui(0.5f); 1115 1116 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1117 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1118 alu.is_op3 = 1; 1119 1120 alu.dst.chan = 0; 1121 alu.dst.sel = ctx->temp_reg; 1122 alu.dst.write = 1; 1123 1124 alu.src[0] = r600_src[0]; 1125 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1126 1127 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1128 alu.src[1].chan = 0; 1129 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1130 alu.src[2].chan = 1; 1131 alu.last = 1; 1132 r = r600_bc_add_alu(ctx->bc, &alu); 1133 if (r) 1134 return r; 1135 r = r600_bc_add_literal(ctx->bc, lit_vals); 1136 if (r) 1137 return r; 1138 1139 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1140 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 1141 1142 alu.dst.chan = 0; 1143 alu.dst.sel = ctx->temp_reg; 1144 alu.dst.write = 1; 1145 1146 alu.src[0].sel = ctx->temp_reg; 1147 alu.src[0].chan = 0; 1148 alu.last = 1; 1149 r = r600_bc_add_alu(ctx->bc, &alu); 1150 if (r) 1151 return r; 1152 1153 if (ctx->bc->chiprev == CHIPREV_R600) { 1154 lit_vals[0] = fui(3.1415926535897f * 2.0f); 1155 lit_vals[1] = fui(-3.1415926535897f); 1156 } else { 1157 lit_vals[0] = fui(1.0f); 1158 lit_vals[1] = fui(-0.5f); 1159 } 1160 1161 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1162 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1163 alu.is_op3 = 1; 1164 1165 alu.dst.chan = 0; 1166 alu.dst.sel = ctx->temp_reg; 1167 alu.dst.write = 1; 1168 1169 alu.src[0].sel = ctx->temp_reg; 1170 alu.src[0].chan = 0; 1171 1172 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1173 alu.src[1].chan = 0; 1174 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1175 alu.src[2].chan = 1; 1176 alu.last = 1; 1177 r = r600_bc_add_alu(ctx->bc, &alu); 1178 if (r) 1179 return r; 1180 r = r600_bc_add_literal(ctx->bc, lit_vals); 1181 if (r) 1182 return r; 1183 return 0; 1184} 1185 1186static int tgsi_trig(struct r600_shader_ctx *ctx) 1187{ 1188 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1189 struct r600_bc_alu_src r600_src[3]; 1190 struct r600_bc_alu alu; 1191 int i, r; 1192 int lasti = 0; 1193 1194 r = tgsi_setup_trig(ctx, r600_src); 1195 if (r) 1196 return r; 1197 1198 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1199 alu.inst = ctx->inst_info->r600_opcode; 1200 alu.dst.chan = 0; 1201 alu.dst.sel = ctx->temp_reg; 1202 alu.dst.write = 1; 1203 1204 alu.src[0].sel = ctx->temp_reg; 1205 alu.src[0].chan = 0; 1206 alu.last = 1; 1207 r = r600_bc_add_alu(ctx->bc, &alu); 1208 if (r) 1209 return r; 1210 1211 /* replicate result */ 1212 for (i = 0; i < 4; i++) { 1213 if (inst->Dst[0].Register.WriteMask & (1 << i)) 1214 lasti = i; 1215 } 1216 for (i = 0; i < lasti + 1; i++) { 1217 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1218 continue; 1219 1220 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1222 1223 alu.src[0].sel = ctx->temp_reg; 1224 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1225 if (r) 1226 return r; 1227 if (i == lasti) 1228 alu.last = 1; 1229 r = r600_bc_add_alu(ctx->bc, &alu); 1230 if (r) 1231 return r; 1232 } 1233 return 0; 1234} 1235 1236static int tgsi_scs(struct r600_shader_ctx *ctx) 1237{ 1238 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1239 struct r600_bc_alu_src r600_src[3]; 1240 struct r600_bc_alu alu; 1241 int r; 1242 1243 /* We'll only need the trig stuff if we are going to write to the 1244 * X or Y components of the destination vector. 1245 */ 1246 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1247 r = tgsi_setup_trig(ctx, r600_src); 1248 if (r) 1249 return r; 1250 } 1251 1252 /* dst.x = COS */ 1253 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1254 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1255 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1256 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1257 if (r) 1258 return r; 1259 1260 alu.src[0].sel = ctx->temp_reg; 1261 alu.src[0].chan = 0; 1262 alu.last = 1; 1263 r = r600_bc_add_alu(ctx->bc, &alu); 1264 if (r) 1265 return r; 1266 } 1267 1268 /* dst.y = SIN */ 1269 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1270 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1271 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1272 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1273 if (r) 1274 return r; 1275 1276 alu.src[0].sel = ctx->temp_reg; 1277 alu.src[0].chan = 0; 1278 alu.last = 1; 1279 r = r600_bc_add_alu(ctx->bc, &alu); 1280 if (r) 1281 return r; 1282 } 1283 1284 /* dst.z = 0.0; */ 1285 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1286 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1287 1288 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1289 1290 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1291 if (r) 1292 return r; 1293 1294 alu.src[0].sel = V_SQ_ALU_SRC_0; 1295 alu.src[0].chan = 0; 1296 1297 alu.last = 1; 1298 1299 r = r600_bc_add_alu(ctx->bc, &alu); 1300 if (r) 1301 return r; 1302 1303 r = r600_bc_add_literal(ctx->bc, ctx->value); 1304 if (r) 1305 return r; 1306 } 1307 1308 /* dst.w = 1.0; */ 1309 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1310 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1311 1312 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1313 1314 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1315 if (r) 1316 return r; 1317 1318 alu.src[0].sel = V_SQ_ALU_SRC_1; 1319 alu.src[0].chan = 0; 1320 1321 alu.last = 1; 1322 1323 r = r600_bc_add_alu(ctx->bc, &alu); 1324 if (r) 1325 return r; 1326 1327 r = r600_bc_add_literal(ctx->bc, ctx->value); 1328 if (r) 1329 return r; 1330 } 1331 1332 return 0; 1333} 1334 1335static int tgsi_kill(struct r600_shader_ctx *ctx) 1336{ 1337 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1338 struct r600_bc_alu alu; 1339 int i, r; 1340 1341 for (i = 0; i < 4; i++) { 1342 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1343 alu.inst = ctx->inst_info->r600_opcode; 1344 1345 alu.dst.chan = i; 1346 1347 alu.src[0].sel = V_SQ_ALU_SRC_0; 1348 1349 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1350 alu.src[1].sel = V_SQ_ALU_SRC_1; 1351 alu.src[1].neg = 1; 1352 } else { 1353 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1354 if (r) 1355 return r; 1356 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1357 } 1358 if (i == 3) { 1359 alu.last = 1; 1360 } 1361 r = r600_bc_add_alu(ctx->bc, &alu); 1362 if (r) 1363 return r; 1364 } 1365 r = r600_bc_add_literal(ctx->bc, ctx->value); 1366 if (r) 1367 return r; 1368 1369 /* kill must be last in ALU */ 1370 ctx->bc->force_add_cf = 1; 1371 ctx->shader->uses_kill = TRUE; 1372 return 0; 1373} 1374 1375static int tgsi_lit(struct r600_shader_ctx *ctx) 1376{ 1377 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1378 struct r600_bc_alu alu; 1379 struct r600_bc_alu_src r600_src[3]; 1380 int r; 1381 1382 r = tgsi_split_constant(ctx, r600_src); 1383 if (r) 1384 return r; 1385 r = tgsi_split_literal_constant(ctx, r600_src); 1386 if (r) 1387 return r; 1388 1389 /* dst.x, <- 1.0 */ 1390 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1391 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1392 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1393 alu.src[0].chan = 0; 1394 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1395 if (r) 1396 return r; 1397 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1398 r = r600_bc_add_alu(ctx->bc, &alu); 1399 if (r) 1400 return r; 1401 1402 /* dst.y = max(src.x, 0.0) */ 1403 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1404 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1405 alu.src[0] = r600_src[0]; 1406 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1407 alu.src[1].chan = 0; 1408 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1409 if (r) 1410 return r; 1411 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1412 r = r600_bc_add_alu(ctx->bc, &alu); 1413 if (r) 1414 return r; 1415 1416 /* dst.w, <- 1.0 */ 1417 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1418 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1419 alu.src[0].sel = V_SQ_ALU_SRC_1; 1420 alu.src[0].chan = 0; 1421 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1422 if (r) 1423 return r; 1424 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1425 alu.last = 1; 1426 r = r600_bc_add_alu(ctx->bc, &alu); 1427 if (r) 1428 return r; 1429 1430 r = r600_bc_add_literal(ctx->bc, ctx->value); 1431 if (r) 1432 return r; 1433 1434 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1435 { 1436 int chan; 1437 int sel; 1438 1439 /* dst.z = log(src.y) */ 1440 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1441 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1442 alu.src[0] = r600_src[0]; 1443 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1444 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1445 if (r) 1446 return r; 1447 alu.last = 1; 1448 r = r600_bc_add_alu(ctx->bc, &alu); 1449 if (r) 1450 return r; 1451 1452 r = r600_bc_add_literal(ctx->bc, ctx->value); 1453 if (r) 1454 return r; 1455 1456 chan = alu.dst.chan; 1457 sel = alu.dst.sel; 1458 1459 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1460 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1461 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1462 alu.src[0] = r600_src[0]; 1463 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1464 alu.src[1].sel = sel; 1465 alu.src[1].chan = chan; 1466 1467 alu.src[2] = r600_src[0]; 1468 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1469 alu.dst.sel = ctx->temp_reg; 1470 alu.dst.chan = 0; 1471 alu.dst.write = 1; 1472 alu.is_op3 = 1; 1473 alu.last = 1; 1474 r = r600_bc_add_alu(ctx->bc, &alu); 1475 if (r) 1476 return r; 1477 1478 r = r600_bc_add_literal(ctx->bc, ctx->value); 1479 if (r) 1480 return r; 1481 /* dst.z = exp(tmp.x) */ 1482 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1483 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1484 alu.src[0].sel = ctx->temp_reg; 1485 alu.src[0].chan = 0; 1486 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1487 if (r) 1488 return r; 1489 alu.last = 1; 1490 r = r600_bc_add_alu(ctx->bc, &alu); 1491 if (r) 1492 return r; 1493 } 1494 return 0; 1495} 1496 1497static int tgsi_rsq(struct r600_shader_ctx *ctx) 1498{ 1499 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1500 struct r600_bc_alu alu; 1501 int i, r; 1502 1503 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1504 1505 /* FIXME: 1506 * For state trackers other than OpenGL, we'll want to use 1507 * _RECIPSQRT_IEEE instead. 1508 */ 1509 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1510 1511 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1512 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1513 if (r) 1514 return r; 1515 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1516 alu.src[i].abs = 1; 1517 } 1518 alu.dst.sel = ctx->temp_reg; 1519 alu.dst.write = 1; 1520 alu.last = 1; 1521 r = r600_bc_add_alu(ctx->bc, &alu); 1522 if (r) 1523 return r; 1524 r = r600_bc_add_literal(ctx->bc, ctx->value); 1525 if (r) 1526 return r; 1527 /* replicate result */ 1528 return tgsi_helper_tempx_replicate(ctx); 1529} 1530 1531static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1532{ 1533 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1534 struct r600_bc_alu alu; 1535 int i, r; 1536 1537 for (i = 0; i < 4; i++) { 1538 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1539 alu.src[0].sel = ctx->temp_reg; 1540 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1541 alu.dst.chan = i; 1542 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1543 if (r) 1544 return r; 1545 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1546 if (i == 3) 1547 alu.last = 1; 1548 r = r600_bc_add_alu(ctx->bc, &alu); 1549 if (r) 1550 return r; 1551 } 1552 return 0; 1553} 1554 1555static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1556{ 1557 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1558 struct r600_bc_alu alu; 1559 int i, r; 1560 1561 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1562 alu.inst = ctx->inst_info->r600_opcode; 1563 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1564 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1565 if (r) 1566 return r; 1567 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1568 } 1569 alu.dst.sel = ctx->temp_reg; 1570 alu.dst.write = 1; 1571 alu.last = 1; 1572 r = r600_bc_add_alu(ctx->bc, &alu); 1573 if (r) 1574 return r; 1575 r = r600_bc_add_literal(ctx->bc, ctx->value); 1576 if (r) 1577 return r; 1578 /* replicate result */ 1579 return tgsi_helper_tempx_replicate(ctx); 1580} 1581 1582static int tgsi_pow(struct r600_shader_ctx *ctx) 1583{ 1584 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1585 struct r600_bc_alu alu; 1586 int r; 1587 1588 /* LOG2(a) */ 1589 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1590 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1591 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1592 if (r) 1593 return r; 1594 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1595 alu.dst.sel = ctx->temp_reg; 1596 alu.dst.write = 1; 1597 alu.last = 1; 1598 r = r600_bc_add_alu(ctx->bc, &alu); 1599 if (r) 1600 return r; 1601 r = r600_bc_add_literal(ctx->bc,ctx->value); 1602 if (r) 1603 return r; 1604 /* b * LOG2(a) */ 1605 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1606 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); 1607 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1608 if (r) 1609 return r; 1610 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1611 alu.src[1].sel = ctx->temp_reg; 1612 alu.dst.sel = ctx->temp_reg; 1613 alu.dst.write = 1; 1614 alu.last = 1; 1615 r = r600_bc_add_alu(ctx->bc, &alu); 1616 if (r) 1617 return r; 1618 r = r600_bc_add_literal(ctx->bc,ctx->value); 1619 if (r) 1620 return r; 1621 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1622 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1623 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1624 alu.src[0].sel = ctx->temp_reg; 1625 alu.dst.sel = ctx->temp_reg; 1626 alu.dst.write = 1; 1627 alu.last = 1; 1628 r = r600_bc_add_alu(ctx->bc, &alu); 1629 if (r) 1630 return r; 1631 r = r600_bc_add_literal(ctx->bc,ctx->value); 1632 if (r) 1633 return r; 1634 return tgsi_helper_tempx_replicate(ctx); 1635} 1636 1637static int tgsi_ssg(struct r600_shader_ctx *ctx) 1638{ 1639 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1640 struct r600_bc_alu alu; 1641 struct r600_bc_alu_src r600_src[3]; 1642 int i, r; 1643 1644 r = tgsi_split_constant(ctx, r600_src); 1645 if (r) 1646 return r; 1647 r = tgsi_split_literal_constant(ctx, r600_src); 1648 if (r) 1649 return r; 1650 1651 /* tmp = (src > 0 ? 1 : src) */ 1652 for (i = 0; i < 4; i++) { 1653 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1654 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1655 alu.is_op3 = 1; 1656 1657 alu.dst.sel = ctx->temp_reg; 1658 alu.dst.chan = i; 1659 1660 alu.src[0] = r600_src[0]; 1661 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1662 1663 alu.src[1].sel = V_SQ_ALU_SRC_1; 1664 1665 alu.src[2] = r600_src[0]; 1666 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1667 if (i == 3) 1668 alu.last = 1; 1669 r = r600_bc_add_alu(ctx->bc, &alu); 1670 if (r) 1671 return r; 1672 } 1673 r = r600_bc_add_literal(ctx->bc, ctx->value); 1674 if (r) 1675 return r; 1676 1677 /* dst = (-tmp > 0 ? -1 : tmp) */ 1678 for (i = 0; i < 4; i++) { 1679 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1680 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1681 alu.is_op3 = 1; 1682 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1683 if (r) 1684 return r; 1685 1686 alu.src[0].sel = ctx->temp_reg; 1687 alu.src[0].chan = i; 1688 alu.src[0].neg = 1; 1689 1690 alu.src[1].sel = V_SQ_ALU_SRC_1; 1691 alu.src[1].neg = 1; 1692 1693 alu.src[2].sel = ctx->temp_reg; 1694 alu.src[2].chan = i; 1695 1696 if (i == 3) 1697 alu.last = 1; 1698 r = r600_bc_add_alu(ctx->bc, &alu); 1699 if (r) 1700 return r; 1701 } 1702 return 0; 1703} 1704 1705static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1706{ 1707 struct r600_bc_alu alu; 1708 int i, r; 1709 1710 r = r600_bc_add_literal(ctx->bc, ctx->value); 1711 if (r) 1712 return r; 1713 for (i = 0; i < 4; i++) { 1714 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1715 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1716 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1717 alu.dst.chan = i; 1718 } else { 1719 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1720 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1721 if (r) 1722 return r; 1723 alu.src[0].sel = ctx->temp_reg; 1724 alu.src[0].chan = i; 1725 } 1726 if (i == 3) { 1727 alu.last = 1; 1728 } 1729 r = r600_bc_add_alu(ctx->bc, &alu); 1730 if (r) 1731 return r; 1732 } 1733 return 0; 1734} 1735 1736static int tgsi_op3(struct r600_shader_ctx *ctx) 1737{ 1738 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1739 struct r600_bc_alu_src r600_src[3]; 1740 struct r600_bc_alu alu; 1741 int i, j, r; 1742 1743 r = tgsi_split_constant(ctx, r600_src); 1744 if (r) 1745 return r; 1746 r = tgsi_split_literal_constant(ctx, r600_src); 1747 if (r) 1748 return r; 1749 /* do it in 2 step as op3 doesn't support writemask */ 1750 for (i = 0; i < 4; i++) { 1751 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1752 alu.inst = ctx->inst_info->r600_opcode; 1753 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1754 alu.src[j] = r600_src[j]; 1755 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1756 } 1757 alu.dst.sel = ctx->temp_reg; 1758 alu.dst.chan = i; 1759 alu.dst.write = 1; 1760 alu.is_op3 = 1; 1761 if (i == 3) { 1762 alu.last = 1; 1763 } 1764 r = r600_bc_add_alu(ctx->bc, &alu); 1765 if (r) 1766 return r; 1767 } 1768 return tgsi_helper_copy(ctx, inst); 1769} 1770 1771static int tgsi_dp(struct r600_shader_ctx *ctx) 1772{ 1773 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1774 struct r600_bc_alu_src r600_src[3]; 1775 struct r600_bc_alu alu; 1776 int i, j, r; 1777 1778 r = tgsi_split_constant(ctx, r600_src); 1779 if (r) 1780 return r; 1781 r = tgsi_split_literal_constant(ctx, r600_src); 1782 if (r) 1783 return r; 1784 for (i = 0; i < 4; i++) { 1785 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1786 alu.inst = ctx->inst_info->r600_opcode; 1787 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1788 alu.src[j] = r600_src[j]; 1789 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1790 } 1791 alu.dst.sel = ctx->temp_reg; 1792 alu.dst.chan = i; 1793 alu.dst.write = 1; 1794 /* handle some special cases */ 1795 switch (ctx->inst_info->tgsi_opcode) { 1796 case TGSI_OPCODE_DP2: 1797 if (i > 1) { 1798 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1799 alu.src[0].chan = alu.src[1].chan = 0; 1800 } 1801 break; 1802 case TGSI_OPCODE_DP3: 1803 if (i > 2) { 1804 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1805 alu.src[0].chan = alu.src[1].chan = 0; 1806 } 1807 break; 1808 case TGSI_OPCODE_DPH: 1809 if (i == 3) { 1810 alu.src[0].sel = V_SQ_ALU_SRC_1; 1811 alu.src[0].chan = 0; 1812 alu.src[0].neg = 0; 1813 } 1814 break; 1815 default: 1816 break; 1817 } 1818 if (i == 3) { 1819 alu.last = 1; 1820 } 1821 r = r600_bc_add_alu(ctx->bc, &alu); 1822 if (r) 1823 return r; 1824 } 1825 return tgsi_helper_copy(ctx, inst); 1826} 1827 1828static int tgsi_tex(struct r600_shader_ctx *ctx) 1829{ 1830 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1831 struct r600_bc_tex tex; 1832 struct r600_bc_alu alu; 1833 unsigned src_gpr; 1834 int r, i; 1835 int opcode; 1836 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; 1837 uint32_t lit_vals[4]; 1838 1839 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1840 1841 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1842 /* Add perspective divide */ 1843 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1844 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1845 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1846 if (r) 1847 return r; 1848 1849 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1850 alu.dst.sel = ctx->temp_reg; 1851 alu.dst.chan = 3; 1852 alu.last = 1; 1853 alu.dst.write = 1; 1854 r = r600_bc_add_alu(ctx->bc, &alu); 1855 if (r) 1856 return r; 1857 1858 for (i = 0; i < 3; i++) { 1859 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1860 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1861 alu.src[0].sel = ctx->temp_reg; 1862 alu.src[0].chan = 3; 1863 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1864 if (r) 1865 return r; 1866 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1867 alu.dst.sel = ctx->temp_reg; 1868 alu.dst.chan = i; 1869 alu.dst.write = 1; 1870 r = r600_bc_add_alu(ctx->bc, &alu); 1871 if (r) 1872 return r; 1873 } 1874 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1875 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1876 alu.src[0].sel = V_SQ_ALU_SRC_1; 1877 alu.src[0].chan = 0; 1878 alu.dst.sel = ctx->temp_reg; 1879 alu.dst.chan = 3; 1880 alu.last = 1; 1881 alu.dst.write = 1; 1882 r = r600_bc_add_alu(ctx->bc, &alu); 1883 if (r) 1884 return r; 1885 src_not_temp = FALSE; 1886 src_gpr = ctx->temp_reg; 1887 } 1888 1889 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1890 int src_chan, src2_chan; 1891 1892 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1893 for (i = 0; i < 4; i++) { 1894 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1895 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1896 switch (i) { 1897 case 0: 1898 src_chan = 2; 1899 src2_chan = 1; 1900 break; 1901 case 1: 1902 src_chan = 2; 1903 src2_chan = 0; 1904 break; 1905 case 2: 1906 src_chan = 0; 1907 src2_chan = 2; 1908 break; 1909 case 3: 1910 src_chan = 1; 1911 src2_chan = 2; 1912 break; 1913 default: 1914 assert(0); 1915 src_chan = 0; 1916 src2_chan = 0; 1917 break; 1918 } 1919 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1920 if (r) 1921 return r; 1922 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1923 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1924 if (r) 1925 return r; 1926 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1927 alu.dst.sel = ctx->temp_reg; 1928 alu.dst.chan = i; 1929 if (i == 3) 1930 alu.last = 1; 1931 alu.dst.write = 1; 1932 r = r600_bc_add_alu(ctx->bc, &alu); 1933 if (r) 1934 return r; 1935 } 1936 1937 /* tmp1.z = RCP_e(|tmp1.z|) */ 1938 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1939 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1940 alu.src[0].sel = ctx->temp_reg; 1941 alu.src[0].chan = 2; 1942 alu.src[0].abs = 1; 1943 alu.dst.sel = ctx->temp_reg; 1944 alu.dst.chan = 2; 1945 alu.dst.write = 1; 1946 alu.last = 1; 1947 r = r600_bc_add_alu(ctx->bc, &alu); 1948 if (r) 1949 return r; 1950 1951 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1952 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1953 * muladd has no writemask, have to use another temp 1954 */ 1955 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1956 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1957 alu.is_op3 = 1; 1958 1959 alu.src[0].sel = ctx->temp_reg; 1960 alu.src[0].chan = 0; 1961 alu.src[1].sel = ctx->temp_reg; 1962 alu.src[1].chan = 2; 1963 1964 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1965 alu.src[2].chan = 0; 1966 1967 alu.dst.sel = ctx->temp_reg; 1968 alu.dst.chan = 0; 1969 alu.dst.write = 1; 1970 1971 r = r600_bc_add_alu(ctx->bc, &alu); 1972 if (r) 1973 return r; 1974 1975 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1976 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1977 alu.is_op3 = 1; 1978 1979 alu.src[0].sel = ctx->temp_reg; 1980 alu.src[0].chan = 1; 1981 alu.src[1].sel = ctx->temp_reg; 1982 alu.src[1].chan = 2; 1983 1984 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1985 alu.src[2].chan = 0; 1986 1987 alu.dst.sel = ctx->temp_reg; 1988 alu.dst.chan = 1; 1989 alu.dst.write = 1; 1990 1991 alu.last = 1; 1992 r = r600_bc_add_alu(ctx->bc, &alu); 1993 if (r) 1994 return r; 1995 1996 lit_vals[0] = fui(1.5f); 1997 1998 r = r600_bc_add_literal(ctx->bc, lit_vals); 1999 if (r) 2000 return r; 2001 src_not_temp = FALSE; 2002 src_gpr = ctx->temp_reg; 2003 } 2004 2005 if (src_not_temp) { 2006 for (i = 0; i < 4; i++) { 2007 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2008 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2009 alu.src[0].sel = src_gpr; 2010 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2011 alu.dst.sel = ctx->temp_reg; 2012 alu.dst.chan = i; 2013 if (i == 3) 2014 alu.last = 1; 2015 alu.dst.write = 1; 2016 r = r600_bc_add_alu(ctx->bc, &alu); 2017 if (r) 2018 return r; 2019 } 2020 src_gpr = ctx->temp_reg; 2021 } 2022 2023 opcode = ctx->inst_info->r600_opcode; 2024 if (opcode == SQ_TEX_INST_SAMPLE && 2025 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 2026 opcode = SQ_TEX_INST_SAMPLE_C; 2027 2028 memset(&tex, 0, sizeof(struct r600_bc_tex)); 2029 tex.inst = opcode; 2030 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 2031 tex.resource_id = tex.sampler_id; 2032 tex.src_gpr = src_gpr; 2033 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 2034 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 2035 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 2036 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 2037 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 2038 tex.src_sel_x = 0; 2039 tex.src_sel_y = 1; 2040 tex.src_sel_z = 2; 2041 tex.src_sel_w = 3; 2042 2043 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 2044 tex.src_sel_x = 1; 2045 tex.src_sel_y = 0; 2046 tex.src_sel_z = 3; 2047 tex.src_sel_w = 1; 2048 } 2049 2050 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 2051 tex.coord_type_x = 1; 2052 tex.coord_type_y = 1; 2053 tex.coord_type_z = 1; 2054 tex.coord_type_w = 1; 2055 } 2056 2057 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 2058 tex.src_sel_w = 2; 2059 2060 r = r600_bc_add_tex(ctx->bc, &tex); 2061 if (r) 2062 return r; 2063 2064 /* add shadow ambient support - gallium doesn't do it yet */ 2065 return 0; 2066} 2067 2068static int tgsi_lrp(struct r600_shader_ctx *ctx) 2069{ 2070 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2071 struct r600_bc_alu_src r600_src[3]; 2072 struct r600_bc_alu alu; 2073 unsigned i; 2074 int r; 2075 2076 r = tgsi_split_constant(ctx, r600_src); 2077 if (r) 2078 return r; 2079 r = tgsi_split_literal_constant(ctx, r600_src); 2080 if (r) 2081 return r; 2082 /* 1 - src0 */ 2083 for (i = 0; i < 4; i++) { 2084 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2085 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 2086 alu.src[0].sel = V_SQ_ALU_SRC_1; 2087 alu.src[0].chan = 0; 2088 alu.src[1] = r600_src[0]; 2089 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 2090 alu.src[1].neg = 1; 2091 alu.dst.sel = ctx->temp_reg; 2092 alu.dst.chan = i; 2093 if (i == 3) { 2094 alu.last = 1; 2095 } 2096 alu.dst.write = 1; 2097 r = r600_bc_add_alu(ctx->bc, &alu); 2098 if (r) 2099 return r; 2100 } 2101 r = r600_bc_add_literal(ctx->bc, ctx->value); 2102 if (r) 2103 return r; 2104 2105 /* (1 - src0) * src2 */ 2106 for (i = 0; i < 4; i++) { 2107 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2108 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2109 alu.src[0].sel = ctx->temp_reg; 2110 alu.src[0].chan = i; 2111 alu.src[1] = r600_src[2]; 2112 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2113 alu.dst.sel = ctx->temp_reg; 2114 alu.dst.chan = i; 2115 if (i == 3) { 2116 alu.last = 1; 2117 } 2118 alu.dst.write = 1; 2119 r = r600_bc_add_alu(ctx->bc, &alu); 2120 if (r) 2121 return r; 2122 } 2123 r = r600_bc_add_literal(ctx->bc, ctx->value); 2124 if (r) 2125 return r; 2126 2127 /* src0 * src1 + (1 - src0) * src2 */ 2128 for (i = 0; i < 4; i++) { 2129 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2130 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2131 alu.is_op3 = 1; 2132 alu.src[0] = r600_src[0]; 2133 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2134 alu.src[1] = r600_src[1]; 2135 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2136 alu.src[2].sel = ctx->temp_reg; 2137 alu.src[2].chan = i; 2138 alu.dst.sel = ctx->temp_reg; 2139 alu.dst.chan = i; 2140 if (i == 3) { 2141 alu.last = 1; 2142 } 2143 r = r600_bc_add_alu(ctx->bc, &alu); 2144 if (r) 2145 return r; 2146 } 2147 return tgsi_helper_copy(ctx, inst); 2148} 2149 2150static int tgsi_cmp(struct r600_shader_ctx *ctx) 2151{ 2152 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2153 struct r600_bc_alu_src r600_src[3]; 2154 struct r600_bc_alu alu; 2155 int use_temp = 0; 2156 int i, r; 2157 2158 r = tgsi_split_constant(ctx, r600_src); 2159 if (r) 2160 return r; 2161 r = tgsi_split_literal_constant(ctx, r600_src); 2162 if (r) 2163 return r; 2164 2165 if (inst->Dst[0].Register.WriteMask != 0xf) 2166 use_temp = 1; 2167 2168 for (i = 0; i < 4; i++) { 2169 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2170 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2171 alu.src[0] = r600_src[0]; 2172 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2173 2174 alu.src[1] = r600_src[2]; 2175 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2176 2177 alu.src[2] = r600_src[1]; 2178 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 2179 2180 if (use_temp) 2181 alu.dst.sel = ctx->temp_reg; 2182 else { 2183 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2184 if (r) 2185 return r; 2186 } 2187 alu.dst.chan = i; 2188 alu.dst.write = 1; 2189 alu.is_op3 = 1; 2190 if (i == 3) 2191 alu.last = 1; 2192 r = r600_bc_add_alu(ctx->bc, &alu); 2193 if (r) 2194 return r; 2195 } 2196 if (use_temp) 2197 return tgsi_helper_copy(ctx, inst); 2198 return 0; 2199} 2200 2201static int tgsi_xpd(struct r600_shader_ctx *ctx) 2202{ 2203 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2204 struct r600_bc_alu_src r600_src[3]; 2205 struct r600_bc_alu alu; 2206 uint32_t use_temp = 0; 2207 int i, r; 2208 2209 if (inst->Dst[0].Register.WriteMask != 0xf) 2210 use_temp = 1; 2211 2212 r = tgsi_split_constant(ctx, r600_src); 2213 if (r) 2214 return r; 2215 r = tgsi_split_literal_constant(ctx, r600_src); 2216 if (r) 2217 return r; 2218 2219 for (i = 0; i < 4; i++) { 2220 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2222 2223 alu.src[0] = r600_src[0]; 2224 switch (i) { 2225 case 0: 2226 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2227 break; 2228 case 1: 2229 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2230 break; 2231 case 2: 2232 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2233 break; 2234 case 3: 2235 alu.src[0].sel = V_SQ_ALU_SRC_0; 2236 alu.src[0].chan = i; 2237 } 2238 2239 alu.src[1] = r600_src[1]; 2240 switch (i) { 2241 case 0: 2242 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2243 break; 2244 case 1: 2245 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2246 break; 2247 case 2: 2248 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2249 break; 2250 case 3: 2251 alu.src[1].sel = V_SQ_ALU_SRC_0; 2252 alu.src[1].chan = i; 2253 } 2254 2255 alu.dst.sel = ctx->temp_reg; 2256 alu.dst.chan = i; 2257 alu.dst.write = 1; 2258 2259 if (i == 3) 2260 alu.last = 1; 2261 r = r600_bc_add_alu(ctx->bc, &alu); 2262 if (r) 2263 return r; 2264 2265 r = r600_bc_add_literal(ctx->bc, ctx->value); 2266 if (r) 2267 return r; 2268 } 2269 2270 for (i = 0; i < 4; i++) { 2271 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2272 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2273 2274 alu.src[0] = r600_src[0]; 2275 switch (i) { 2276 case 0: 2277 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2278 break; 2279 case 1: 2280 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2281 break; 2282 case 2: 2283 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2284 break; 2285 case 3: 2286 alu.src[0].sel = V_SQ_ALU_SRC_0; 2287 alu.src[0].chan = i; 2288 } 2289 2290 alu.src[1] = r600_src[1]; 2291 switch (i) { 2292 case 0: 2293 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2294 break; 2295 case 1: 2296 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2297 break; 2298 case 2: 2299 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2300 break; 2301 case 3: 2302 alu.src[1].sel = V_SQ_ALU_SRC_0; 2303 alu.src[1].chan = i; 2304 } 2305 2306 alu.src[2].sel = ctx->temp_reg; 2307 alu.src[2].neg = 1; 2308 alu.src[2].chan = i; 2309 2310 if (use_temp) 2311 alu.dst.sel = ctx->temp_reg; 2312 else { 2313 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2314 if (r) 2315 return r; 2316 } 2317 alu.dst.chan = i; 2318 alu.dst.write = 1; 2319 alu.is_op3 = 1; 2320 if (i == 3) 2321 alu.last = 1; 2322 r = r600_bc_add_alu(ctx->bc, &alu); 2323 if (r) 2324 return r; 2325 2326 r = r600_bc_add_literal(ctx->bc, ctx->value); 2327 if (r) 2328 return r; 2329 } 2330 if (use_temp) 2331 return tgsi_helper_copy(ctx, inst); 2332 return 0; 2333} 2334 2335static int tgsi_exp(struct r600_shader_ctx *ctx) 2336{ 2337 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2338 struct r600_bc_alu_src r600_src[3] = { { 0 } }; 2339 struct r600_bc_alu alu; 2340 int r; 2341 2342 /* result.x = 2^floor(src); */ 2343 if (inst->Dst[0].Register.WriteMask & 1) { 2344 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2345 2346 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2347 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2348 if (r) 2349 return r; 2350 2351 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2352 2353 alu.dst.sel = ctx->temp_reg; 2354 alu.dst.chan = 0; 2355 alu.dst.write = 1; 2356 alu.last = 1; 2357 r = r600_bc_add_alu(ctx->bc, &alu); 2358 if (r) 2359 return r; 2360 2361 r = r600_bc_add_literal(ctx->bc, ctx->value); 2362 if (r) 2363 return r; 2364 2365 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2366 alu.src[0].sel = ctx->temp_reg; 2367 alu.src[0].chan = 0; 2368 2369 alu.dst.sel = ctx->temp_reg; 2370 alu.dst.chan = 0; 2371 alu.dst.write = 1; 2372 alu.last = 1; 2373 r = r600_bc_add_alu(ctx->bc, &alu); 2374 if (r) 2375 return r; 2376 2377 r = r600_bc_add_literal(ctx->bc, ctx->value); 2378 if (r) 2379 return r; 2380 } 2381 2382 /* result.y = tmp - floor(tmp); */ 2383 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2384 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2385 2386 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2387 alu.src[0] = r600_src[0]; 2388 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2389 if (r) 2390 return r; 2391 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2392 2393 alu.dst.sel = ctx->temp_reg; 2394// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2395// if (r) 2396// return r; 2397 alu.dst.write = 1; 2398 alu.dst.chan = 1; 2399 2400 alu.last = 1; 2401 2402 r = r600_bc_add_alu(ctx->bc, &alu); 2403 if (r) 2404 return r; 2405 r = r600_bc_add_literal(ctx->bc, ctx->value); 2406 if (r) 2407 return r; 2408 } 2409 2410 /* result.z = RoughApprox2ToX(tmp);*/ 2411 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2412 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2413 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2414 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2415 if (r) 2416 return r; 2417 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2418 2419 alu.dst.sel = ctx->temp_reg; 2420 alu.dst.write = 1; 2421 alu.dst.chan = 2; 2422 2423 alu.last = 1; 2424 2425 r = r600_bc_add_alu(ctx->bc, &alu); 2426 if (r) 2427 return r; 2428 r = r600_bc_add_literal(ctx->bc, ctx->value); 2429 if (r) 2430 return r; 2431 } 2432 2433 /* result.w = 1.0;*/ 2434 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2435 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2436 2437 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2438 alu.src[0].sel = V_SQ_ALU_SRC_1; 2439 alu.src[0].chan = 0; 2440 2441 alu.dst.sel = ctx->temp_reg; 2442 alu.dst.chan = 3; 2443 alu.dst.write = 1; 2444 alu.last = 1; 2445 r = r600_bc_add_alu(ctx->bc, &alu); 2446 if (r) 2447 return r; 2448 r = r600_bc_add_literal(ctx->bc, ctx->value); 2449 if (r) 2450 return r; 2451 } 2452 return tgsi_helper_copy(ctx, inst); 2453} 2454 2455static int tgsi_log(struct r600_shader_ctx *ctx) 2456{ 2457 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2458 struct r600_bc_alu alu; 2459 int r; 2460 2461 /* result.x = floor(log2(src)); */ 2462 if (inst->Dst[0].Register.WriteMask & 1) { 2463 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2464 2465 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2466 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2467 if (r) 2468 return r; 2469 2470 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2471 2472 alu.dst.sel = ctx->temp_reg; 2473 alu.dst.chan = 0; 2474 alu.dst.write = 1; 2475 alu.last = 1; 2476 r = r600_bc_add_alu(ctx->bc, &alu); 2477 if (r) 2478 return r; 2479 2480 r = r600_bc_add_literal(ctx->bc, ctx->value); 2481 if (r) 2482 return r; 2483 2484 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2485 alu.src[0].sel = ctx->temp_reg; 2486 alu.src[0].chan = 0; 2487 2488 alu.dst.sel = ctx->temp_reg; 2489 alu.dst.chan = 0; 2490 alu.dst.write = 1; 2491 alu.last = 1; 2492 2493 r = r600_bc_add_alu(ctx->bc, &alu); 2494 if (r) 2495 return r; 2496 2497 r = r600_bc_add_literal(ctx->bc, ctx->value); 2498 if (r) 2499 return r; 2500 } 2501 2502 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2503 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2504 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2505 2506 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2507 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2508 if (r) 2509 return r; 2510 2511 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2512 2513 alu.dst.sel = ctx->temp_reg; 2514 alu.dst.chan = 1; 2515 alu.dst.write = 1; 2516 alu.last = 1; 2517 2518 r = r600_bc_add_alu(ctx->bc, &alu); 2519 if (r) 2520 return r; 2521 2522 r = r600_bc_add_literal(ctx->bc, ctx->value); 2523 if (r) 2524 return r; 2525 2526 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2527 2528 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2529 alu.src[0].sel = ctx->temp_reg; 2530 alu.src[0].chan = 1; 2531 2532 alu.dst.sel = ctx->temp_reg; 2533 alu.dst.chan = 1; 2534 alu.dst.write = 1; 2535 alu.last = 1; 2536 2537 r = r600_bc_add_alu(ctx->bc, &alu); 2538 if (r) 2539 return r; 2540 2541 r = r600_bc_add_literal(ctx->bc, ctx->value); 2542 if (r) 2543 return r; 2544 2545 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2546 2547 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2548 alu.src[0].sel = ctx->temp_reg; 2549 alu.src[0].chan = 1; 2550 2551 alu.dst.sel = ctx->temp_reg; 2552 alu.dst.chan = 1; 2553 alu.dst.write = 1; 2554 alu.last = 1; 2555 2556 r = r600_bc_add_alu(ctx->bc, &alu); 2557 if (r) 2558 return r; 2559 2560 r = r600_bc_add_literal(ctx->bc, ctx->value); 2561 if (r) 2562 return r; 2563 2564 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2565 2566 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2567 alu.src[0].sel = ctx->temp_reg; 2568 alu.src[0].chan = 1; 2569 2570 alu.dst.sel = ctx->temp_reg; 2571 alu.dst.chan = 1; 2572 alu.dst.write = 1; 2573 alu.last = 1; 2574 2575 r = r600_bc_add_alu(ctx->bc, &alu); 2576 if (r) 2577 return r; 2578 2579 r = r600_bc_add_literal(ctx->bc, ctx->value); 2580 if (r) 2581 return r; 2582 2583 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2584 2585 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2586 2587 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2588 if (r) 2589 return r; 2590 2591 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2592 2593 alu.src[1].sel = ctx->temp_reg; 2594 alu.src[1].chan = 1; 2595 2596 alu.dst.sel = ctx->temp_reg; 2597 alu.dst.chan = 1; 2598 alu.dst.write = 1; 2599 alu.last = 1; 2600 2601 r = r600_bc_add_alu(ctx->bc, &alu); 2602 if (r) 2603 return r; 2604 2605 r = r600_bc_add_literal(ctx->bc, ctx->value); 2606 if (r) 2607 return r; 2608 } 2609 2610 /* result.z = log2(src);*/ 2611 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2612 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2613 2614 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2615 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2616 if (r) 2617 return r; 2618 2619 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2620 2621 alu.dst.sel = ctx->temp_reg; 2622 alu.dst.write = 1; 2623 alu.dst.chan = 2; 2624 alu.last = 1; 2625 2626 r = r600_bc_add_alu(ctx->bc, &alu); 2627 if (r) 2628 return r; 2629 2630 r = r600_bc_add_literal(ctx->bc, ctx->value); 2631 if (r) 2632 return r; 2633 } 2634 2635 /* result.w = 1.0; */ 2636 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2637 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2638 2639 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2640 alu.src[0].sel = V_SQ_ALU_SRC_1; 2641 alu.src[0].chan = 0; 2642 2643 alu.dst.sel = ctx->temp_reg; 2644 alu.dst.chan = 3; 2645 alu.dst.write = 1; 2646 alu.last = 1; 2647 2648 r = r600_bc_add_alu(ctx->bc, &alu); 2649 if (r) 2650 return r; 2651 2652 r = r600_bc_add_literal(ctx->bc, ctx->value); 2653 if (r) 2654 return r; 2655 } 2656 2657 return tgsi_helper_copy(ctx, inst); 2658} 2659 2660static int tgsi_eg_arl(struct r600_shader_ctx *ctx) 2661{ 2662 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2663 struct r600_bc_alu alu; 2664 int r; 2665 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2666 2667 switch (inst->Instruction.Opcode) { 2668 case TGSI_OPCODE_ARL: 2669 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR; 2670 break; 2671 case TGSI_OPCODE_ARR: 2672 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT; 2673 break; 2674 default: 2675 assert(0); 2676 return -1; 2677 } 2678 2679 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2680 if (r) 2681 return r; 2682 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2683 alu.last = 1; 2684 alu.dst.chan = 0; 2685 alu.dst.sel = ctx->temp_reg; 2686 alu.dst.write = 1; 2687 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2688 if (r) 2689 return r; 2690 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2691 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT; 2692 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2693 if (r) 2694 return r; 2695 alu.src[0].sel = ctx->temp_reg; 2696 alu.src[0].chan = 0; 2697 alu.last = 1; 2698 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2699 if (r) 2700 return r; 2701 return 0; 2702} 2703static int tgsi_r600_arl(struct r600_shader_ctx *ctx) 2704{ 2705 /* TODO from r600c, ar values don't persist between clauses */ 2706 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2707 struct r600_bc_alu alu; 2708 int r; 2709 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2710 2711 switch (inst->Instruction.Opcode) { 2712 case TGSI_OPCODE_ARL: 2713 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2714 break; 2715 case TGSI_OPCODE_ARR: 2716 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA; 2717 break; 2718 default: 2719 assert(0); 2720 return -1; 2721 } 2722 2723 2724 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2725 if (r) 2726 return r; 2727 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2728 2729 alu.last = 1; 2730 2731 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2732 if (r) 2733 return r; 2734 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2735 return 0; 2736} 2737 2738static int tgsi_opdst(struct r600_shader_ctx *ctx) 2739{ 2740 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2741 struct r600_bc_alu alu; 2742 int i, r = 0; 2743 2744 for (i = 0; i < 4; i++) { 2745 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2746 2747 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2748 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2749 if (r) 2750 return r; 2751 2752 if (i == 0 || i == 3) { 2753 alu.src[0].sel = V_SQ_ALU_SRC_1; 2754 } else { 2755 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2756 if (r) 2757 return r; 2758 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2759 } 2760 2761 if (i == 0 || i == 2) { 2762 alu.src[1].sel = V_SQ_ALU_SRC_1; 2763 } else { 2764 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2765 if (r) 2766 return r; 2767 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2768 } 2769 if (i == 3) 2770 alu.last = 1; 2771 r = r600_bc_add_alu(ctx->bc, &alu); 2772 if (r) 2773 return r; 2774 } 2775 return 0; 2776} 2777 2778static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2779{ 2780 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2781 struct r600_bc_alu alu; 2782 int r; 2783 2784 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2785 alu.inst = opcode; 2786 alu.predicate = 1; 2787 2788 alu.dst.sel = ctx->temp_reg; 2789 alu.dst.write = 1; 2790 alu.dst.chan = 0; 2791 2792 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2793 if (r) 2794 return r; 2795 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2796 alu.src[1].sel = V_SQ_ALU_SRC_0; 2797 alu.src[1].chan = 0; 2798 2799 alu.last = 1; 2800 2801 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2802 if (r) 2803 return r; 2804 return 0; 2805} 2806 2807static int pops(struct r600_shader_ctx *ctx, int pops) 2808{ 2809 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2810 ctx->bc->cf_last->pop_count = pops; 2811 return 0; 2812} 2813 2814static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2815{ 2816 switch(reason) { 2817 case FC_PUSH_VPM: 2818 ctx->bc->callstack[ctx->bc->call_sp].current--; 2819 break; 2820 case FC_PUSH_WQM: 2821 case FC_LOOP: 2822 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2823 break; 2824 case FC_REP: 2825 /* TOODO : for 16 vp asic should -= 2; */ 2826 ctx->bc->callstack[ctx->bc->call_sp].current --; 2827 break; 2828 } 2829} 2830 2831static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2832{ 2833 if (check_max_only) { 2834 int diff; 2835 switch (reason) { 2836 case FC_PUSH_VPM: 2837 diff = 1; 2838 break; 2839 case FC_PUSH_WQM: 2840 diff = 4; 2841 break; 2842 default: 2843 assert(0); 2844 diff = 0; 2845 } 2846 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2847 ctx->bc->callstack[ctx->bc->call_sp].max) { 2848 ctx->bc->callstack[ctx->bc->call_sp].max = 2849 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2850 } 2851 return; 2852 } 2853 switch (reason) { 2854 case FC_PUSH_VPM: 2855 ctx->bc->callstack[ctx->bc->call_sp].current++; 2856 break; 2857 case FC_PUSH_WQM: 2858 case FC_LOOP: 2859 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2860 break; 2861 case FC_REP: 2862 ctx->bc->callstack[ctx->bc->call_sp].current++; 2863 break; 2864 } 2865 2866 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2867 ctx->bc->callstack[ctx->bc->call_sp].max) { 2868 ctx->bc->callstack[ctx->bc->call_sp].max = 2869 ctx->bc->callstack[ctx->bc->call_sp].current; 2870 } 2871} 2872 2873static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2874{ 2875 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2876 2877 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2878 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2879 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2880 sp->num_mid++; 2881} 2882 2883static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2884{ 2885 ctx->bc->fc_sp++; 2886 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2887 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2888} 2889 2890static void fc_poplevel(struct r600_shader_ctx *ctx) 2891{ 2892 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2893 if (sp->mid) { 2894 free(sp->mid); 2895 sp->mid = NULL; 2896 } 2897 sp->num_mid = 0; 2898 sp->start = NULL; 2899 sp->type = 0; 2900 ctx->bc->fc_sp--; 2901} 2902 2903#if 0 2904static int emit_return(struct r600_shader_ctx *ctx) 2905{ 2906 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2907 return 0; 2908} 2909 2910static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2911{ 2912 2913 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2914 ctx->bc->cf_last->pop_count = pops; 2915 /* TODO work out offset */ 2916 return 0; 2917} 2918 2919static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2920{ 2921 return 0; 2922} 2923 2924static void emit_testflag(struct r600_shader_ctx *ctx) 2925{ 2926 2927} 2928 2929static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2930{ 2931 emit_testflag(ctx); 2932 emit_jump_to_offset(ctx, 1, 4); 2933 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2934 pops(ctx, ifidx + 1); 2935 emit_return(ctx); 2936} 2937 2938static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2939{ 2940 emit_testflag(ctx); 2941 2942 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2943 ctx->bc->cf_last->pop_count = 1; 2944 2945 fc_set_mid(ctx, fc_sp); 2946 2947 pops(ctx, 1); 2948} 2949#endif 2950 2951static int tgsi_if(struct r600_shader_ctx *ctx) 2952{ 2953 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2954 2955 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2956 2957 fc_pushlevel(ctx, FC_IF); 2958 2959 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2960 return 0; 2961} 2962 2963static int tgsi_else(struct r600_shader_ctx *ctx) 2964{ 2965 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2966 ctx->bc->cf_last->pop_count = 1; 2967 2968 fc_set_mid(ctx, ctx->bc->fc_sp); 2969 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2970 return 0; 2971} 2972 2973static int tgsi_endif(struct r600_shader_ctx *ctx) 2974{ 2975 pops(ctx, 1); 2976 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2977 R600_ERR("if/endif unbalanced in shader\n"); 2978 return -1; 2979 } 2980 2981 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2982 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2983 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2984 } else { 2985 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2986 } 2987 fc_poplevel(ctx); 2988 2989 callstack_decrease_current(ctx, FC_PUSH_VPM); 2990 return 0; 2991} 2992 2993static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2994{ 2995 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2996 2997 fc_pushlevel(ctx, FC_LOOP); 2998 2999 /* check stack depth */ 3000 callstack_check_depth(ctx, FC_LOOP, 0); 3001 return 0; 3002} 3003 3004static int tgsi_endloop(struct r600_shader_ctx *ctx) 3005{ 3006 int i; 3007 3008 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 3009 3010 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 3011 R600_ERR("loop/endloop in shader code are not paired.\n"); 3012 return -EINVAL; 3013 } 3014 3015 /* fixup loop pointers - from r600isa 3016 LOOP END points to CF after LOOP START, 3017 LOOP START point to CF after LOOP END 3018 BRK/CONT point to LOOP END CF 3019 */ 3020 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 3021 3022 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 3023 3024 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 3025 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 3026 } 3027 /* TODO add LOOPRET support */ 3028 fc_poplevel(ctx); 3029 callstack_decrease_current(ctx, FC_LOOP); 3030 return 0; 3031} 3032 3033static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 3034{ 3035 unsigned int fscp; 3036 3037 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 3038 { 3039 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 3040 break; 3041 } 3042 3043 if (fscp == 0) { 3044 R600_ERR("Break not inside loop/endloop pair\n"); 3045 return -EINVAL; 3046 } 3047 3048 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 3049 ctx->bc->cf_last->pop_count = 1; 3050 3051 fc_set_mid(ctx, fscp); 3052 3053 pops(ctx, 1); 3054 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 3055 return 0; 3056} 3057 3058static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 3059 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3060 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3061 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3062 3063 /* FIXME: 3064 * For state trackers other than OpenGL, we'll want to use 3065 * _RECIP_IEEE instead. 3066 */ 3067 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 3068 3069 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 3070 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3071 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 3072 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3073 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3074 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3075 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3076 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3077 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3078 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3079 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3080 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3081 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3082 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3083 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3084 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3085 /* gap */ 3086 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3087 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3088 /* gap */ 3089 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3090 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3091 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3092 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3093 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3094 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3095 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3096 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3097 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3098 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3099 /* gap */ 3100 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3101 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3102 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3103 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3104 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3105 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3106 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3107 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3108 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3109 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3110 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3111 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3112 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3113 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3114 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3115 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3116 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3117 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3118 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3119 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3120 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3121 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3122 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3123 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3124 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3125 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3126 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3127 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3128 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3129 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_r600_arl}, 3130 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3131 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3132 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3133 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3134 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3135 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3136 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3137 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3138 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3139 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3140 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3141 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3142 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3143 /* gap */ 3144 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3145 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3146 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3147 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3148 /* gap */ 3149 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3150 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3151 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3152 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3153 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3154 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3155 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3156 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3157 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3158 /* gap */ 3159 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3160 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3161 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3162 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3163 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3164 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3165 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3166 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3167 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3168 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3169 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3170 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3171 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3172 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3173 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3174 /* gap */ 3175 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3176 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3177 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3178 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3179 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3180 /* gap */ 3181 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3182 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3183 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3184 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3185 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3186 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3187 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3188 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3189 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3190 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3191 /* gap */ 3192 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3193 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3194 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3195 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3196 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3197 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3198 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3199 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3200 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3201 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3202 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3203 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3204 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3205 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3206 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3207 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3208 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3209 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3210 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3211 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3212 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3213 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3214 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3215 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3216 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3217 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3218 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3219 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3220}; 3221 3222static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3223 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3224 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3225 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3226 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3227 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 3228 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3229 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3230 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3231 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3232 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3233 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3234 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3235 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3236 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3237 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3238 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3239 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3240 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3241 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3242 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3243 /* gap */ 3244 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3245 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3246 /* gap */ 3247 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3248 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3249 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3250 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3251 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3252 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3253 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3254 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3255 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3256 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3257 /* gap */ 3258 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3259 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3260 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3261 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3262 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3263 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3264 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3265 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3266 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3267 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3268 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3269 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3270 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3271 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3272 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3273 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3274 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3275 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3276 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3277 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3278 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3279 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3280 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3281 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3282 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3283 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3284 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3285 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3286 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3287 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, 3288 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3289 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3290 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3291 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3292 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3293 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3294 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3295 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3296 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3297 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3298 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3299 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3300 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3301 /* gap */ 3302 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3303 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3304 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3305 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3306 /* gap */ 3307 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3308 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3309 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3310 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3311 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3312 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3313 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3314 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3315 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3316 /* gap */ 3317 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3318 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3319 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3320 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3321 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3322 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3323 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3324 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3325 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3326 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3327 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3328 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3329 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3330 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3331 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3332 /* gap */ 3333 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3334 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3335 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3336 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3337 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3338 /* gap */ 3339 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3340 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3341 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3342 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3343 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3344 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3345 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3346 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3347 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3348 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3349 /* gap */ 3350 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3351 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3352 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3353 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3354 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3355 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3356 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3357 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3358 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3359 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3360 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3361 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3362 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3363 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3364 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3365 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3366 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3367 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3368 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3369 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3370 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3371 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3372 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3373 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3374 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3375 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3376 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3377 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3378}; 3379