r600_shader.c revision 40cc5bfcd70e412289dbb32a1ebca91bf109e1bd
1/* 2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23#include "pipe/p_shader_tokens.h" 24#include "tgsi/tgsi_parse.h" 25#include "tgsi/tgsi_scan.h" 26#include "tgsi/tgsi_dump.h" 27#include "util/u_format.h" 28#include "r600_pipe.h" 29#include "r600_asm.h" 30#include "r600_sq.h" 31#include "r600_opcodes.h" 32#include "r600d.h" 33#include <stdio.h> 34#include <errno.h> 35 36static void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) 37{ 38 struct r600_pipe_state *rstate = &shader->rstate; 39 struct r600_shader *rshader = &shader->shader; 40 unsigned spi_vs_out_id[10]; 41 unsigned i, tmp; 42 43 /* clear previous register */ 44 rstate->nregs = 0; 45 46 /* so far never got proper semantic id from tgsi */ 47 for (i = 0; i < 10; i++) { 48 spi_vs_out_id[i] = 0; 49 } 50 for (i = 0; i < 32; i++) { 51 tmp = i << ((i & 3) * 8); 52 spi_vs_out_id[i / 4] |= tmp; 53 } 54 for (i = 0; i < 10; i++) { 55 r600_pipe_state_add_reg(rstate, 56 R_028614_SPI_VS_OUT_ID_0 + i * 4, 57 spi_vs_out_id[i], 0xFFFFFFFF, NULL); 58 } 59 60 r600_pipe_state_add_reg(rstate, 61 R_0286C4_SPI_VS_OUT_CONFIG, 62 S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2), 63 0xFFFFFFFF, NULL); 64 r600_pipe_state_add_reg(rstate, 65 R_028868_SQ_PGM_RESOURCES_VS, 66 S_028868_NUM_GPRS(rshader->bc.ngpr) | 67 S_028868_STACK_SIZE(rshader->bc.nstack), 68 0xFFFFFFFF, NULL); 69 r600_pipe_state_add_reg(rstate, 70 R_0288A4_SQ_PGM_RESOURCES_FS, 71 0x00000000, 0xFFFFFFFF, NULL); 72 r600_pipe_state_add_reg(rstate, 73 R_0288D0_SQ_PGM_CF_OFFSET_VS, 74 0x00000000, 0xFFFFFFFF, NULL); 75 r600_pipe_state_add_reg(rstate, 76 R_0288DC_SQ_PGM_CF_OFFSET_FS, 77 0x00000000, 0xFFFFFFFF, NULL); 78 r600_pipe_state_add_reg(rstate, 79 R_028858_SQ_PGM_START_VS, 80 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 81 r600_pipe_state_add_reg(rstate, 82 R_028894_SQ_PGM_START_FS, 83 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 84 85 r600_pipe_state_add_reg(rstate, 86 R_03E200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, 87 0xFFFFFFFF, NULL); 88 89} 90 91int r600_find_vs_semantic_index(struct r600_shader *vs, 92 struct r600_shader *ps, int id) 93{ 94 struct r600_shader_io *input = &ps->input[id]; 95 96 for (int i = 0; i < vs->noutput; i++) { 97 if (input->name == vs->output[i].name && 98 input->sid == vs->output[i].sid) { 99 return i - 1; 100 } 101 } 102 return 0; 103} 104 105static void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) 106{ 107 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 108 struct r600_pipe_state *rstate = &shader->rstate; 109 struct r600_shader *rshader = &shader->shader; 110 unsigned i, tmp, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z; 111 boolean have_pos = FALSE, have_face = FALSE; 112 113 /* clear previous register */ 114 rstate->nregs = 0; 115 116 for (i = 0; i < rshader->ninput; i++) { 117 tmp = S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); 118 tmp |= S_028644_SEL_CENTROID(1); 119 if (rshader->input[i].name == TGSI_SEMANTIC_POSITION) 120 have_pos = TRUE; 121 if (rshader->input[i].name == TGSI_SEMANTIC_COLOR || 122 rshader->input[i].name == TGSI_SEMANTIC_BCOLOR || 123 rshader->input[i].name == TGSI_SEMANTIC_POSITION) { 124 tmp |= S_028644_FLAT_SHADE(rshader->flat_shade); 125 } 126 if (rshader->input[i].name == TGSI_SEMANTIC_FACE) 127 have_face = TRUE; 128 if (rshader->input[i].name == TGSI_SEMANTIC_GENERIC && 129 rctx->sprite_coord_enable & (1 << rshader->input[i].sid)) { 130 tmp |= S_028644_PT_SPRITE_TEX(1); 131 } 132 r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); 133 } 134 for (i = 0; i < rshader->noutput; i++) { 135 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION) 136 r600_pipe_state_add_reg(rstate, 137 R_02880C_DB_SHADER_CONTROL, 138 S_02880C_Z_EXPORT_ENABLE(1), 139 S_02880C_Z_EXPORT_ENABLE(1), NULL); 140 if (rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 141 r600_pipe_state_add_reg(rstate, 142 R_02880C_DB_SHADER_CONTROL, 143 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), 144 S_02880C_STENCIL_REF_EXPORT_ENABLE(1), NULL); 145 } 146 147 exports_ps = 0; 148 num_cout = 0; 149 for (i = 0; i < rshader->noutput; i++) { 150 if (rshader->output[i].name == TGSI_SEMANTIC_POSITION || rshader->output[i].name == TGSI_SEMANTIC_STENCIL) 151 exports_ps |= 1; 152 else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) { 153 num_cout++; 154 } 155 } 156 exports_ps |= S_028854_EXPORT_COLORS(num_cout); 157 if (!exports_ps) { 158 /* always at least export 1 component per pixel */ 159 exports_ps = 2; 160 } 161 162 spi_ps_in_control_0 = S_0286CC_NUM_INTERP(rshader->ninput) | 163 S_0286CC_PERSP_GRADIENT_ENA(1); 164 spi_input_z = 0; 165 if (have_pos) { 166 spi_ps_in_control_0 |= S_0286CC_POSITION_ENA(1) | 167 S_0286CC_BARYC_SAMPLE_CNTL(1); 168 spi_input_z |= 1; 169 } 170 r600_pipe_state_add_reg(rstate, R_0286CC_SPI_PS_IN_CONTROL_0, spi_ps_in_control_0, 0xFFFFFFFF, NULL); 171 r600_pipe_state_add_reg(rstate, R_0286D0_SPI_PS_IN_CONTROL_1, S_0286D0_FRONT_FACE_ENA(have_face), 0xFFFFFFFF, NULL); 172 r600_pipe_state_add_reg(rstate, R_0286D8_SPI_INPUT_Z, spi_input_z, 0xFFFFFFFF, NULL); 173 r600_pipe_state_add_reg(rstate, 174 R_028840_SQ_PGM_START_PS, 175 r600_bo_offset(shader->bo) >> 8, 0xFFFFFFFF, shader->bo); 176 r600_pipe_state_add_reg(rstate, 177 R_028850_SQ_PGM_RESOURCES_PS, 178 S_028868_NUM_GPRS(rshader->bc.ngpr) | 179 S_028868_STACK_SIZE(rshader->bc.nstack), 180 0xFFFFFFFF, NULL); 181 r600_pipe_state_add_reg(rstate, 182 R_028854_SQ_PGM_EXPORTS_PS, 183 exports_ps, 0xFFFFFFFF, NULL); 184 r600_pipe_state_add_reg(rstate, 185 R_0288CC_SQ_PGM_CF_OFFSET_PS, 186 0x00000000, 0xFFFFFFFF, NULL); 187 188 if (rshader->uses_kill) { 189 /* only set some bits here, the other bits are set in the dsa state */ 190 r600_pipe_state_add_reg(rstate, 191 R_02880C_DB_SHADER_CONTROL, 192 S_02880C_KILL_ENABLE(1), 193 S_02880C_KILL_ENABLE(1), NULL); 194 } 195 r600_pipe_state_add_reg(rstate, 196 R_03E200_SQ_LOOP_CONST_0, 0x01000FFF, 197 0xFFFFFFFF, NULL); 198} 199 200static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader) 201{ 202 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 203 struct r600_shader *rshader = &shader->shader; 204 void *ptr; 205 206 /* copy new shader */ 207 if (shader->bo == NULL) { 208 shader->bo = r600_bo(rctx->radeon, rshader->bc.ndw * 4, 4096, 0); 209 if (shader->bo == NULL) { 210 return -ENOMEM; 211 } 212 ptr = r600_bo_map(rctx->radeon, shader->bo, 0, NULL); 213 memcpy(ptr, rshader->bc.bytecode, rshader->bc.ndw * 4); 214 r600_bo_unmap(rctx->radeon, shader->bo); 215 } 216 /* build state */ 217 rshader->flat_shade = rctx->flatshade; 218 switch (rshader->processor_type) { 219 case TGSI_PROCESSOR_VERTEX: 220 if (rshader->family >= CHIP_CEDAR) { 221 evergreen_pipe_shader_vs(ctx, shader); 222 } else { 223 r600_pipe_shader_vs(ctx, shader); 224 } 225 break; 226 case TGSI_PROCESSOR_FRAGMENT: 227 if (rshader->family >= CHIP_CEDAR) { 228 evergreen_pipe_shader_ps(ctx, shader); 229 } else { 230 r600_pipe_shader_ps(ctx, shader); 231 } 232 break; 233 default: 234 return -EINVAL; 235 } 236 r600_context_pipe_state_set(&rctx->ctx, &shader->rstate); 237 return 0; 238} 239 240static int r600_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *rshader) 241{ 242 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 243 struct r600_shader *shader = &rshader->shader; 244 const struct util_format_description *desc; 245 enum pipe_format resource_format[160]; 246 unsigned i, nresources = 0; 247 struct r600_bc *bc = &shader->bc; 248 struct r600_bc_cf *cf; 249 struct r600_bc_vtx *vtx; 250 251 if (shader->processor_type != TGSI_PROCESSOR_VERTEX) 252 return 0; 253 /* doing a full memcmp fell over the refcount */ 254 if ((rshader->vertex_elements.count == rctx->vertex_elements->count) && 255 (!memcmp(&rshader->vertex_elements.elements, &rctx->vertex_elements->elements, 32 * sizeof(struct pipe_vertex_element)))) { 256 return 0; 257 } 258 rshader->vertex_elements = *rctx->vertex_elements; 259 for (i = 0; i < rctx->vertex_elements->count; i++) { 260 resource_format[nresources++] = rctx->vertex_elements->elements[i].src_format; 261 } 262 r600_bo_reference(rctx->radeon, &rshader->bo, NULL); 263 LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { 264 switch (cf->inst) { 265 case V_SQ_CF_WORD1_SQ_CF_INST_VTX: 266 case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC: 267 LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { 268 desc = util_format_description(resource_format[vtx->buffer_id]); 269 if (desc == NULL) { 270 R600_ERR("unknown format %d\n", resource_format[vtx->buffer_id]); 271 return -EINVAL; 272 } 273 vtx->dst_sel_x = desc->swizzle[0]; 274 vtx->dst_sel_y = desc->swizzle[1]; 275 vtx->dst_sel_z = desc->swizzle[2]; 276 vtx->dst_sel_w = desc->swizzle[3]; 277 } 278 break; 279 default: 280 break; 281 } 282 } 283 return r600_bc_build(&shader->bc); 284} 285 286int r600_pipe_shader_update(struct pipe_context *ctx, struct r600_pipe_shader *shader) 287{ 288 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 289 int r; 290 291 if (shader == NULL) 292 return -EINVAL; 293 /* there should be enough input */ 294 if (rctx->vertex_elements->count < shader->shader.bc.nresource) { 295 R600_ERR("%d resources provided, expecting %d\n", 296 rctx->vertex_elements->count, shader->shader.bc.nresource); 297 return -EINVAL; 298 } 299 r = r600_shader_update(ctx, shader); 300 if (r) 301 return r; 302 return r600_pipe_shader(ctx, shader); 303} 304 305int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader); 306int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens) 307{ 308 struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; 309 int r; 310 311//fprintf(stderr, "--------------------------------------------------------------\n"); 312//tgsi_dump(tokens, 0); 313 shader->shader.family = r600_get_family(rctx->radeon); 314 r = r600_shader_from_tgsi(tokens, &shader->shader); 315 if (r) { 316 R600_ERR("translation from TGSI failed !\n"); 317 return r; 318 } 319 r = r600_bc_build(&shader->shader.bc); 320 if (r) { 321 R600_ERR("building bytecode failed !\n"); 322 return r; 323 } 324//fprintf(stderr, "______________________________________________________________\n"); 325 return 0; 326} 327 328/* 329 * tgsi -> r600 shader 330 */ 331struct r600_shader_tgsi_instruction; 332 333struct r600_shader_ctx { 334 struct tgsi_shader_info info; 335 struct tgsi_parse_context parse; 336 const struct tgsi_token *tokens; 337 unsigned type; 338 unsigned file_offset[TGSI_FILE_COUNT]; 339 unsigned temp_reg; 340 struct r600_shader_tgsi_instruction *inst_info; 341 struct r600_bc *bc; 342 struct r600_shader *shader; 343 u32 value[4]; 344 u32 *literals; 345 u32 nliterals; 346 u32 max_driver_temp_used; 347}; 348 349struct r600_shader_tgsi_instruction { 350 unsigned tgsi_opcode; 351 unsigned is_op3; 352 unsigned r600_opcode; 353 int (*process)(struct r600_shader_ctx *ctx); 354}; 355 356static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; 357static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); 358 359static int tgsi_is_supported(struct r600_shader_ctx *ctx) 360{ 361 struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction; 362 int j; 363 364 if (i->Instruction.NumDstRegs > 1) { 365 R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); 366 return -EINVAL; 367 } 368 if (i->Instruction.Predicate) { 369 R600_ERR("predicate unsupported\n"); 370 return -EINVAL; 371 } 372#if 0 373 if (i->Instruction.Label) { 374 R600_ERR("label unsupported\n"); 375 return -EINVAL; 376 } 377#endif 378 for (j = 0; j < i->Instruction.NumSrcRegs; j++) { 379 if (i->Src[j].Register.Dimension || 380 i->Src[j].Register.Absolute) { 381 R600_ERR("unsupported src %d (dimension %d|absolute %d)\n", j, 382 i->Src[j].Register.Dimension, 383 i->Src[j].Register.Absolute); 384 return -EINVAL; 385 } 386 } 387 for (j = 0; j < i->Instruction.NumDstRegs; j++) { 388 if (i->Dst[j].Register.Dimension) { 389 R600_ERR("unsupported dst (dimension)\n"); 390 return -EINVAL; 391 } 392 } 393 return 0; 394} 395 396static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int gpr) 397{ 398 int i, r; 399 struct r600_bc_alu alu; 400 401 for (i = 0; i < 8; i++) { 402 memset(&alu, 0, sizeof(struct r600_bc_alu)); 403 404 if (i < 4) 405 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW; 406 else 407 alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY; 408 409 if ((i > 1) && (i < 6)) { 410 alu.dst.sel = ctx->shader->input[gpr].gpr; 411 alu.dst.write = 1; 412 } 413 414 alu.dst.chan = i % 4; 415 alu.src[0].chan = (1 - (i % 2)); 416 alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + gpr; 417 418 alu.bank_swizzle_force = SQ_ALU_VEC_210; 419 if ((i % 4) == 3) 420 alu.last = 1; 421 r = r600_bc_add_alu(ctx->bc, &alu); 422 if (r) 423 return r; 424 } 425 return 0; 426} 427 428 429static int tgsi_declaration(struct r600_shader_ctx *ctx) 430{ 431 struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration; 432 struct r600_bc_vtx vtx; 433 unsigned i; 434 int r; 435 436 switch (d->Declaration.File) { 437 case TGSI_FILE_INPUT: 438 i = ctx->shader->ninput++; 439 ctx->shader->input[i].name = d->Semantic.Name; 440 ctx->shader->input[i].sid = d->Semantic.Index; 441 ctx->shader->input[i].interpolate = d->Declaration.Interpolate; 442 ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; 443 if (ctx->type == TGSI_PROCESSOR_VERTEX) { 444 /* turn input into fetch */ 445 memset(&vtx, 0, sizeof(struct r600_bc_vtx)); 446 vtx.inst = 0; 447 vtx.fetch_type = 0; 448 vtx.buffer_id = i; 449 /* register containing the index into the buffer */ 450 vtx.src_gpr = 0; 451 vtx.src_sel_x = 0; 452 vtx.mega_fetch_count = 0x1F; 453 vtx.dst_gpr = ctx->shader->input[i].gpr; 454 vtx.dst_sel_x = 0; 455 vtx.dst_sel_y = 1; 456 vtx.dst_sel_z = 2; 457 vtx.dst_sel_w = 3; 458 vtx.use_const_fields = 1; 459 r = r600_bc_add_vtx(ctx->bc, &vtx); 460 if (r) 461 return r; 462 } 463 if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == 2) { 464 /* turn input into interpolate on EG */ 465 evergreen_interp_alu(ctx, i); 466 } 467 break; 468 case TGSI_FILE_OUTPUT: 469 i = ctx->shader->noutput++; 470 ctx->shader->output[i].name = d->Semantic.Name; 471 ctx->shader->output[i].sid = d->Semantic.Index; 472 ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i; 473 ctx->shader->output[i].interpolate = d->Declaration.Interpolate; 474 break; 475 case TGSI_FILE_CONSTANT: 476 case TGSI_FILE_TEMPORARY: 477 case TGSI_FILE_SAMPLER: 478 case TGSI_FILE_ADDRESS: 479 break; 480 default: 481 R600_ERR("unsupported file %d declaration\n", d->Declaration.File); 482 return -EINVAL; 483 } 484 return 0; 485} 486 487static int r600_get_temp(struct r600_shader_ctx *ctx) 488{ 489 return ctx->temp_reg + ctx->max_driver_temp_used++; 490} 491 492int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader) 493{ 494 struct tgsi_full_immediate *immediate; 495 struct r600_shader_ctx ctx; 496 struct r600_bc_output output[32]; 497 unsigned output_done, noutput; 498 unsigned opcode; 499 int i, r = 0, pos0; 500 501 ctx.bc = &shader->bc; 502 ctx.shader = shader; 503 r = r600_bc_init(ctx.bc, shader->family); 504 if (r) 505 return r; 506 ctx.tokens = tokens; 507 tgsi_scan_shader(tokens, &ctx.info); 508 tgsi_parse_init(&ctx.parse, tokens); 509 ctx.type = ctx.parse.FullHeader.Processor.Processor; 510 shader->processor_type = ctx.type; 511 512 /* register allocations */ 513 /* Values [0,127] correspond to GPR[0..127]. 514 * Values [128,159] correspond to constant buffer bank 0 515 * Values [160,191] correspond to constant buffer bank 1 516 * Values [256,511] correspond to cfile constants c[0..255]. 517 * Other special values are shown in the list below. 518 * 244 ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+) 519 * 245 ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+) 520 * 246 ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+) 521 * 247 ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+) 522 * 248 SQ_ALU_SRC_0: special constant 0.0. 523 * 249 SQ_ALU_SRC_1: special constant 1.0 float. 524 * 250 SQ_ALU_SRC_1_INT: special constant 1 integer. 525 * 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. 526 * 252 SQ_ALU_SRC_0_5: special constant 0.5 float. 527 * 253 SQ_ALU_SRC_LITERAL: literal constant. 528 * 254 SQ_ALU_SRC_PV: previous vector result. 529 * 255 SQ_ALU_SRC_PS: previous scalar result. 530 */ 531 for (i = 0; i < TGSI_FILE_COUNT; i++) { 532 ctx.file_offset[i] = 0; 533 } 534 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 535 ctx.file_offset[TGSI_FILE_INPUT] = 1; 536 } 537 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == 2) { 538 ctx.file_offset[TGSI_FILE_INPUT] = 1; 539 } 540 ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + 541 ctx.info.file_count[TGSI_FILE_INPUT]; 542 ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + 543 ctx.info.file_count[TGSI_FILE_OUTPUT]; 544 545 ctx.file_offset[TGSI_FILE_CONSTANT] = 128; 546 547 ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253; 548 ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 549 ctx.info.file_count[TGSI_FILE_TEMPORARY]; 550 551 ctx.nliterals = 0; 552 ctx.literals = NULL; 553 554 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 555 tgsi_parse_token(&ctx.parse); 556 switch (ctx.parse.FullToken.Token.Type) { 557 case TGSI_TOKEN_TYPE_IMMEDIATE: 558 immediate = &ctx.parse.FullToken.FullImmediate; 559 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 560 if(ctx.literals == NULL) { 561 r = -ENOMEM; 562 goto out_err; 563 } 564 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 565 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 566 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 567 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 568 ctx.nliterals++; 569 break; 570 case TGSI_TOKEN_TYPE_DECLARATION: 571 r = tgsi_declaration(&ctx); 572 if (r) 573 goto out_err; 574 break; 575 case TGSI_TOKEN_TYPE_INSTRUCTION: 576 r = tgsi_is_supported(&ctx); 577 if (r) 578 goto out_err; 579 ctx.max_driver_temp_used = 0; 580 /* reserve first tmp for everyone */ 581 r600_get_temp(&ctx); 582 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 583 if (ctx.bc->chiprev == 2) 584 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 585 else 586 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 587 r = ctx.inst_info->process(&ctx); 588 if (r) 589 goto out_err; 590 r = r600_bc_add_literal(ctx.bc, ctx.value); 591 if (r) 592 goto out_err; 593 break; 594 default: 595 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 596 r = -EINVAL; 597 goto out_err; 598 } 599 } 600 /* export output */ 601 noutput = shader->noutput; 602 for (i = 0, pos0 = 0; i < noutput; i++) { 603 memset(&output[i], 0, sizeof(struct r600_bc_output)); 604 output[i].gpr = shader->output[i].gpr; 605 output[i].elem_size = 3; 606 output[i].swizzle_x = 0; 607 output[i].swizzle_y = 1; 608 output[i].swizzle_z = 2; 609 output[i].swizzle_w = 3; 610 output[i].barrier = 1; 611 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 612 output[i].array_base = i - pos0; 613 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 614 switch (ctx.type) { 615 case TGSI_PROCESSOR_VERTEX: 616 if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 617 output[i].array_base = 60; 618 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 619 /* position doesn't count in array_base */ 620 pos0++; 621 } 622 if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) { 623 output[i].array_base = 61; 624 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 625 /* position doesn't count in array_base */ 626 pos0++; 627 } 628 break; 629 case TGSI_PROCESSOR_FRAGMENT: 630 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 631 output[i].array_base = shader->output[i].sid; 632 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 633 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 634 output[i].array_base = 61; 635 output[i].swizzle_x = 2; 636 output[i].swizzle_y = 7; 637 output[i].swizzle_z = output[i].swizzle_w = 7; 638 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 639 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 640 output[i].array_base = 61; 641 output[i].swizzle_x = 7; 642 output[i].swizzle_y = 1; 643 output[i].swizzle_z = output[i].swizzle_w = 7; 644 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 645 } else { 646 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 647 r = -EINVAL; 648 goto out_err; 649 } 650 break; 651 default: 652 R600_ERR("unsupported processor type %d\n", ctx.type); 653 r = -EINVAL; 654 goto out_err; 655 } 656 } 657 /* add fake param output for vertex shader if no param is exported */ 658 if (ctx.type == TGSI_PROCESSOR_VERTEX) { 659 for (i = 0, pos0 = 0; i < noutput; i++) { 660 if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) { 661 pos0 = 1; 662 break; 663 } 664 } 665 if (!pos0) { 666 memset(&output[i], 0, sizeof(struct r600_bc_output)); 667 output[i].gpr = 0; 668 output[i].elem_size = 3; 669 output[i].swizzle_x = 0; 670 output[i].swizzle_y = 1; 671 output[i].swizzle_z = 2; 672 output[i].swizzle_w = 3; 673 output[i].barrier = 1; 674 output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 675 output[i].array_base = 0; 676 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 677 noutput++; 678 } 679 } 680 /* add fake pixel export */ 681 if (ctx.type == TGSI_PROCESSOR_FRAGMENT && !noutput) { 682 memset(&output[0], 0, sizeof(struct r600_bc_output)); 683 output[0].gpr = 0; 684 output[0].elem_size = 3; 685 output[0].swizzle_x = 7; 686 output[0].swizzle_y = 7; 687 output[0].swizzle_z = 7; 688 output[0].swizzle_w = 7; 689 output[0].barrier = 1; 690 output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 691 output[0].array_base = 0; 692 output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT); 693 noutput++; 694 } 695 /* set export done on last export of each type */ 696 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 697 if (i == (noutput - 1)) { 698 output[i].end_of_program = 1; 699 } 700 if (!(output_done & (1 << output[i].type))) { 701 output_done |= (1 << output[i].type); 702 output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE); 703 } 704 } 705 /* add output to bytecode */ 706 for (i = 0; i < noutput; i++) { 707 r = r600_bc_add_output(ctx.bc, &output[i]); 708 if (r) 709 goto out_err; 710 } 711 free(ctx.literals); 712 tgsi_parse_free(&ctx.parse); 713 return 0; 714out_err: 715 free(ctx.literals); 716 tgsi_parse_free(&ctx.parse); 717 return r; 718} 719 720static int tgsi_unsupported(struct r600_shader_ctx *ctx) 721{ 722 R600_ERR("%d tgsi opcode unsupported\n", ctx->inst_info->tgsi_opcode); 723 return -EINVAL; 724} 725 726static int tgsi_end(struct r600_shader_ctx *ctx) 727{ 728 return 0; 729} 730 731static int tgsi_src(struct r600_shader_ctx *ctx, 732 const struct tgsi_full_src_register *tgsi_src, 733 struct r600_bc_alu_src *r600_src) 734{ 735 int index; 736 memset(r600_src, 0, sizeof(struct r600_bc_alu_src)); 737 r600_src->sel = tgsi_src->Register.Index; 738 if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) { 739 r600_src->sel = 0; 740 index = tgsi_src->Register.Index; 741 ctx->value[0] = ctx->literals[index * 4 + 0]; 742 ctx->value[1] = ctx->literals[index * 4 + 1]; 743 ctx->value[2] = ctx->literals[index * 4 + 2]; 744 ctx->value[3] = ctx->literals[index * 4 + 3]; 745 } 746 if (tgsi_src->Register.Indirect) 747 r600_src->rel = V_SQ_REL_RELATIVE; 748 r600_src->neg = tgsi_src->Register.Negate; 749 r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; 750 return 0; 751} 752 753static int tgsi_dst(struct r600_shader_ctx *ctx, 754 const struct tgsi_full_dst_register *tgsi_dst, 755 unsigned swizzle, 756 struct r600_bc_alu_dst *r600_dst) 757{ 758 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 759 760 r600_dst->sel = tgsi_dst->Register.Index; 761 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 762 r600_dst->chan = swizzle; 763 r600_dst->write = 1; 764 if (tgsi_dst->Register.Indirect) 765 r600_dst->rel = V_SQ_REL_RELATIVE; 766 if (inst->Instruction.Saturate) { 767 r600_dst->clamp = 1; 768 } 769 return 0; 770} 771 772static unsigned tgsi_chan(const struct tgsi_full_src_register *tgsi_src, unsigned swizzle) 773{ 774 switch (swizzle) { 775 case 0: 776 return tgsi_src->Register.SwizzleX; 777 case 1: 778 return tgsi_src->Register.SwizzleY; 779 case 2: 780 return tgsi_src->Register.SwizzleZ; 781 case 3: 782 return tgsi_src->Register.SwizzleW; 783 default: 784 return 0; 785 } 786} 787 788static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 789{ 790 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 791 struct r600_bc_alu alu; 792 int i, j, k, nconst, r; 793 794 for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) { 795 if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 796 nconst++; 797 } 798 r = tgsi_src(ctx, &inst->Src[i], &r600_src[i]); 799 if (r) { 800 return r; 801 } 802 } 803 for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) { 804 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_CONSTANT) { 805 int treg = r600_get_temp(ctx); 806 for (k = 0; k < 4; k++) { 807 memset(&alu, 0, sizeof(struct r600_bc_alu)); 808 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 809 alu.src[0].sel = r600_src[i].sel; 810 alu.src[0].chan = k; 811 alu.src[0].rel = r600_src[i].rel; 812 alu.dst.sel = treg; 813 alu.dst.chan = k; 814 alu.dst.write = 1; 815 if (k == 3) 816 alu.last = 1; 817 r = r600_bc_add_alu(ctx->bc, &alu); 818 if (r) 819 return r; 820 } 821 r600_src[i].sel = treg; 822 r600_src[i].rel =0; 823 j--; 824 } 825 } 826 return 0; 827} 828 829/* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 830static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3]) 831{ 832 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 833 struct r600_bc_alu alu; 834 int i, j, k, nliteral, r; 835 836 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 837 if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 838 nliteral++; 839 } 840 } 841 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 842 if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) { 843 int treg = r600_get_temp(ctx); 844 for (k = 0; k < 4; k++) { 845 memset(&alu, 0, sizeof(struct r600_bc_alu)); 846 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 847 alu.src[0].sel = r600_src[i].sel; 848 alu.src[0].chan = k; 849 alu.dst.sel = treg; 850 alu.dst.chan = k; 851 alu.dst.write = 1; 852 if (k == 3) 853 alu.last = 1; 854 r = r600_bc_add_alu(ctx->bc, &alu); 855 if (r) 856 return r; 857 } 858 r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]); 859 if (r) 860 return r; 861 r600_src[i].sel = treg; 862 j--; 863 } 864 } 865 return 0; 866} 867 868static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap) 869{ 870 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 871 struct r600_bc_alu_src r600_src[3]; 872 struct r600_bc_alu alu; 873 int i, j, r; 874 int lasti = 0; 875 876 for (i = 0; i < 4; i++) { 877 if (inst->Dst[0].Register.WriteMask & (1 << i)) { 878 lasti = i; 879 } 880 } 881 882 r = tgsi_split_constant(ctx, r600_src); 883 if (r) 884 return r; 885 r = tgsi_split_literal_constant(ctx, r600_src); 886 if (r) 887 return r; 888 for (i = 0; i < lasti + 1; i++) { 889 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 890 continue; 891 892 memset(&alu, 0, sizeof(struct r600_bc_alu)); 893 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 894 if (r) 895 return r; 896 897 alu.inst = ctx->inst_info->r600_opcode; 898 if (!swap) { 899 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 900 alu.src[j] = r600_src[j]; 901 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 902 } 903 } else { 904 alu.src[0] = r600_src[1]; 905 alu.src[0].chan = tgsi_chan(&inst->Src[1], i); 906 907 alu.src[1] = r600_src[0]; 908 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 909 } 910 /* handle some special cases */ 911 switch (ctx->inst_info->tgsi_opcode) { 912 case TGSI_OPCODE_SUB: 913 alu.src[1].neg = 1; 914 break; 915 case TGSI_OPCODE_ABS: 916 alu.src[0].abs = 1; 917 break; 918 default: 919 break; 920 } 921 if (i == lasti) { 922 alu.last = 1; 923 } 924 r = r600_bc_add_alu(ctx->bc, &alu); 925 if (r) 926 return r; 927 } 928 return 0; 929} 930 931static int tgsi_op2(struct r600_shader_ctx *ctx) 932{ 933 return tgsi_op2_s(ctx, 0); 934} 935 936static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 937{ 938 return tgsi_op2_s(ctx, 1); 939} 940 941/* 942 * r600 - trunc to -PI..PI range 943 * r700 - normalize by dividing by 2PI 944 * see fdo bug 27901 945 */ 946static int tgsi_setup_trig(struct r600_shader_ctx *ctx, 947 struct r600_bc_alu_src r600_src[3]) 948{ 949 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 950 int r; 951 uint32_t lit_vals[4]; 952 struct r600_bc_alu alu; 953 954 memset(lit_vals, 0, 4*4); 955 r = tgsi_split_constant(ctx, r600_src); 956 if (r) 957 return r; 958 r = tgsi_split_literal_constant(ctx, r600_src); 959 if (r) 960 return r; 961 962 r = tgsi_split_literal_constant(ctx, r600_src); 963 if (r) 964 return r; 965 966 lit_vals[0] = fui(1.0 /(3.1415926535 * 2)); 967 lit_vals[1] = fui(0.5f); 968 969 memset(&alu, 0, sizeof(struct r600_bc_alu)); 970 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 971 alu.is_op3 = 1; 972 973 alu.dst.chan = 0; 974 alu.dst.sel = ctx->temp_reg; 975 alu.dst.write = 1; 976 977 alu.src[0] = r600_src[0]; 978 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 979 980 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 981 alu.src[1].chan = 0; 982 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 983 alu.src[2].chan = 1; 984 alu.last = 1; 985 r = r600_bc_add_alu(ctx->bc, &alu); 986 if (r) 987 return r; 988 r = r600_bc_add_literal(ctx->bc, lit_vals); 989 if (r) 990 return r; 991 992 memset(&alu, 0, sizeof(struct r600_bc_alu)); 993 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 994 995 alu.dst.chan = 0; 996 alu.dst.sel = ctx->temp_reg; 997 alu.dst.write = 1; 998 999 alu.src[0].sel = ctx->temp_reg; 1000 alu.src[0].chan = 0; 1001 alu.last = 1; 1002 r = r600_bc_add_alu(ctx->bc, &alu); 1003 if (r) 1004 return r; 1005 1006 if (ctx->bc->chiprev == 0) { 1007 lit_vals[0] = fui(3.1415926535897f * 2.0f); 1008 lit_vals[1] = fui(-3.1415926535897f); 1009 } else { 1010 lit_vals[0] = fui(1.0f); 1011 lit_vals[1] = fui(-0.5f); 1012 } 1013 1014 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1015 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1016 alu.is_op3 = 1; 1017 1018 alu.dst.chan = 0; 1019 alu.dst.sel = ctx->temp_reg; 1020 alu.dst.write = 1; 1021 1022 alu.src[0].sel = ctx->temp_reg; 1023 alu.src[0].chan = 0; 1024 1025 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 1026 alu.src[1].chan = 0; 1027 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1028 alu.src[2].chan = 1; 1029 alu.last = 1; 1030 r = r600_bc_add_alu(ctx->bc, &alu); 1031 if (r) 1032 return r; 1033 r = r600_bc_add_literal(ctx->bc, lit_vals); 1034 if (r) 1035 return r; 1036 return 0; 1037} 1038 1039static int tgsi_trig(struct r600_shader_ctx *ctx) 1040{ 1041 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1042 struct r600_bc_alu_src r600_src[3]; 1043 struct r600_bc_alu alu; 1044 int i, r; 1045 int lasti = 0; 1046 1047 r = tgsi_setup_trig(ctx, r600_src); 1048 if (r) 1049 return r; 1050 1051 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1052 alu.inst = ctx->inst_info->r600_opcode; 1053 alu.dst.chan = 0; 1054 alu.dst.sel = ctx->temp_reg; 1055 alu.dst.write = 1; 1056 1057 alu.src[0].sel = ctx->temp_reg; 1058 alu.src[0].chan = 0; 1059 alu.last = 1; 1060 r = r600_bc_add_alu(ctx->bc, &alu); 1061 if (r) 1062 return r; 1063 1064 /* replicate result */ 1065 for (i = 0; i < 4; i++) { 1066 if (inst->Dst[0].Register.WriteMask & (1 << i)) 1067 lasti = i; 1068 } 1069 for (i = 0; i < lasti + 1; i++) { 1070 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 1071 continue; 1072 1073 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1074 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1075 1076 alu.src[0].sel = ctx->temp_reg; 1077 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1078 if (r) 1079 return r; 1080 if (i == lasti) 1081 alu.last = 1; 1082 r = r600_bc_add_alu(ctx->bc, &alu); 1083 if (r) 1084 return r; 1085 } 1086 return 0; 1087} 1088 1089static int tgsi_scs(struct r600_shader_ctx *ctx) 1090{ 1091 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1092 struct r600_bc_alu_src r600_src[3]; 1093 struct r600_bc_alu alu; 1094 int r; 1095 1096 /* We'll only need the trig stuff if we are going to write to the 1097 * X or Y components of the destination vector. 1098 */ 1099 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 1100 r = tgsi_setup_trig(ctx, r600_src); 1101 if (r) 1102 return r; 1103 } 1104 1105 /* dst.x = COS */ 1106 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 1107 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1108 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); 1109 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1110 if (r) 1111 return r; 1112 1113 alu.src[0].sel = ctx->temp_reg; 1114 alu.src[0].chan = 0; 1115 alu.last = 1; 1116 r = r600_bc_add_alu(ctx->bc, &alu); 1117 if (r) 1118 return r; 1119 } 1120 1121 /* dst.y = SIN */ 1122 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 1123 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1124 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); 1125 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1126 if (r) 1127 return r; 1128 1129 alu.src[0].sel = ctx->temp_reg; 1130 alu.src[0].chan = 0; 1131 alu.last = 1; 1132 r = r600_bc_add_alu(ctx->bc, &alu); 1133 if (r) 1134 return r; 1135 } 1136 1137 /* dst.z = 0.0; */ 1138 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 1139 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1140 1141 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1142 1143 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1144 if (r) 1145 return r; 1146 1147 alu.src[0].sel = V_SQ_ALU_SRC_0; 1148 alu.src[0].chan = 0; 1149 1150 alu.last = 1; 1151 1152 r = r600_bc_add_alu(ctx->bc, &alu); 1153 if (r) 1154 return r; 1155 1156 r = r600_bc_add_literal(ctx->bc, ctx->value); 1157 if (r) 1158 return r; 1159 } 1160 1161 /* dst.w = 1.0; */ 1162 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 1163 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1164 1165 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1166 1167 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1168 if (r) 1169 return r; 1170 1171 alu.src[0].sel = V_SQ_ALU_SRC_1; 1172 alu.src[0].chan = 0; 1173 1174 alu.last = 1; 1175 1176 r = r600_bc_add_alu(ctx->bc, &alu); 1177 if (r) 1178 return r; 1179 1180 r = r600_bc_add_literal(ctx->bc, ctx->value); 1181 if (r) 1182 return r; 1183 } 1184 1185 return 0; 1186} 1187 1188static int tgsi_kill(struct r600_shader_ctx *ctx) 1189{ 1190 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1191 struct r600_bc_alu alu; 1192 int i, r; 1193 1194 for (i = 0; i < 4; i++) { 1195 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1196 alu.inst = ctx->inst_info->r600_opcode; 1197 1198 alu.dst.chan = i; 1199 1200 alu.src[0].sel = V_SQ_ALU_SRC_0; 1201 1202 if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_KILP) { 1203 alu.src[1].sel = V_SQ_ALU_SRC_1; 1204 alu.src[1].neg = 1; 1205 } else { 1206 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1207 if (r) 1208 return r; 1209 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1210 } 1211 if (i == 3) { 1212 alu.last = 1; 1213 } 1214 r = r600_bc_add_alu(ctx->bc, &alu); 1215 if (r) 1216 return r; 1217 } 1218 r = r600_bc_add_literal(ctx->bc, ctx->value); 1219 if (r) 1220 return r; 1221 1222 /* kill must be last in ALU */ 1223 ctx->bc->force_add_cf = 1; 1224 ctx->shader->uses_kill = TRUE; 1225 return 0; 1226} 1227 1228static int tgsi_lit(struct r600_shader_ctx *ctx) 1229{ 1230 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1231 struct r600_bc_alu alu; 1232 struct r600_bc_alu_src r600_src[3]; 1233 int r; 1234 1235 r = tgsi_split_constant(ctx, r600_src); 1236 if (r) 1237 return r; 1238 r = tgsi_split_literal_constant(ctx, r600_src); 1239 if (r) 1240 return r; 1241 1242 /* dst.x, <- 1.0 */ 1243 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1244 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1245 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 1246 alu.src[0].chan = 0; 1247 r = tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 1248 if (r) 1249 return r; 1250 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 1251 r = r600_bc_add_alu(ctx->bc, &alu); 1252 if (r) 1253 return r; 1254 1255 /* dst.y = max(src.x, 0.0) */ 1256 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1257 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX); 1258 alu.src[0] = r600_src[0]; 1259 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 1260 alu.src[1].chan = 0; 1261 r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 1262 if (r) 1263 return r; 1264 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 1265 r = r600_bc_add_alu(ctx->bc, &alu); 1266 if (r) 1267 return r; 1268 1269 /* dst.w, <- 1.0 */ 1270 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1271 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1272 alu.src[0].sel = V_SQ_ALU_SRC_1; 1273 alu.src[0].chan = 0; 1274 r = tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 1275 if (r) 1276 return r; 1277 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 1278 alu.last = 1; 1279 r = r600_bc_add_alu(ctx->bc, &alu); 1280 if (r) 1281 return r; 1282 1283 r = r600_bc_add_literal(ctx->bc, ctx->value); 1284 if (r) 1285 return r; 1286 1287 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 1288 { 1289 int chan; 1290 int sel; 1291 1292 /* dst.z = log(src.y) */ 1293 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1294 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); 1295 alu.src[0] = r600_src[0]; 1296 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 1297 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1298 if (r) 1299 return r; 1300 alu.last = 1; 1301 r = r600_bc_add_alu(ctx->bc, &alu); 1302 if (r) 1303 return r; 1304 1305 r = r600_bc_add_literal(ctx->bc, ctx->value); 1306 if (r) 1307 return r; 1308 1309 chan = alu.dst.chan; 1310 sel = alu.dst.sel; 1311 1312 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ 1313 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1314 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT); 1315 alu.src[0] = r600_src[0]; 1316 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1317 alu.src[1].sel = sel; 1318 alu.src[1].chan = chan; 1319 1320 alu.src[2] = r600_src[0]; 1321 alu.src[2].chan = tgsi_chan(&inst->Src[0], 0); 1322 alu.dst.sel = ctx->temp_reg; 1323 alu.dst.chan = 0; 1324 alu.dst.write = 1; 1325 alu.is_op3 = 1; 1326 alu.last = 1; 1327 r = r600_bc_add_alu(ctx->bc, &alu); 1328 if (r) 1329 return r; 1330 1331 r = r600_bc_add_literal(ctx->bc, ctx->value); 1332 if (r) 1333 return r; 1334 /* dst.z = exp(tmp.x) */ 1335 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1336 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1337 alu.src[0].sel = ctx->temp_reg; 1338 alu.src[0].chan = 0; 1339 r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 1340 if (r) 1341 return r; 1342 alu.last = 1; 1343 r = r600_bc_add_alu(ctx->bc, &alu); 1344 if (r) 1345 return r; 1346 } 1347 return 0; 1348} 1349 1350static int tgsi_rsq(struct r600_shader_ctx *ctx) 1351{ 1352 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1353 struct r600_bc_alu alu; 1354 int i, r; 1355 1356 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1357 1358 /* FIXME: 1359 * For state trackers other than OpenGL, we'll want to use 1360 * _RECIPSQRT_IEEE instead. 1361 */ 1362 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED); 1363 1364 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1365 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1366 if (r) 1367 return r; 1368 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1369 alu.src[i].abs = 1; 1370 } 1371 alu.dst.sel = ctx->temp_reg; 1372 alu.dst.write = 1; 1373 alu.last = 1; 1374 r = r600_bc_add_alu(ctx->bc, &alu); 1375 if (r) 1376 return r; 1377 r = r600_bc_add_literal(ctx->bc, ctx->value); 1378 if (r) 1379 return r; 1380 /* replicate result */ 1381 return tgsi_helper_tempx_replicate(ctx); 1382} 1383 1384static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 1385{ 1386 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1387 struct r600_bc_alu alu; 1388 int i, r; 1389 1390 for (i = 0; i < 4; i++) { 1391 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1392 alu.src[0].sel = ctx->temp_reg; 1393 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1394 alu.dst.chan = i; 1395 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1396 if (r) 1397 return r; 1398 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 1399 if (i == 3) 1400 alu.last = 1; 1401 r = r600_bc_add_alu(ctx->bc, &alu); 1402 if (r) 1403 return r; 1404 } 1405 return 0; 1406} 1407 1408static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 1409{ 1410 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1411 struct r600_bc_alu alu; 1412 int i, r; 1413 1414 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1415 alu.inst = ctx->inst_info->r600_opcode; 1416 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1417 r = tgsi_src(ctx, &inst->Src[i], &alu.src[i]); 1418 if (r) 1419 return r; 1420 alu.src[i].chan = tgsi_chan(&inst->Src[i], 0); 1421 } 1422 alu.dst.sel = ctx->temp_reg; 1423 alu.dst.write = 1; 1424 alu.last = 1; 1425 r = r600_bc_add_alu(ctx->bc, &alu); 1426 if (r) 1427 return r; 1428 r = r600_bc_add_literal(ctx->bc, ctx->value); 1429 if (r) 1430 return r; 1431 /* replicate result */ 1432 return tgsi_helper_tempx_replicate(ctx); 1433} 1434 1435static int tgsi_pow(struct r600_shader_ctx *ctx) 1436{ 1437 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1438 struct r600_bc_alu alu; 1439 int r; 1440 1441 /* LOG2(a) */ 1442 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1443 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 1444 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1445 if (r) 1446 return r; 1447 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 1448 alu.dst.sel = ctx->temp_reg; 1449 alu.dst.write = 1; 1450 alu.last = 1; 1451 r = r600_bc_add_alu(ctx->bc, &alu); 1452 if (r) 1453 return r; 1454 r = r600_bc_add_literal(ctx->bc,ctx->value); 1455 if (r) 1456 return r; 1457 /* b * LOG2(a) */ 1458 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1459 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE); 1460 r = tgsi_src(ctx, &inst->Src[1], &alu.src[0]); 1461 if (r) 1462 return r; 1463 alu.src[0].chan = tgsi_chan(&inst->Src[1], 0); 1464 alu.src[1].sel = ctx->temp_reg; 1465 alu.dst.sel = ctx->temp_reg; 1466 alu.dst.write = 1; 1467 alu.last = 1; 1468 r = r600_bc_add_alu(ctx->bc, &alu); 1469 if (r) 1470 return r; 1471 r = r600_bc_add_literal(ctx->bc,ctx->value); 1472 if (r) 1473 return r; 1474 /* POW(a,b) = EXP2(b * LOG2(a))*/ 1475 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1476 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 1477 alu.src[0].sel = ctx->temp_reg; 1478 alu.dst.sel = ctx->temp_reg; 1479 alu.dst.write = 1; 1480 alu.last = 1; 1481 r = r600_bc_add_alu(ctx->bc, &alu); 1482 if (r) 1483 return r; 1484 r = r600_bc_add_literal(ctx->bc,ctx->value); 1485 if (r) 1486 return r; 1487 return tgsi_helper_tempx_replicate(ctx); 1488} 1489 1490static int tgsi_ssg(struct r600_shader_ctx *ctx) 1491{ 1492 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1493 struct r600_bc_alu alu; 1494 struct r600_bc_alu_src r600_src[3]; 1495 int i, r; 1496 1497 r = tgsi_split_constant(ctx, r600_src); 1498 if (r) 1499 return r; 1500 r = tgsi_split_literal_constant(ctx, r600_src); 1501 if (r) 1502 return r; 1503 1504 /* tmp = (src > 0 ? 1 : src) */ 1505 for (i = 0; i < 4; i++) { 1506 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1507 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1508 alu.is_op3 = 1; 1509 1510 alu.dst.sel = ctx->temp_reg; 1511 alu.dst.chan = i; 1512 1513 alu.src[0] = r600_src[0]; 1514 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1515 1516 alu.src[1].sel = V_SQ_ALU_SRC_1; 1517 1518 alu.src[2] = r600_src[0]; 1519 alu.src[2].chan = tgsi_chan(&inst->Src[0], i); 1520 if (i == 3) 1521 alu.last = 1; 1522 r = r600_bc_add_alu(ctx->bc, &alu); 1523 if (r) 1524 return r; 1525 } 1526 r = r600_bc_add_literal(ctx->bc, ctx->value); 1527 if (r) 1528 return r; 1529 1530 /* dst = (-tmp > 0 ? -1 : tmp) */ 1531 for (i = 0; i < 4; i++) { 1532 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1533 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT); 1534 alu.is_op3 = 1; 1535 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1536 if (r) 1537 return r; 1538 1539 alu.src[0].sel = ctx->temp_reg; 1540 alu.src[0].chan = i; 1541 alu.src[0].neg = 1; 1542 1543 alu.src[1].sel = V_SQ_ALU_SRC_1; 1544 alu.src[1].neg = 1; 1545 1546 alu.src[2].sel = ctx->temp_reg; 1547 alu.src[2].chan = i; 1548 1549 if (i == 3) 1550 alu.last = 1; 1551 r = r600_bc_add_alu(ctx->bc, &alu); 1552 if (r) 1553 return r; 1554 } 1555 return 0; 1556} 1557 1558static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction *inst) 1559{ 1560 struct r600_bc_alu alu; 1561 int i, r; 1562 1563 r = r600_bc_add_literal(ctx->bc, ctx->value); 1564 if (r) 1565 return r; 1566 for (i = 0; i < 4; i++) { 1567 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1568 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) { 1569 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP); 1570 alu.dst.chan = i; 1571 } else { 1572 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1573 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 1574 if (r) 1575 return r; 1576 alu.src[0].sel = ctx->temp_reg; 1577 alu.src[0].chan = i; 1578 } 1579 if (i == 3) { 1580 alu.last = 1; 1581 } 1582 r = r600_bc_add_alu(ctx->bc, &alu); 1583 if (r) 1584 return r; 1585 } 1586 return 0; 1587} 1588 1589static int tgsi_op3(struct r600_shader_ctx *ctx) 1590{ 1591 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1592 struct r600_bc_alu_src r600_src[3]; 1593 struct r600_bc_alu alu; 1594 int i, j, r; 1595 1596 r = tgsi_split_constant(ctx, r600_src); 1597 if (r) 1598 return r; 1599 r = tgsi_split_literal_constant(ctx, r600_src); 1600 if (r) 1601 return r; 1602 /* do it in 2 step as op3 doesn't support writemask */ 1603 for (i = 0; i < 4; i++) { 1604 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1605 alu.inst = ctx->inst_info->r600_opcode; 1606 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1607 alu.src[j] = r600_src[j]; 1608 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1609 } 1610 alu.dst.sel = ctx->temp_reg; 1611 alu.dst.chan = i; 1612 alu.dst.write = 1; 1613 alu.is_op3 = 1; 1614 if (i == 3) { 1615 alu.last = 1; 1616 } 1617 r = r600_bc_add_alu(ctx->bc, &alu); 1618 if (r) 1619 return r; 1620 } 1621 return tgsi_helper_copy(ctx, inst); 1622} 1623 1624static int tgsi_dp(struct r600_shader_ctx *ctx) 1625{ 1626 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1627 struct r600_bc_alu_src r600_src[3]; 1628 struct r600_bc_alu alu; 1629 int i, j, r; 1630 1631 r = tgsi_split_constant(ctx, r600_src); 1632 if (r) 1633 return r; 1634 r = tgsi_split_literal_constant(ctx, r600_src); 1635 if (r) 1636 return r; 1637 for (i = 0; i < 4; i++) { 1638 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1639 alu.inst = ctx->inst_info->r600_opcode; 1640 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 1641 alu.src[j] = r600_src[j]; 1642 alu.src[j].chan = tgsi_chan(&inst->Src[j], i); 1643 } 1644 alu.dst.sel = ctx->temp_reg; 1645 alu.dst.chan = i; 1646 alu.dst.write = 1; 1647 /* handle some special cases */ 1648 switch (ctx->inst_info->tgsi_opcode) { 1649 case TGSI_OPCODE_DP2: 1650 if (i > 1) { 1651 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1652 alu.src[0].chan = alu.src[1].chan = 0; 1653 } 1654 break; 1655 case TGSI_OPCODE_DP3: 1656 if (i > 2) { 1657 alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0; 1658 alu.src[0].chan = alu.src[1].chan = 0; 1659 } 1660 break; 1661 case TGSI_OPCODE_DPH: 1662 if (i == 3) { 1663 alu.src[0].sel = V_SQ_ALU_SRC_1; 1664 alu.src[0].chan = 0; 1665 alu.src[0].neg = 0; 1666 } 1667 break; 1668 default: 1669 break; 1670 } 1671 if (i == 3) { 1672 alu.last = 1; 1673 } 1674 r = r600_bc_add_alu(ctx->bc, &alu); 1675 if (r) 1676 return r; 1677 } 1678 return tgsi_helper_copy(ctx, inst); 1679} 1680 1681static int tgsi_tex(struct r600_shader_ctx *ctx) 1682{ 1683 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1684 struct r600_bc_tex tex; 1685 struct r600_bc_alu alu; 1686 unsigned src_gpr; 1687 int r, i; 1688 int opcode; 1689 boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY; 1690 uint32_t lit_vals[4]; 1691 1692 src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; 1693 1694 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 1695 /* Add perspective divide */ 1696 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1697 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1698 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1699 if (r) 1700 return r; 1701 1702 alu.src[0].chan = tgsi_chan(&inst->Src[0], 3); 1703 alu.dst.sel = ctx->temp_reg; 1704 alu.dst.chan = 3; 1705 alu.last = 1; 1706 alu.dst.write = 1; 1707 r = r600_bc_add_alu(ctx->bc, &alu); 1708 if (r) 1709 return r; 1710 1711 for (i = 0; i < 3; i++) { 1712 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1713 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1714 alu.src[0].sel = ctx->temp_reg; 1715 alu.src[0].chan = 3; 1716 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1717 if (r) 1718 return r; 1719 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1720 alu.dst.sel = ctx->temp_reg; 1721 alu.dst.chan = i; 1722 alu.dst.write = 1; 1723 r = r600_bc_add_alu(ctx->bc, &alu); 1724 if (r) 1725 return r; 1726 } 1727 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1728 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1729 alu.src[0].sel = V_SQ_ALU_SRC_1; 1730 alu.src[0].chan = 0; 1731 alu.dst.sel = ctx->temp_reg; 1732 alu.dst.chan = 3; 1733 alu.last = 1; 1734 alu.dst.write = 1; 1735 r = r600_bc_add_alu(ctx->bc, &alu); 1736 if (r) 1737 return r; 1738 src_not_temp = FALSE; 1739 src_gpr = ctx->temp_reg; 1740 } 1741 1742 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1743 int src_chan, src2_chan; 1744 1745 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 1746 for (i = 0; i < 4; i++) { 1747 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1748 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE); 1749 switch (i) { 1750 case 0: 1751 src_chan = 2; 1752 src2_chan = 1; 1753 break; 1754 case 1: 1755 src_chan = 2; 1756 src2_chan = 0; 1757 break; 1758 case 2: 1759 src_chan = 0; 1760 src2_chan = 2; 1761 break; 1762 case 3: 1763 src_chan = 1; 1764 src2_chan = 2; 1765 break; 1766 default: 1767 assert(0); 1768 src_chan = 0; 1769 src2_chan = 0; 1770 break; 1771 } 1772 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 1773 if (r) 1774 return r; 1775 alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan); 1776 r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]); 1777 if (r) 1778 return r; 1779 alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan); 1780 alu.dst.sel = ctx->temp_reg; 1781 alu.dst.chan = i; 1782 if (i == 3) 1783 alu.last = 1; 1784 alu.dst.write = 1; 1785 r = r600_bc_add_alu(ctx->bc, &alu); 1786 if (r) 1787 return r; 1788 } 1789 1790 /* tmp1.z = RCP_e(|tmp1.z|) */ 1791 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1792 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 1793 alu.src[0].sel = ctx->temp_reg; 1794 alu.src[0].chan = 2; 1795 alu.src[0].abs = 1; 1796 alu.dst.sel = ctx->temp_reg; 1797 alu.dst.chan = 2; 1798 alu.dst.write = 1; 1799 alu.last = 1; 1800 r = r600_bc_add_alu(ctx->bc, &alu); 1801 if (r) 1802 return r; 1803 1804 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 1805 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 1806 * muladd has no writemask, have to use another temp 1807 */ 1808 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1809 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1810 alu.is_op3 = 1; 1811 1812 alu.src[0].sel = ctx->temp_reg; 1813 alu.src[0].chan = 0; 1814 alu.src[1].sel = ctx->temp_reg; 1815 alu.src[1].chan = 2; 1816 1817 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1818 alu.src[2].chan = 0; 1819 1820 alu.dst.sel = ctx->temp_reg; 1821 alu.dst.chan = 0; 1822 alu.dst.write = 1; 1823 1824 r = r600_bc_add_alu(ctx->bc, &alu); 1825 if (r) 1826 return r; 1827 1828 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1829 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1830 alu.is_op3 = 1; 1831 1832 alu.src[0].sel = ctx->temp_reg; 1833 alu.src[0].chan = 1; 1834 alu.src[1].sel = ctx->temp_reg; 1835 alu.src[1].chan = 2; 1836 1837 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 1838 alu.src[2].chan = 0; 1839 1840 alu.dst.sel = ctx->temp_reg; 1841 alu.dst.chan = 1; 1842 alu.dst.write = 1; 1843 1844 alu.last = 1; 1845 r = r600_bc_add_alu(ctx->bc, &alu); 1846 if (r) 1847 return r; 1848 1849 lit_vals[0] = fui(1.5f); 1850 1851 r = r600_bc_add_literal(ctx->bc, lit_vals); 1852 if (r) 1853 return r; 1854 src_not_temp = FALSE; 1855 src_gpr = ctx->temp_reg; 1856 } 1857 1858 if (src_not_temp) { 1859 for (i = 0; i < 4; i++) { 1860 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1861 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 1862 alu.src[0].sel = src_gpr; 1863 alu.src[0].chan = i; 1864 alu.dst.sel = ctx->temp_reg; 1865 alu.dst.chan = i; 1866 if (i == 3) 1867 alu.last = 1; 1868 alu.dst.write = 1; 1869 r = r600_bc_add_alu(ctx->bc, &alu); 1870 if (r) 1871 return r; 1872 } 1873 src_gpr = ctx->temp_reg; 1874 } 1875 1876 opcode = ctx->inst_info->r600_opcode; 1877 if (opcode == SQ_TEX_INST_SAMPLE && 1878 (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)) 1879 opcode = SQ_TEX_INST_SAMPLE_C; 1880 1881 memset(&tex, 0, sizeof(struct r600_bc_tex)); 1882 tex.inst = opcode; 1883 tex.sampler_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index; 1884 tex.resource_id = tex.sampler_id; 1885 if (ctx->shader->processor_type == TGSI_PROCESSOR_VERTEX) 1886 tex.resource_id += PIPE_MAX_ATTRIBS; 1887 tex.src_gpr = src_gpr; 1888 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 1889 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 1890 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 1891 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 1892 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 1893 tex.src_sel_x = 0; 1894 tex.src_sel_y = 1; 1895 tex.src_sel_z = 2; 1896 tex.src_sel_w = 3; 1897 1898 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) { 1899 tex.src_sel_x = 1; 1900 tex.src_sel_y = 0; 1901 tex.src_sel_z = 3; 1902 tex.src_sel_w = 1; 1903 } 1904 1905 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 1906 tex.coord_type_x = 1; 1907 tex.coord_type_y = 1; 1908 tex.coord_type_z = 1; 1909 tex.coord_type_w = 1; 1910 } 1911 1912 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D) 1913 tex.src_sel_w = 2; 1914 1915 r = r600_bc_add_tex(ctx->bc, &tex); 1916 if (r) 1917 return r; 1918 1919 /* add shadow ambient support - gallium doesn't do it yet */ 1920 return 0; 1921 1922} 1923 1924static int tgsi_lrp(struct r600_shader_ctx *ctx) 1925{ 1926 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1927 struct r600_bc_alu_src r600_src[3]; 1928 struct r600_bc_alu alu; 1929 unsigned i; 1930 int r; 1931 1932 r = tgsi_split_constant(ctx, r600_src); 1933 if (r) 1934 return r; 1935 r = tgsi_split_literal_constant(ctx, r600_src); 1936 if (r) 1937 return r; 1938 /* 1 - src0 */ 1939 for (i = 0; i < 4; i++) { 1940 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1941 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD); 1942 alu.src[0].sel = V_SQ_ALU_SRC_1; 1943 alu.src[0].chan = 0; 1944 alu.src[1] = r600_src[0]; 1945 alu.src[1].chan = tgsi_chan(&inst->Src[0], i); 1946 alu.src[1].neg = 1; 1947 alu.dst.sel = ctx->temp_reg; 1948 alu.dst.chan = i; 1949 if (i == 3) { 1950 alu.last = 1; 1951 } 1952 alu.dst.write = 1; 1953 r = r600_bc_add_alu(ctx->bc, &alu); 1954 if (r) 1955 return r; 1956 } 1957 r = r600_bc_add_literal(ctx->bc, ctx->value); 1958 if (r) 1959 return r; 1960 1961 /* (1 - src0) * src2 */ 1962 for (i = 0; i < 4; i++) { 1963 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1964 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 1965 alu.src[0].sel = ctx->temp_reg; 1966 alu.src[0].chan = i; 1967 alu.src[1] = r600_src[2]; 1968 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 1969 alu.dst.sel = ctx->temp_reg; 1970 alu.dst.chan = i; 1971 if (i == 3) { 1972 alu.last = 1; 1973 } 1974 alu.dst.write = 1; 1975 r = r600_bc_add_alu(ctx->bc, &alu); 1976 if (r) 1977 return r; 1978 } 1979 r = r600_bc_add_literal(ctx->bc, ctx->value); 1980 if (r) 1981 return r; 1982 1983 /* src0 * src1 + (1 - src0) * src2 */ 1984 for (i = 0; i < 4; i++) { 1985 memset(&alu, 0, sizeof(struct r600_bc_alu)); 1986 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 1987 alu.is_op3 = 1; 1988 alu.src[0] = r600_src[0]; 1989 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 1990 alu.src[1] = r600_src[1]; 1991 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 1992 alu.src[2].sel = ctx->temp_reg; 1993 alu.src[2].chan = i; 1994 alu.dst.sel = ctx->temp_reg; 1995 alu.dst.chan = i; 1996 if (i == 3) { 1997 alu.last = 1; 1998 } 1999 r = r600_bc_add_alu(ctx->bc, &alu); 2000 if (r) 2001 return r; 2002 } 2003 return tgsi_helper_copy(ctx, inst); 2004} 2005 2006static int tgsi_cmp(struct r600_shader_ctx *ctx) 2007{ 2008 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2009 struct r600_bc_alu_src r600_src[3]; 2010 struct r600_bc_alu alu; 2011 int use_temp = 0; 2012 int i, r; 2013 2014 r = tgsi_split_constant(ctx, r600_src); 2015 if (r) 2016 return r; 2017 r = tgsi_split_literal_constant(ctx, r600_src); 2018 if (r) 2019 return r; 2020 2021 if (inst->Dst[0].Register.WriteMask != 0xf) 2022 use_temp = 1; 2023 2024 for (i = 0; i < 4; i++) { 2025 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2026 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE); 2027 alu.src[0] = r600_src[0]; 2028 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2029 2030 alu.src[1] = r600_src[2]; 2031 alu.src[1].chan = tgsi_chan(&inst->Src[2], i); 2032 2033 alu.src[2] = r600_src[1]; 2034 alu.src[2].chan = tgsi_chan(&inst->Src[1], i); 2035 2036 if (use_temp) 2037 alu.dst.sel = ctx->temp_reg; 2038 else { 2039 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2040 if (r) 2041 return r; 2042 } 2043 alu.dst.chan = i; 2044 alu.dst.write = 1; 2045 alu.is_op3 = 1; 2046 if (i == 3) 2047 alu.last = 1; 2048 r = r600_bc_add_alu(ctx->bc, &alu); 2049 if (r) 2050 return r; 2051 } 2052 if (use_temp) 2053 return tgsi_helper_copy(ctx, inst); 2054 return 0; 2055} 2056 2057static int tgsi_xpd(struct r600_shader_ctx *ctx) 2058{ 2059 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2060 struct r600_bc_alu_src r600_src[3]; 2061 struct r600_bc_alu alu; 2062 uint32_t use_temp = 0; 2063 int i, r; 2064 2065 if (inst->Dst[0].Register.WriteMask != 0xf) 2066 use_temp = 1; 2067 2068 r = tgsi_split_constant(ctx, r600_src); 2069 if (r) 2070 return r; 2071 r = tgsi_split_literal_constant(ctx, r600_src); 2072 if (r) 2073 return r; 2074 2075 for (i = 0; i < 4; i++) { 2076 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2077 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2078 2079 alu.src[0] = r600_src[0]; 2080 switch (i) { 2081 case 0: 2082 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2083 break; 2084 case 1: 2085 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2086 break; 2087 case 2: 2088 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2089 break; 2090 case 3: 2091 alu.src[0].sel = V_SQ_ALU_SRC_0; 2092 alu.src[0].chan = i; 2093 } 2094 2095 alu.src[1] = r600_src[1]; 2096 switch (i) { 2097 case 0: 2098 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2099 break; 2100 case 1: 2101 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2102 break; 2103 case 2: 2104 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2105 break; 2106 case 3: 2107 alu.src[1].sel = V_SQ_ALU_SRC_0; 2108 alu.src[1].chan = i; 2109 } 2110 2111 alu.dst.sel = ctx->temp_reg; 2112 alu.dst.chan = i; 2113 alu.dst.write = 1; 2114 2115 if (i == 3) 2116 alu.last = 1; 2117 r = r600_bc_add_alu(ctx->bc, &alu); 2118 if (r) 2119 return r; 2120 2121 r = r600_bc_add_literal(ctx->bc, ctx->value); 2122 if (r) 2123 return r; 2124 } 2125 2126 for (i = 0; i < 4; i++) { 2127 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2128 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD); 2129 2130 alu.src[0] = r600_src[0]; 2131 switch (i) { 2132 case 0: 2133 alu.src[0].chan = tgsi_chan(&inst->Src[0], 1); 2134 break; 2135 case 1: 2136 alu.src[0].chan = tgsi_chan(&inst->Src[0], 2); 2137 break; 2138 case 2: 2139 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2140 break; 2141 case 3: 2142 alu.src[0].sel = V_SQ_ALU_SRC_0; 2143 alu.src[0].chan = i; 2144 } 2145 2146 alu.src[1] = r600_src[1]; 2147 switch (i) { 2148 case 0: 2149 alu.src[1].chan = tgsi_chan(&inst->Src[1], 2); 2150 break; 2151 case 1: 2152 alu.src[1].chan = tgsi_chan(&inst->Src[1], 0); 2153 break; 2154 case 2: 2155 alu.src[1].chan = tgsi_chan(&inst->Src[1], 1); 2156 break; 2157 case 3: 2158 alu.src[1].sel = V_SQ_ALU_SRC_0; 2159 alu.src[1].chan = i; 2160 } 2161 2162 alu.src[2].sel = ctx->temp_reg; 2163 alu.src[2].neg = 1; 2164 alu.src[2].chan = i; 2165 2166 if (use_temp) 2167 alu.dst.sel = ctx->temp_reg; 2168 else { 2169 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2170 if (r) 2171 return r; 2172 } 2173 alu.dst.chan = i; 2174 alu.dst.write = 1; 2175 alu.is_op3 = 1; 2176 if (i == 3) 2177 alu.last = 1; 2178 r = r600_bc_add_alu(ctx->bc, &alu); 2179 if (r) 2180 return r; 2181 2182 r = r600_bc_add_literal(ctx->bc, ctx->value); 2183 if (r) 2184 return r; 2185 } 2186 if (use_temp) 2187 return tgsi_helper_copy(ctx, inst); 2188 return 0; 2189} 2190 2191static int tgsi_exp(struct r600_shader_ctx *ctx) 2192{ 2193 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2194 struct r600_bc_alu_src r600_src[3]; 2195 struct r600_bc_alu alu; 2196 int r; 2197 2198 /* result.x = 2^floor(src); */ 2199 if (inst->Dst[0].Register.WriteMask & 1) { 2200 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2201 2202 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2203 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2204 if (r) 2205 return r; 2206 2207 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2208 2209 alu.dst.sel = ctx->temp_reg; 2210 alu.dst.chan = 0; 2211 alu.dst.write = 1; 2212 alu.last = 1; 2213 r = r600_bc_add_alu(ctx->bc, &alu); 2214 if (r) 2215 return r; 2216 2217 r = r600_bc_add_literal(ctx->bc, ctx->value); 2218 if (r) 2219 return r; 2220 2221 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2222 alu.src[0].sel = ctx->temp_reg; 2223 alu.src[0].chan = 0; 2224 2225 alu.dst.sel = ctx->temp_reg; 2226 alu.dst.chan = 0; 2227 alu.dst.write = 1; 2228 alu.last = 1; 2229 r = r600_bc_add_alu(ctx->bc, &alu); 2230 if (r) 2231 return r; 2232 2233 r = r600_bc_add_literal(ctx->bc, ctx->value); 2234 if (r) 2235 return r; 2236 } 2237 2238 /* result.y = tmp - floor(tmp); */ 2239 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2240 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2241 2242 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT); 2243 alu.src[0] = r600_src[0]; 2244 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2245 if (r) 2246 return r; 2247 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2248 2249 alu.dst.sel = ctx->temp_reg; 2250// r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2251// if (r) 2252// return r; 2253 alu.dst.write = 1; 2254 alu.dst.chan = 1; 2255 2256 alu.last = 1; 2257 2258 r = r600_bc_add_alu(ctx->bc, &alu); 2259 if (r) 2260 return r; 2261 r = r600_bc_add_literal(ctx->bc, ctx->value); 2262 if (r) 2263 return r; 2264 } 2265 2266 /* result.z = RoughApprox2ToX(tmp);*/ 2267 if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { 2268 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2269 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2270 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2271 if (r) 2272 return r; 2273 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2274 2275 alu.dst.sel = ctx->temp_reg; 2276 alu.dst.write = 1; 2277 alu.dst.chan = 2; 2278 2279 alu.last = 1; 2280 2281 r = r600_bc_add_alu(ctx->bc, &alu); 2282 if (r) 2283 return r; 2284 r = r600_bc_add_literal(ctx->bc, ctx->value); 2285 if (r) 2286 return r; 2287 } 2288 2289 /* result.w = 1.0;*/ 2290 if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) { 2291 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2292 2293 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2294 alu.src[0].sel = V_SQ_ALU_SRC_1; 2295 alu.src[0].chan = 0; 2296 2297 alu.dst.sel = ctx->temp_reg; 2298 alu.dst.chan = 3; 2299 alu.dst.write = 1; 2300 alu.last = 1; 2301 r = r600_bc_add_alu(ctx->bc, &alu); 2302 if (r) 2303 return r; 2304 r = r600_bc_add_literal(ctx->bc, ctx->value); 2305 if (r) 2306 return r; 2307 } 2308 return tgsi_helper_copy(ctx, inst); 2309} 2310 2311static int tgsi_log(struct r600_shader_ctx *ctx) 2312{ 2313 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2314 struct r600_bc_alu alu; 2315 int r; 2316 2317 /* result.x = floor(log2(src)); */ 2318 if (inst->Dst[0].Register.WriteMask & 1) { 2319 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2320 2321 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2322 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2323 if (r) 2324 return r; 2325 2326 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2327 2328 alu.dst.sel = ctx->temp_reg; 2329 alu.dst.chan = 0; 2330 alu.dst.write = 1; 2331 alu.last = 1; 2332 r = r600_bc_add_alu(ctx->bc, &alu); 2333 if (r) 2334 return r; 2335 2336 r = r600_bc_add_literal(ctx->bc, ctx->value); 2337 if (r) 2338 return r; 2339 2340 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2341 alu.src[0].sel = ctx->temp_reg; 2342 alu.src[0].chan = 0; 2343 2344 alu.dst.sel = ctx->temp_reg; 2345 alu.dst.chan = 0; 2346 alu.dst.write = 1; 2347 alu.last = 1; 2348 2349 r = r600_bc_add_alu(ctx->bc, &alu); 2350 if (r) 2351 return r; 2352 2353 r = r600_bc_add_literal(ctx->bc, ctx->value); 2354 if (r) 2355 return r; 2356 } 2357 2358 /* result.y = src.x / (2 ^ floor(log2(src.x))); */ 2359 if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { 2360 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2361 2362 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2363 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2364 if (r) 2365 return r; 2366 2367 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2368 2369 alu.dst.sel = ctx->temp_reg; 2370 alu.dst.chan = 1; 2371 alu.dst.write = 1; 2372 alu.last = 1; 2373 2374 r = r600_bc_add_alu(ctx->bc, &alu); 2375 if (r) 2376 return r; 2377 2378 r = r600_bc_add_literal(ctx->bc, ctx->value); 2379 if (r) 2380 return r; 2381 2382 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2383 2384 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); 2385 alu.src[0].sel = ctx->temp_reg; 2386 alu.src[0].chan = 1; 2387 2388 alu.dst.sel = ctx->temp_reg; 2389 alu.dst.chan = 1; 2390 alu.dst.write = 1; 2391 alu.last = 1; 2392 2393 r = r600_bc_add_alu(ctx->bc, &alu); 2394 if (r) 2395 return r; 2396 2397 r = r600_bc_add_literal(ctx->bc, ctx->value); 2398 if (r) 2399 return r; 2400 2401 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2402 2403 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); 2404 alu.src[0].sel = ctx->temp_reg; 2405 alu.src[0].chan = 1; 2406 2407 alu.dst.sel = ctx->temp_reg; 2408 alu.dst.chan = 1; 2409 alu.dst.write = 1; 2410 alu.last = 1; 2411 2412 r = r600_bc_add_alu(ctx->bc, &alu); 2413 if (r) 2414 return r; 2415 2416 r = r600_bc_add_literal(ctx->bc, ctx->value); 2417 if (r) 2418 return r; 2419 2420 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2421 2422 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); 2423 alu.src[0].sel = ctx->temp_reg; 2424 alu.src[0].chan = 1; 2425 2426 alu.dst.sel = ctx->temp_reg; 2427 alu.dst.chan = 1; 2428 alu.dst.write = 1; 2429 alu.last = 1; 2430 2431 r = r600_bc_add_alu(ctx->bc, &alu); 2432 if (r) 2433 return r; 2434 2435 r = r600_bc_add_literal(ctx->bc, ctx->value); 2436 if (r) 2437 return r; 2438 2439 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2440 2441 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2442 2443 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2444 if (r) 2445 return r; 2446 2447 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2448 2449 alu.src[1].sel = ctx->temp_reg; 2450 alu.src[1].chan = 1; 2451 2452 alu.dst.sel = ctx->temp_reg; 2453 alu.dst.chan = 1; 2454 alu.dst.write = 1; 2455 alu.last = 1; 2456 2457 r = r600_bc_add_alu(ctx->bc, &alu); 2458 if (r) 2459 return r; 2460 2461 r = r600_bc_add_literal(ctx->bc, ctx->value); 2462 if (r) 2463 return r; 2464 } 2465 2466 /* result.z = log2(src);*/ 2467 if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { 2468 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2469 2470 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); 2471 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2472 if (r) 2473 return r; 2474 2475 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2476 2477 alu.dst.sel = ctx->temp_reg; 2478 alu.dst.write = 1; 2479 alu.dst.chan = 2; 2480 alu.last = 1; 2481 2482 r = r600_bc_add_alu(ctx->bc, &alu); 2483 if (r) 2484 return r; 2485 2486 r = r600_bc_add_literal(ctx->bc, ctx->value); 2487 if (r) 2488 return r; 2489 } 2490 2491 /* result.w = 1.0; */ 2492 if ((inst->Dst[0].Register.WriteMask >> 3) & 1) { 2493 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2494 2495 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV); 2496 alu.src[0].sel = V_SQ_ALU_SRC_1; 2497 alu.src[0].chan = 0; 2498 2499 alu.dst.sel = ctx->temp_reg; 2500 alu.dst.chan = 3; 2501 alu.dst.write = 1; 2502 alu.last = 1; 2503 2504 r = r600_bc_add_alu(ctx->bc, &alu); 2505 if (r) 2506 return r; 2507 2508 r = r600_bc_add_literal(ctx->bc, ctx->value); 2509 if (r) 2510 return r; 2511 } 2512 2513 return tgsi_helper_copy(ctx, inst); 2514} 2515 2516/* r6/7 only for now */ 2517static int tgsi_arl(struct r600_shader_ctx *ctx) 2518{ 2519 /* TODO from r600c, ar values don't persist between clauses */ 2520 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2521 struct r600_bc_alu alu; 2522 int r; 2523 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2524 2525 alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR; 2526 2527 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2528 if (r) 2529 return r; 2530 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2531 2532 alu.last = 1; 2533 2534 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); 2535 if (r) 2536 return r; 2537 ctx->bc->cf_last->r6xx_uses_waterfall = 1; 2538 return 0; 2539} 2540 2541static int tgsi_opdst(struct r600_shader_ctx *ctx) 2542{ 2543 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2544 struct r600_bc_alu alu; 2545 int i, r = 0; 2546 2547 for (i = 0; i < 4; i++) { 2548 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2549 2550 alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); 2551 r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 2552 if (r) 2553 return r; 2554 2555 if (i == 0 || i == 3) { 2556 alu.src[0].sel = V_SQ_ALU_SRC_1; 2557 } else { 2558 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2559 if (r) 2560 return r; 2561 alu.src[0].chan = tgsi_chan(&inst->Src[0], i); 2562 } 2563 2564 if (i == 0 || i == 2) { 2565 alu.src[1].sel = V_SQ_ALU_SRC_1; 2566 } else { 2567 r = tgsi_src(ctx, &inst->Src[1], &alu.src[1]); 2568 if (r) 2569 return r; 2570 alu.src[1].chan = tgsi_chan(&inst->Src[1], i); 2571 } 2572 if (i == 3) 2573 alu.last = 1; 2574 r = r600_bc_add_alu(ctx->bc, &alu); 2575 if (r) 2576 return r; 2577 } 2578 return 0; 2579} 2580 2581static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) 2582{ 2583 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 2584 struct r600_bc_alu alu; 2585 int r; 2586 2587 memset(&alu, 0, sizeof(struct r600_bc_alu)); 2588 alu.inst = opcode; 2589 alu.predicate = 1; 2590 2591 alu.dst.sel = ctx->temp_reg; 2592 alu.dst.write = 1; 2593 alu.dst.chan = 0; 2594 2595 r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); 2596 if (r) 2597 return r; 2598 alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); 2599 alu.src[1].sel = V_SQ_ALU_SRC_0; 2600 alu.src[1].chan = 0; 2601 2602 alu.last = 1; 2603 2604 r = r600_bc_add_alu_type(ctx->bc, &alu, CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)); 2605 if (r) 2606 return r; 2607 return 0; 2608} 2609 2610static int pops(struct r600_shader_ctx *ctx, int pops) 2611{ 2612 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP)); 2613 ctx->bc->cf_last->pop_count = pops; 2614 return 0; 2615} 2616 2617static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason) 2618{ 2619 switch(reason) { 2620 case FC_PUSH_VPM: 2621 ctx->bc->callstack[ctx->bc->call_sp].current--; 2622 break; 2623 case FC_PUSH_WQM: 2624 case FC_LOOP: 2625 ctx->bc->callstack[ctx->bc->call_sp].current -= 4; 2626 break; 2627 case FC_REP: 2628 /* TOODO : for 16 vp asic should -= 2; */ 2629 ctx->bc->callstack[ctx->bc->call_sp].current --; 2630 break; 2631 } 2632} 2633 2634static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only) 2635{ 2636 if (check_max_only) { 2637 int diff; 2638 switch (reason) { 2639 case FC_PUSH_VPM: 2640 diff = 1; 2641 break; 2642 case FC_PUSH_WQM: 2643 diff = 4; 2644 break; 2645 default: 2646 assert(0); 2647 diff = 0; 2648 } 2649 if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) > 2650 ctx->bc->callstack[ctx->bc->call_sp].max) { 2651 ctx->bc->callstack[ctx->bc->call_sp].max = 2652 ctx->bc->callstack[ctx->bc->call_sp].current + diff; 2653 } 2654 return; 2655 } 2656 switch (reason) { 2657 case FC_PUSH_VPM: 2658 ctx->bc->callstack[ctx->bc->call_sp].current++; 2659 break; 2660 case FC_PUSH_WQM: 2661 case FC_LOOP: 2662 ctx->bc->callstack[ctx->bc->call_sp].current += 4; 2663 break; 2664 case FC_REP: 2665 ctx->bc->callstack[ctx->bc->call_sp].current++; 2666 break; 2667 } 2668 2669 if ((ctx->bc->callstack[ctx->bc->call_sp].current) > 2670 ctx->bc->callstack[ctx->bc->call_sp].max) { 2671 ctx->bc->callstack[ctx->bc->call_sp].max = 2672 ctx->bc->callstack[ctx->bc->call_sp].current; 2673 } 2674} 2675 2676static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 2677{ 2678 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 2679 2680 sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid, 2681 sizeof(struct r600_bc_cf *) * (sp->num_mid + 1)); 2682 sp->mid[sp->num_mid] = ctx->bc->cf_last; 2683 sp->num_mid++; 2684} 2685 2686static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 2687{ 2688 ctx->bc->fc_sp++; 2689 ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 2690 ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; 2691} 2692 2693static void fc_poplevel(struct r600_shader_ctx *ctx) 2694{ 2695 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; 2696 if (sp->mid) { 2697 free(sp->mid); 2698 sp->mid = NULL; 2699 } 2700 sp->num_mid = 0; 2701 sp->start = NULL; 2702 sp->type = 0; 2703 ctx->bc->fc_sp--; 2704} 2705 2706#if 0 2707static int emit_return(struct r600_shader_ctx *ctx) 2708{ 2709 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN); 2710 return 0; 2711} 2712 2713static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset) 2714{ 2715 2716 r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); 2717 ctx->bc->cf_last->pop_count = pops; 2718 /* TODO work out offset */ 2719 return 0; 2720} 2721 2722static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value) 2723{ 2724 return 0; 2725} 2726 2727static void emit_testflag(struct r600_shader_ctx *ctx) 2728{ 2729 2730} 2731 2732static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx) 2733{ 2734 emit_testflag(ctx); 2735 emit_jump_to_offset(ctx, 1, 4); 2736 emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0); 2737 pops(ctx, ifidx + 1); 2738 emit_return(ctx); 2739} 2740 2741static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp) 2742{ 2743 emit_testflag(ctx); 2744 2745 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2746 ctx->bc->cf_last->pop_count = 1; 2747 2748 fc_set_mid(ctx, fc_sp); 2749 2750 pops(ctx, 1); 2751} 2752#endif 2753 2754static int tgsi_if(struct r600_shader_ctx *ctx) 2755{ 2756 emit_logic_pred(ctx, CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE)); 2757 2758 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP)); 2759 2760 fc_pushlevel(ctx, FC_IF); 2761 2762 callstack_check_depth(ctx, FC_PUSH_VPM, 0); 2763 return 0; 2764} 2765 2766static int tgsi_else(struct r600_shader_ctx *ctx) 2767{ 2768 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_ELSE)); 2769 ctx->bc->cf_last->pop_count = 1; 2770 2771 fc_set_mid(ctx, ctx->bc->fc_sp); 2772 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; 2773 return 0; 2774} 2775 2776static int tgsi_endif(struct r600_shader_ctx *ctx) 2777{ 2778 pops(ctx, 1); 2779 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { 2780 R600_ERR("if/endif unbalanced in shader\n"); 2781 return -1; 2782 } 2783 2784 if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { 2785 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2786 ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; 2787 } else { 2788 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; 2789 } 2790 fc_poplevel(ctx); 2791 2792 callstack_decrease_current(ctx, FC_PUSH_VPM); 2793 return 0; 2794} 2795 2796static int tgsi_bgnloop(struct r600_shader_ctx *ctx) 2797{ 2798 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); 2799 2800 fc_pushlevel(ctx, FC_LOOP); 2801 2802 /* check stack depth */ 2803 callstack_check_depth(ctx, FC_LOOP, 0); 2804 return 0; 2805} 2806 2807static int tgsi_endloop(struct r600_shader_ctx *ctx) 2808{ 2809 int i; 2810 2811 r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END)); 2812 2813 if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { 2814 R600_ERR("loop/endloop in shader code are not paired.\n"); 2815 return -EINVAL; 2816 } 2817 2818 /* fixup loop pointers - from r600isa 2819 LOOP END points to CF after LOOP START, 2820 LOOP START point to CF after LOOP END 2821 BRK/CONT point to LOOP END CF 2822 */ 2823 ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; 2824 2825 ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; 2826 2827 for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { 2828 ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; 2829 } 2830 /* TODO add LOOPRET support */ 2831 fc_poplevel(ctx); 2832 callstack_decrease_current(ctx, FC_LOOP); 2833 return 0; 2834} 2835 2836static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) 2837{ 2838 unsigned int fscp; 2839 2840 for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) 2841 { 2842 if (FC_LOOP == ctx->bc->fc_stack[fscp].type) 2843 break; 2844 } 2845 2846 if (fscp == 0) { 2847 R600_ERR("Break not inside loop/endloop pair\n"); 2848 return -EINVAL; 2849 } 2850 2851 r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode); 2852 ctx->bc->cf_last->pop_count = 1; 2853 2854 fc_set_mid(ctx, fscp); 2855 2856 pops(ctx, 1); 2857 callstack_check_depth(ctx, FC_PUSH_VPM, 1); 2858 return 0; 2859} 2860 2861static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { 2862 {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_arl}, 2863 {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2864 {TGSI_OPCODE_LIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 2865 2866 /* FIXME: 2867 * For state trackers other than OpenGL, we'll want to use 2868 * _RECIP_IEEE instead. 2869 */ 2870 {TGSI_OPCODE_RCP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED, tgsi_trans_srcx_replicate}, 2871 2872 {TGSI_OPCODE_RSQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_rsq}, 2873 {TGSI_OPCODE_EXP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 2874 {TGSI_OPCODE_LOG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, 2875 {TGSI_OPCODE_MUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 2876 {TGSI_OPCODE_ADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2877 {TGSI_OPCODE_DP3, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2878 {TGSI_OPCODE_DP4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2879 {TGSI_OPCODE_DST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 2880 {TGSI_OPCODE_MIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 2881 {TGSI_OPCODE_MAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 2882 {TGSI_OPCODE_SLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 2883 {TGSI_OPCODE_SGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 2884 {TGSI_OPCODE_MAD, 1, V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 2885 {TGSI_OPCODE_SUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 2886 {TGSI_OPCODE_LRP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 2887 {TGSI_OPCODE_CND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2888 /* gap */ 2889 {20, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2890 {TGSI_OPCODE_DP2A, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2891 /* gap */ 2892 {22, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2893 {23, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2894 {TGSI_OPCODE_FRC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 2895 {TGSI_OPCODE_CLAMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2896 {TGSI_OPCODE_FLR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 2897 {TGSI_OPCODE_ROUND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2898 {TGSI_OPCODE_EX2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 2899 {TGSI_OPCODE_LG2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 2900 {TGSI_OPCODE_POW, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 2901 {TGSI_OPCODE_XPD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 2902 /* gap */ 2903 {32, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2904 {TGSI_OPCODE_ABS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 2905 {TGSI_OPCODE_RCC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2906 {TGSI_OPCODE_DPH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2907 {TGSI_OPCODE_COS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 2908 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 2909 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 2910 {TGSI_OPCODE_KILP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 2911 {TGSI_OPCODE_PK2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2912 {TGSI_OPCODE_PK2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2913 {TGSI_OPCODE_PK4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2914 {TGSI_OPCODE_PK4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2915 {TGSI_OPCODE_RFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2916 {TGSI_OPCODE_SEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 2917 {TGSI_OPCODE_SFL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2918 {TGSI_OPCODE_SGT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 2919 {TGSI_OPCODE_SIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 2920 {TGSI_OPCODE_SLE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 2921 {TGSI_OPCODE_SNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 2922 {TGSI_OPCODE_STR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2923 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2924 {TGSI_OPCODE_TXD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2925 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 2926 {TGSI_OPCODE_UP2H, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2927 {TGSI_OPCODE_UP2US, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2928 {TGSI_OPCODE_UP4B, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2929 {TGSI_OPCODE_UP4UB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2930 {TGSI_OPCODE_X2D, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2931 {TGSI_OPCODE_ARA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2932 {TGSI_OPCODE_ARR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2933 {TGSI_OPCODE_BRA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2934 {TGSI_OPCODE_CAL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2935 {TGSI_OPCODE_RET, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2936 {TGSI_OPCODE_SSG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 2937 {TGSI_OPCODE_CMP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 2938 {TGSI_OPCODE_SCS, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 2939 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2940 {TGSI_OPCODE_NRM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2941 {TGSI_OPCODE_DIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2942 {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 2943 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 2944 {TGSI_OPCODE_BRK, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 2945 {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 2946 /* gap */ 2947 {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2948 {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2949 {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 2950 {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 2951 /* gap */ 2952 {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2953 {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2954 {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2955 {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2956 {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2957 {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2958 {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2959 {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 2960 {TGSI_OPCODE_SHL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2961 /* gap */ 2962 {88, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2963 {TGSI_OPCODE_AND, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2964 {TGSI_OPCODE_OR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2965 {TGSI_OPCODE_MOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2966 {TGSI_OPCODE_XOR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2967 {TGSI_OPCODE_SAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2968 {TGSI_OPCODE_TXF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2969 {TGSI_OPCODE_TXQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2970 {TGSI_OPCODE_CONT, 0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 2971 {TGSI_OPCODE_EMIT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2972 {TGSI_OPCODE_ENDPRIM, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2973 {TGSI_OPCODE_BGNLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 2974 {TGSI_OPCODE_BGNSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2975 {TGSI_OPCODE_ENDLOOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 2976 {TGSI_OPCODE_ENDSUB, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2977 /* gap */ 2978 {103, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2979 {104, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2980 {105, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2981 {106, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2982 {TGSI_OPCODE_NOP, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2983 /* gap */ 2984 {108, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2985 {109, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2986 {110, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2987 {111, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2988 {TGSI_OPCODE_NRM4, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2989 {TGSI_OPCODE_CALLNZ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2990 {TGSI_OPCODE_IFC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2991 {TGSI_OPCODE_BREAKC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2992 {TGSI_OPCODE_KIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 2993 {TGSI_OPCODE_END, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 2994 /* gap */ 2995 {118, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2996 {TGSI_OPCODE_F2I, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2997 {TGSI_OPCODE_IDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2998 {TGSI_OPCODE_IMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 2999 {TGSI_OPCODE_IMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3000 {TGSI_OPCODE_INEG, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3001 {TGSI_OPCODE_ISGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3002 {TGSI_OPCODE_ISHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3003 {TGSI_OPCODE_ISLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3004 {TGSI_OPCODE_F2U, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3005 {TGSI_OPCODE_U2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3006 {TGSI_OPCODE_UADD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3007 {TGSI_OPCODE_UDIV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3008 {TGSI_OPCODE_UMAD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3009 {TGSI_OPCODE_UMAX, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3010 {TGSI_OPCODE_UMIN, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3011 {TGSI_OPCODE_UMOD, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3012 {TGSI_OPCODE_UMUL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3013 {TGSI_OPCODE_USEQ, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3014 {TGSI_OPCODE_USGE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3015 {TGSI_OPCODE_USHR, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3016 {TGSI_OPCODE_USLT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3017 {TGSI_OPCODE_USNE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3018 {TGSI_OPCODE_SWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3019 {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3020 {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3021 {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3022 {TGSI_OPCODE_LAST, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3023}; 3024 3025static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { 3026 {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3027 {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3028 {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, 3029 {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, tgsi_trans_srcx_replicate}, 3030 {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, tgsi_trans_srcx_replicate}, 3031 {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, 3032 {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3033 {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, 3034 {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3035 {TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3036 {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3037 {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, 3038 {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, 3039 {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, 3040 {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, 3041 {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, 3042 {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, 3043 {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, 3044 {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, 3045 {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3046 /* gap */ 3047 {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3048 {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3049 /* gap */ 3050 {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3051 {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3052 {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, 3053 {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3054 {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, 3055 {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3056 {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, tgsi_trans_srcx_replicate}, 3057 {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, tgsi_trans_srcx_replicate}, 3058 {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_pow}, 3059 {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, 3060 /* gap */ 3061 {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3062 {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, 3063 {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3064 {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3065 {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, tgsi_trig}, 3066 {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, 3067 {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, 3068 {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ 3069 {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3070 {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3071 {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3072 {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3073 {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3074 {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, 3075 {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3076 {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, 3077 {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, tgsi_trig}, 3078 {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, 3079 {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, 3080 {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3081 {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3082 {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3083 {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, 3084 {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3085 {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3086 {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3087 {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3088 {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3089 {TGSI_OPCODE_ARA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3090 {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3091 {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3092 {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3093 {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3094 {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, 3095 {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, 3096 {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, 3097 {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3098 {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3099 {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3100 {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, 3101 {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, 3102 {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, 3103 {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, 3104 /* gap */ 3105 {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3106 {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3107 {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, 3108 {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, 3109 /* gap */ 3110 {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3111 {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3112 {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3113 {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3114 {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3115 {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3116 {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3117 {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_trans_srcx_replicate}, 3118 {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3119 /* gap */ 3120 {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3121 {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3122 {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3123 {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3124 {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3125 {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3126 {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3127 {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3128 {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, 3129 {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3130 {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3131 {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, 3132 {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3133 {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, 3134 {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3135 /* gap */ 3136 {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3137 {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3138 {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3139 {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3140 {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3141 /* gap */ 3142 {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3143 {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3144 {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3145 {111, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3146 {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3147 {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3148 {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3149 {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3150 {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ 3151 {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ 3152 /* gap */ 3153 {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3154 {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3155 {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3156 {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3157 {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3158 {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3159 {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3160 {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3161 {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3162 {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3163 {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3164 {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3165 {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3166 {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3167 {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3168 {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3169 {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3170 {TGSI_OPCODE_UMUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3171 {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3172 {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3173 {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3174 {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3175 {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3176 {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3177 {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3178 {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3179 {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3180 {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, 3181}; 3182